1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/sdt.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 #include <sys/neti.h> 49 #include <sys/hook.h> 50 51 #include <sys/kmem.h> 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/atomic.h> 58 #include <sys/iphada.h> 59 #include <sys/policy.h> 60 #include <net/if.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/arp.h> 75 76 #include <inet/ip.h> 77 #include <inet/ip_impl.h> 78 #include <inet/ip6.h> 79 #include <inet/ip6_asp.h> 80 #include <inet/tcp.h> 81 #include <inet/tcp_impl.h> 82 #include <inet/udp_impl.h> 83 #include <inet/ipp_common.h> 84 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_rts.h> 89 #include <inet/optcom.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include <rpc/pmap_prot.h> 107 
/* Temporary; for CR 6451644 work-around */
#include <sys/ethernet.h>

/* Defined elsewhere; only declared here. */
extern squeue_func_t ip_input_proc;

/*
 * IP statistics.
 *
 * IP6_STAT(x) adds one to the named counter in ip6_statistics and
 * IP6_STAT_UPDATE(x, n) adds n to it.  Both expand to a plain
 * read-modify-write of the 64-bit kstat value.
 */
#define	IP6_STAT(x)		(ip6_statistics.x.value.ui64++)
#define	IP6_STAT_UPDATE(x, n)	(ip6_statistics.x.value.ui64 += (n))

/* One named kstat per counter; see ip6_statistics for the exported names. */
typedef struct ip6_stat {
	kstat_named_t	ip6_udp_fast_path;
	kstat_named_t	ip6_udp_slow_path;
	kstat_named_t	ip6_udp_fannorm;
	kstat_named_t	ip6_udp_fanmb;
	kstat_named_t	ip6_out_sw_cksum;
	kstat_named_t	ip6_in_sw_cksum;
	kstat_named_t	ip6_tcp_in_full_hw_cksum_err;
	kstat_named_t	ip6_tcp_in_part_hw_cksum_err;
	kstat_named_t	ip6_tcp_in_sw_cksum_err;
	kstat_named_t	ip6_tcp_out_sw_cksum_bytes;
	kstat_named_t	ip6_udp_in_full_hw_cksum_err;
	kstat_named_t	ip6_udp_in_part_hw_cksum_err;
	kstat_named_t	ip6_udp_in_sw_cksum_err;
	kstat_named_t	ip6_udp_out_sw_cksum_bytes;
	kstat_named_t	ip6_frag_mdt_pkt_out;
	kstat_named_t	ip6_frag_mdt_discarded;
	kstat_named_t	ip6_frag_mdt_allocfail;
	kstat_named_t	ip6_frag_mdt_addpdescfail;
	kstat_named_t	ip6_frag_mdt_allocd;
} ip6_stat_t;

/*
 * The initializers below are positional, so their order must match the
 * member order of ip6_stat_t above.  Each entry gives the counter name and
 * declares it as a 64-bit unsigned value.
 */
static ip6_stat_t ip6_statistics = {
	{ "ip6_udp_fast_path",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_slow_path",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_fannorm",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_fanmb",			KSTAT_DATA_UINT64 },
	{ "ip6_out_sw_cksum",			KSTAT_DATA_UINT64 },
	{ "ip6_in_sw_cksum",			KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
	{ "ip6_tcp_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
	{ "ip6_udp_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_pkt_out",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_discarded",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_allocfail",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_addpdescfail",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_allocd",		KSTAT_DATA_UINT64 },
};

static kstat_t *ip6_kstat;

/*
 * Naming conventions:
 *	These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */

/*
 * IPv6 mibs when the interface (ill) is not known.
 * When the ill is known the per-interface mib in the ill is used.
 */
mib2_ipv6IfStatsEntry_t	ip6_mib;
mib2_ipv6IfIcmpEntry_t	icmp6_mib;

/*
 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
 * from IANA. This mechanism will remain in effect until an official
 * number is obtained.
 */
uchar_t ip6opt_ls;

uint_t ipv6_ire_default_count;	/* Number of IPv6 IRE_DEFAULT entries */
uint_t ipv6_ire_default_index;	/* Walking IPv6 index used to mod in */

/*
 * Well-known IPv6 addresses.  Each in6_addr_t is initialized as four 32-bit
 * words, so every constant with non-uniform bytes is given twice: the
 * little-endian variant has each word byte-swapped relative to the
 * big-endian one.
 */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

/* ff00:: */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif	/* _BIG_ENDIAN */

/* ::1 */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::1 */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::2 */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::16 */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::1:ff00:0 (prefix only; the low 24 bits are zero here) */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif	/* _BIG_ENDIAN */

/*
 * Used by icmp_send_redirect_v6 for picking random src.
 */
uint_t	icmp_redirect_v6_src_index;

/* Leave room for ip_newroute to tack on the src and target addresses */
#define	OK_RESOLVER_MP_V6(mp)						\
		((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))

/*
 * Forward declarations.  Everything declared static here is private to
 * this file; ip_rput_v6 is the only one with external linkage.
 */
static void	icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill,
    boolean_t, zoneid_t);
static void	icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
    const in6_addr_t *, boolean_t, zoneid_t);
static void	icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static int	ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *,
    uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
    boolean_t, boolean_t, boolean_t, boolean_t);
static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *,
    iulp_t *);
static int	ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *,
    uint16_t, boolean_t, boolean_t, boolean_t);
static void	ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
    ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int	ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
    uint8_t *, uint_t, uint8_t);
static mblk_t	*ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *,
    ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *);
static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *);
static void	ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
    conn_t *, int, int, int, zoneid_t);
static boolean_t ip_ulp_cando_pkt2big(int);

void ip_rput_v6(queue_t *, mblk_t *);
static void ip_wput_v6(queue_t *, mblk_t *);

/*
 * A template for an IPv6 AR_ENTRY_QUERY
 *
 * The initializers are positional.  The offsets lay out, after the areq_t
 * itself, the target address, then the sender address, then the name.
 */
static areq_t	ipv6_areq_template = {
	AR_ENTRY_QUERY,				/* cmd */
	sizeof (areq_t)+(2*IPV6_ADDR_LEN),	/* name offset */
	sizeof (areq_t),	/* name len (filled by ill_arp_alloc) */
	IP6_DL_SAP,		/* protocol, from arps perspective */
	sizeof (areq_t),	/* target addr offset */
	IPV6_ADDR_LEN,		/* target addr_length */
	0,			/* flags */
	sizeof (areq_t) + IPV6_ADDR_LEN,	/* sender addr offset */
	IPV6_ADDR_LEN,		/* sender addr length */
	6,			/* xmit_count */
	1000,			/* (re)xmit_interval in milliseconds */
	4			/* max # of requests to buffer */
	/* anything else filled in by the code */
};

/*
 * STREAMS read-side queue initialization for IPv6.
 * NOTE(review): the per-field labels below follow the usual struct qinit
 * member order (put, service, open, close, admin, module info) -- confirm
 * against qinit(9S).
 */
struct qinit rinit_ipv6 = {
	(pfi_t)ip_rput_v6,	/* put procedure */
	NULL,			/* no service procedure */
	ip_open,		/* open */
	ip_close,		/* close */
	NULL,			/* admin (unused) */
	&ip_mod_info		/* module info */
};

/* STREAMS write-side queue initialization for IPv6 (same layout as above). */
struct qinit winit_ipv6 = {
	(pfi_t)ip_wput_v6,	/* put procedure */
	(pfi_t)ip_wsrv,		/* service procedure */
	ip_open,		/* open */
	ip_close,		/* close */
	NULL,			/* admin (unused) */
	&ip_mod_info		/* module info */
};

/*
 * Handle IPv6 ICMP packets sent to us.  Consume the mblk passed in.
 * The message has already been checksummed and if needed,
 * a copy has been made to be sent to any interested ICMP client (conn)
 * Note that this is different than icmp_inbound() which does the fanout
 * to conn's as well as local processing of the ICMP packets.
 *
 * All error messages are passed to the matching transport stream.
 *
 * Zones notes:
 * The packet is only processed in the context of the specified zone: typically
 * only this zone will reply to an echo request. This means that the caller must
 * call icmp_inbound_v6() for each relevant zone.
331 */ 332 static void 333 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 334 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 335 { 336 icmp6_t *icmp6; 337 ip6_t *ip6h; 338 boolean_t interested; 339 ip6i_t *ip6i; 340 in6_addr_t origsrc; 341 ire_t *ire; 342 mblk_t *first_mp; 343 ipsec_in_t *ii; 344 345 ASSERT(ill != NULL); 346 first_mp = mp; 347 if (mctl_present) { 348 mp = first_mp->b_cont; 349 ASSERT(mp != NULL); 350 351 ii = (ipsec_in_t *)first_mp->b_rptr; 352 ASSERT(ii->ipsec_in_type == IPSEC_IN); 353 } 354 355 ip6h = (ip6_t *)mp->b_rptr; 356 357 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 358 359 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 360 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 361 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 362 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 363 freemsg(first_mp); 364 return; 365 } 366 ip6h = (ip6_t *)mp->b_rptr; 367 } 368 if (icmp_accept_clear_messages == 0) { 369 first_mp = ipsec_check_global_policy(first_mp, NULL, 370 NULL, ip6h, mctl_present); 371 if (first_mp == NULL) 372 return; 373 } 374 375 /* 376 * On a labeled system, we have to check whether the zone itself is 377 * permitted to receive raw traffic. 378 */ 379 if (is_system_labeled()) { 380 if (zoneid == ALL_ZONES) 381 zoneid = tsol_packet_to_zoneid(mp); 382 if (!tsol_can_accept_raw(mp, B_FALSE)) { 383 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 384 zoneid)); 385 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 386 freemsg(first_mp); 387 return; 388 } 389 } 390 391 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 392 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 393 icmp6->icmp6_code)); 394 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 395 396 /* Initiate IPPF processing here */ 397 if (IP6_IN_IPP(flags)) { 398 399 /* 400 * If the ifindex changes due to SIOCSLIFINDEX 401 * packet may return to IP on the wrong ill. 
402 */ 403 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 404 if (mp == NULL) { 405 if (mctl_present) { 406 freeb(first_mp); 407 } 408 return; 409 } 410 } 411 412 switch (icmp6->icmp6_type) { 413 case ICMP6_DST_UNREACH: 414 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 415 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 416 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 417 break; 418 419 case ICMP6_TIME_EXCEEDED: 420 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 421 break; 422 423 case ICMP6_PARAM_PROB: 424 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 425 break; 426 427 case ICMP6_PACKET_TOO_BIG: 428 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 429 zoneid); 430 return; 431 case ICMP6_ECHO_REQUEST: 432 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 433 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 434 !ipv6_resp_echo_mcast) 435 break; 436 437 /* 438 * We must have exclusive use of the mblk to convert it to 439 * a response. 440 * If not, we copy it. 441 */ 442 if (mp->b_datap->db_ref > 1) { 443 mblk_t *mp1; 444 445 mp1 = copymsg(mp); 446 freemsg(mp); 447 if (mp1 == NULL) { 448 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 449 if (mctl_present) 450 freeb(first_mp); 451 return; 452 } 453 mp = mp1; 454 ip6h = (ip6_t *)mp->b_rptr; 455 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 456 if (mctl_present) 457 first_mp->b_cont = mp; 458 else 459 first_mp = mp; 460 } 461 462 /* 463 * Turn the echo into an echo reply. 464 * Remove any extension headers (do not reverse a source route) 465 * and clear the flow id (keep traffic class for now). 
466 */ 467 if (hdr_length != IPV6_HDR_LEN) { 468 int i; 469 470 for (i = 0; i < IPV6_HDR_LEN; i++) 471 mp->b_rptr[hdr_length - i - 1] = 472 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 473 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 474 ip6h = (ip6_t *)mp->b_rptr; 475 ip6h->ip6_nxt = IPPROTO_ICMPV6; 476 hdr_length = IPV6_HDR_LEN; 477 } 478 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 479 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 480 481 ip6h->ip6_plen = 482 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 483 origsrc = ip6h->ip6_src; 484 /* 485 * Reverse the source and destination addresses. 486 * If the return address is a multicast, zero out the source 487 * (ip_wput_v6 will set an address). 488 */ 489 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 490 ip6h->ip6_src = ipv6_all_zeros; 491 ip6h->ip6_dst = origsrc; 492 } else { 493 ip6h->ip6_src = ip6h->ip6_dst; 494 ip6h->ip6_dst = origsrc; 495 } 496 497 /* set the hop limit */ 498 ip6h->ip6_hops = ipv6_def_hops; 499 500 /* 501 * Prepare for checksum by putting icmp length in the icmp 502 * checksum field. The checksum is calculated in ip_wput_v6. 503 */ 504 icmp6->icmp6_cksum = ip6h->ip6_plen; 505 /* 506 * ICMP echo replies should go out on the same interface 507 * the request came on as probes used by in.mpathd for 508 * detecting NIC failures are ECHO packets. We turn-off load 509 * spreading by allocating a ip6i and setting ip6i_attach_if 510 * to B_TRUE which is handled both by ip_wput_v6 and 511 * ip_newroute_v6. If we don't turnoff load spreading, 512 * the packets might get dropped if there are no 513 * non-FAILED/INACTIVE interfaces for it to go out on and 514 * in.mpathd would wrongly detect a failure or mis-detect 515 * a NIC failure as a link failure. As load spreading can 516 * happen only if ill_group is not NULL, we do only for 517 * that case and this does not affect the normal case. 518 * 519 * We force this only on echo packets that came from on-link 520 * hosts. 
We restrict this to link-local addresses which 521 * is used by in.mpathd for probing. In the IPv6 case, 522 * default routes typically have an ire_ipif pointer and 523 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 524 * might work. As a default route out of this interface 525 * may not be present, enforcing this packet to go out in 526 * this case may not work. 527 */ 528 if (ill->ill_group != NULL && 529 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 530 /* 531 * If we are sending replies to ourselves, don't 532 * set ATTACH_IF as we may not be able to find 533 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 534 * causes ip_wput_v6 to look for an IRE_LOCAL on 535 * "ill" which it may not find and will try to 536 * create an IRE_CACHE for our local address. Once 537 * we do this, we will try to forward all packets 538 * meant to our LOCAL address. 539 */ 540 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 541 NULL); 542 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 543 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 544 if (mp == NULL) { 545 BUMP_MIB(ill->ill_icmp6_mib, 546 ipv6IfIcmpInErrors); 547 if (ire != NULL) 548 ire_refrele(ire); 549 if (mctl_present) 550 freeb(first_mp); 551 return; 552 } else if (mctl_present) { 553 first_mp->b_cont = mp; 554 } else { 555 first_mp = mp; 556 } 557 ip6i = (ip6i_t *)mp->b_rptr; 558 ip6i->ip6i_flags = IP6I_ATTACH_IF; 559 ip6i->ip6i_ifindex = 560 ill->ill_phyint->phyint_ifindex; 561 } 562 if (ire != NULL) 563 ire_refrele(ire); 564 } 565 566 if (!mctl_present) { 567 /* 568 * This packet should go out the same way as it 569 * came in i.e in clear. To make sure that global 570 * policy will not be applied to this in ip_wput, 571 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
572 */ 573 ASSERT(first_mp == mp); 574 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 575 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 576 freemsg(mp); 577 return; 578 } 579 ii = (ipsec_in_t *)first_mp->b_rptr; 580 581 /* This is not a secure packet */ 582 ii->ipsec_in_secure = B_FALSE; 583 first_mp->b_cont = mp; 584 } 585 ii->ipsec_in_zoneid = zoneid; 586 ASSERT(zoneid != ALL_ZONES); 587 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 588 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 589 return; 590 } 591 put(WR(q), first_mp); 592 return; 593 594 case ICMP6_ECHO_REPLY: 595 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 596 break; 597 598 case ND_ROUTER_SOLICIT: 599 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 600 break; 601 602 case ND_ROUTER_ADVERT: 603 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 604 break; 605 606 case ND_NEIGHBOR_SOLICIT: 607 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 608 if (mctl_present) 609 freeb(first_mp); 610 /* XXX may wish to pass first_mp up to ndp_input someday. */ 611 ndp_input(ill, mp, dl_mp); 612 return; 613 614 case ND_NEIGHBOR_ADVERT: 615 BUMP_MIB(ill->ill_icmp6_mib, 616 ipv6IfIcmpInNeighborAdvertisements); 617 if (mctl_present) 618 freeb(first_mp); 619 /* XXX may wish to pass first_mp up to ndp_input someday. */ 620 ndp_input(ill, mp, dl_mp); 621 return; 622 623 case ND_REDIRECT: { 624 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 625 626 if (ipv6_ignore_redirect) 627 break; 628 629 /* 630 * As there is no upper client to deliver, we don't 631 * need the first_mp any more. 632 */ 633 if (mctl_present) 634 freeb(first_mp); 635 if (!pullupmsg(mp, -1)) { 636 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 637 break; 638 } 639 icmp_redirect_v6(q, mp, ill); 640 return; 641 } 642 643 /* 644 * The next three icmp messages will be handled by MLD. 645 * Pass all valid MLD packets up to any process(es) 646 * listening on a raw ICMP socket. 
MLD messages are 647 * freed by mld_input function. 648 */ 649 case MLD_LISTENER_QUERY: 650 case MLD_LISTENER_REPORT: 651 case MLD_LISTENER_REDUCTION: 652 if (mctl_present) 653 freeb(first_mp); 654 mld_input(q, mp, ill); 655 return; 656 default: 657 break; 658 } 659 if (interested) { 660 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 661 mctl_present, zoneid); 662 } else { 663 freemsg(first_mp); 664 } 665 } 666 667 /* 668 * Process received IPv6 ICMP Packet too big. 669 * After updating any IRE it does the fanout to any matching transport streams. 670 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 671 */ 672 /* ARGSUSED */ 673 static void 674 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 675 boolean_t mctl_present, zoneid_t zoneid) 676 { 677 ip6_t *ip6h; 678 ip6_t *inner_ip6h; 679 icmp6_t *icmp6; 680 uint16_t hdr_length; 681 uint32_t mtu; 682 ire_t *ire, *first_ire; 683 mblk_t *first_mp; 684 685 first_mp = mp; 686 if (mctl_present) 687 mp = first_mp->b_cont; 688 /* 689 * We must have exclusive use of the mblk to update the MTU 690 * in the packet. 691 * If not, we copy it. 692 * 693 * If there's an M_CTL present, we know that allocated first_mp 694 * earlier in this function, so we know first_mp has refcnt of one. 
695 */ 696 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 697 if (mp->b_datap->db_ref > 1) { 698 mblk_t *mp1; 699 700 mp1 = copymsg(mp); 701 freemsg(mp); 702 if (mp1 == NULL) { 703 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 704 if (mctl_present) 705 freeb(first_mp); 706 return; 707 } 708 mp = mp1; 709 if (mctl_present) 710 first_mp->b_cont = mp; 711 else 712 first_mp = mp; 713 } 714 ip6h = (ip6_t *)mp->b_rptr; 715 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 716 hdr_length = ip_hdr_length_v6(mp, ip6h); 717 else 718 hdr_length = IPV6_HDR_LEN; 719 720 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 721 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 722 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 723 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 724 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 725 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 726 freemsg(first_mp); 727 return; 728 } 729 ip6h = (ip6_t *)mp->b_rptr; 730 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 731 inner_ip6h = (ip6_t *)&icmp6[1]; 732 } 733 734 /* 735 * For link local destinations matching simply on IRE type is not 736 * sufficient. Same link local addresses for different ILL's is 737 * possible. 
738 */ 739 740 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 741 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 742 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 743 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 744 745 if (first_ire == NULL) { 746 if (ip_debug > 2) { 747 /* ip1dbg */ 748 pr_addr_dbg("icmp_inbound_too_big_v6:" 749 "no ire for dst %s\n", AF_INET6, 750 &inner_ip6h->ip6_dst); 751 } 752 freemsg(first_mp); 753 return; 754 } 755 756 mtu = ntohl(icmp6->icmp6_mtu); 757 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 758 for (ire = first_ire; ire != NULL && 759 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 760 ire = ire->ire_next) { 761 mutex_enter(&ire->ire_lock); 762 if (mtu < IPV6_MIN_MTU) { 763 ip1dbg(("Received mtu less than IPv6 " 764 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 765 mtu = IPV6_MIN_MTU; 766 /* 767 * If an mtu less than IPv6 min mtu is received, 768 * we must include a fragment header in 769 * subsequent packets. 770 */ 771 ire->ire_frag_flag |= IPH_FRAG_HDR; 772 } 773 ip1dbg(("Received mtu from router: %d\n", mtu)); 774 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 775 /* Record the new max frag size for the ULP. */ 776 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 777 /* 778 * If we need a fragment header in every packet 779 * (above case or multirouting), make sure the 780 * ULP takes it into account when computing the 781 * payload size. 
782 */ 783 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 784 sizeof (ip6_frag_t)); 785 } else { 786 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 787 } 788 mutex_exit(&ire->ire_lock); 789 } 790 rw_exit(&first_ire->ire_bucket->irb_lock); 791 ire_refrele(first_ire); 792 } else { 793 irb_t *irb = NULL; 794 /* 795 * for non-link local destinations we match only on the IRE type 796 */ 797 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 798 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE); 799 if (ire == NULL) { 800 if (ip_debug > 2) { 801 /* ip1dbg */ 802 pr_addr_dbg("icmp_inbound_too_big_v6:" 803 "no ire for dst %s\n", 804 AF_INET6, &inner_ip6h->ip6_dst); 805 } 806 freemsg(first_mp); 807 return; 808 } 809 irb = ire->ire_bucket; 810 ire_refrele(ire); 811 rw_enter(&irb->irb_lock, RW_READER); 812 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 813 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 814 &inner_ip6h->ip6_dst)) { 815 mtu = ntohl(icmp6->icmp6_mtu); 816 mutex_enter(&ire->ire_lock); 817 if (mtu < IPV6_MIN_MTU) { 818 ip1dbg(("Received mtu less than IPv6" 819 "min mtu %d: %d\n", 820 IPV6_MIN_MTU, mtu)); 821 mtu = IPV6_MIN_MTU; 822 /* 823 * If an mtu less than IPv6 min mtu is 824 * received, we must include a fragment 825 * header in subsequent packets. 826 */ 827 ire->ire_frag_flag |= IPH_FRAG_HDR; 828 } 829 830 ip1dbg(("Received mtu from router: %d\n", mtu)); 831 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 832 /* Record the new max frag size for the ULP. */ 833 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 834 /* 835 * If we need a fragment header in 836 * every packet (above case or 837 * multirouting), make sure the ULP 838 * takes it into account when computing 839 * the payload size. 
840 */ 841 icmp6->icmp6_mtu = 842 htonl(ire->ire_max_frag - 843 sizeof (ip6_frag_t)); 844 } else { 845 icmp6->icmp6_mtu = 846 htonl(ire->ire_max_frag); 847 } 848 mutex_exit(&ire->ire_lock); 849 } 850 } 851 rw_exit(&irb->irb_lock); 852 } 853 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 854 mctl_present, zoneid); 855 } 856 857 static void 858 pkt_too_big(conn_t *connp, void *arg) 859 { 860 mblk_t *mp; 861 862 if (!connp->conn_ipv6_recvpathmtu) 863 return; 864 865 /* create message and drop it on this connections read queue */ 866 if ((mp = dupb((mblk_t *)arg)) == NULL) { 867 return; 868 } 869 mp->b_datap->db_type = M_CTL; 870 871 putnext(connp->conn_rq, mp); 872 } 873 874 /* 875 * Fanout received ICMPv6 error packets to the transports. 876 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 877 */ 878 void 879 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 880 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 881 { 882 uint16_t *up; /* Pointer to ports in ULP header */ 883 uint32_t ports; /* reversed ports for fanout */ 884 ip6_t rip6h; /* With reversed addresses */ 885 uint16_t hdr_length; 886 uint8_t *nexthdrp; 887 uint8_t nexthdr; 888 mblk_t *first_mp; 889 ipsec_in_t *ii; 890 tcpha_t *tcpha; 891 conn_t *connp; 892 893 first_mp = mp; 894 if (mctl_present) { 895 mp = first_mp->b_cont; 896 ASSERT(mp != NULL); 897 898 ii = (ipsec_in_t *)first_mp->b_rptr; 899 ASSERT(ii->ipsec_in_type == IPSEC_IN); 900 } else { 901 ii = NULL; 902 } 903 904 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 905 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 906 907 /* 908 * Need to pullup everything in order to use 909 * ip_hdr_length_nexthdr_v6() 910 */ 911 if (mp->b_cont != NULL) { 912 if (!pullupmsg(mp, -1)) { 913 ip1dbg(("icmp_inbound_error_fanout_v6: " 914 "pullupmsg failed\n")); 915 goto drop_pkt; 916 } 917 ip6h = (ip6_t *)mp->b_rptr; 918 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 919 } 920 921 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 922 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 923 goto drop_pkt; 924 925 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 926 goto drop_pkt; 927 nexthdr = *nexthdrp; 928 929 /* Set message type, must be done after pullups */ 930 mp->b_datap->db_type = M_CTL; 931 932 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 933 /* 934 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 935 * sockets. 936 * 937 * Note I don't like walking every connection to deliver 938 * this information to a set of listeners. A separate 939 * list could be kept to keep the cost of this down. 940 */ 941 ipcl_walk(pkt_too_big, (void *)mp); 942 } 943 944 /* Try to pass the ICMP message to clients who need it */ 945 switch (nexthdr) { 946 case IPPROTO_UDP: { 947 /* 948 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 949 * UDP header to get the port information. 950 */ 951 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 952 mp->b_wptr) { 953 break; 954 } 955 /* 956 * Attempt to find a client stream based on port. 957 * Note that we do a reverse lookup since the header is 958 * in the form we sent it out. 959 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 960 * and we only set the src and dst addresses and nexthdr. 961 */ 962 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 963 rip6h.ip6_src = ip6h->ip6_dst; 964 rip6h.ip6_dst = ip6h->ip6_src; 965 rip6h.ip6_nxt = nexthdr; 966 ((uint16_t *)&ports)[0] = up[1]; 967 ((uint16_t *)&ports)[1] = up[0]; 968 969 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 970 IP6_NO_IPPOLICY, mctl_present, zoneid); 971 return; 972 } 973 case IPPROTO_TCP: { 974 /* 975 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 976 * the TCP header to get the port information. 
977 */ 978 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 979 mp->b_wptr) { 980 break; 981 } 982 983 /* 984 * Attempt to find a client stream based on port. 985 * Note that we do a reverse lookup since the header is 986 * in the form we sent it out. 987 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 988 * we only set the src and dst addresses and nexthdr. 989 */ 990 991 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 992 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 993 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 994 if (connp == NULL) { 995 goto drop_pkt; 996 } 997 998 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 999 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 1000 return; 1001 1002 } 1003 case IPPROTO_SCTP: 1004 /* 1005 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 1006 * the SCTP header to get the port information. 1007 */ 1008 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 1009 mp->b_wptr) { 1010 break; 1011 } 1012 1013 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 1014 ((uint16_t *)&ports)[0] = up[1]; 1015 ((uint16_t *)&ports)[1] = up[0]; 1016 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 1017 IP6_NO_IPPOLICY, 0, zoneid); 1018 return; 1019 case IPPROTO_ESP: 1020 case IPPROTO_AH: { 1021 int ipsec_rc; 1022 1023 /* 1024 * We need a IPSEC_IN in the front to fanout to AH/ESP. 1025 * We will re-use the IPSEC_IN if it is already present as 1026 * AH/ESP will not affect any fields in the IPSEC_IN for 1027 * ICMP errors. If there is no IPSEC_IN, allocate a new 1028 * one and attach it in the front. 1029 */ 1030 if (ii != NULL) { 1031 /* 1032 * ip_fanout_proto_again converts the ICMP errors 1033 * that come back from AH/ESP to M_DATA so that 1034 * if it is non-AH/ESP and we do a pullupmsg in 1035 * this function, it would work. Convert it back 1036 * to M_CTL before we send up as this is a ICMP 1037 * error. This could have been generated locally or 1038 * by some router. 
Validate the inner IPSEC 1039 * headers. 1040 * 1041 * NOTE : ill_index is used by ip_fanout_proto_again 1042 * to locate the ill. 1043 */ 1044 ASSERT(ill != NULL); 1045 ii->ipsec_in_ill_index = 1046 ill->ill_phyint->phyint_ifindex; 1047 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1048 first_mp->b_cont->b_datap->db_type = M_CTL; 1049 } else { 1050 /* 1051 * IPSEC_IN is not present. We attach a ipsec_in 1052 * message and send up to IPSEC for validating 1053 * and removing the IPSEC headers. Clear 1054 * ipsec_in_secure so that when we return 1055 * from IPSEC, we don't mistakenly think that this 1056 * is a secure packet came from the network. 1057 * 1058 * NOTE : ill_index is used by ip_fanout_proto_again 1059 * to locate the ill. 1060 */ 1061 ASSERT(first_mp == mp); 1062 first_mp = ipsec_in_alloc(B_FALSE); 1063 if (first_mp == NULL) { 1064 freemsg(mp); 1065 BUMP_MIB(&ip_mib, ipInDiscards); 1066 return; 1067 } 1068 ii = (ipsec_in_t *)first_mp->b_rptr; 1069 1070 /* This is not a secure packet */ 1071 ii->ipsec_in_secure = B_FALSE; 1072 first_mp->b_cont = mp; 1073 mp->b_datap->db_type = M_CTL; 1074 ASSERT(ill != NULL); 1075 ii->ipsec_in_ill_index = 1076 ill->ill_phyint->phyint_ifindex; 1077 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1078 } 1079 1080 if (!ipsec_loaded()) { 1081 ip_proto_not_sup(q, first_mp, 0, zoneid); 1082 return; 1083 } 1084 1085 if (nexthdr == IPPROTO_ESP) 1086 ipsec_rc = ipsecesp_icmp_error(first_mp); 1087 else 1088 ipsec_rc = ipsecah_icmp_error(first_mp); 1089 if (ipsec_rc == IPSEC_STATUS_FAILED) 1090 return; 1091 1092 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1093 return; 1094 } 1095 case IPPROTO_ENCAP: 1096 case IPPROTO_IPV6: 1097 if ((uint8_t *)ip6h + hdr_length + 1098 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1099 sizeof (ip6_t)) > mp->b_wptr) 1100 goto drop_pkt; 1101 1102 if (nexthdr == IPPROTO_ENCAP || 1103 !IN6_ARE_ADDR_EQUAL( 1104 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1105 &ip6h->ip6_src) || 1106 !IN6_ARE_ADDR_EQUAL( 1107 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1108 &ip6h->ip6_dst)) { 1109 /* 1110 * For tunnels that have used IPsec protection, 1111 * we need to adjust the MTU to take into account 1112 * the IPsec overhead. 1113 */ 1114 if (ii != NULL) 1115 icmp6->icmp6_mtu = htons( 1116 ntohs(icmp6->icmp6_mtu) - 1117 ipsec_in_extra_length(first_mp)); 1118 } else { 1119 /* 1120 * Self-encapsulated case. As in the ipv4 case, 1121 * we need to strip the 2nd IP header. Since mp 1122 * is already pulled-up, we can simply bcopy 1123 * the 3rd header + data over the 2nd header. 1124 */ 1125 uint16_t unused_len; 1126 ip6_t *inner_ip6h = (ip6_t *) 1127 ((uchar_t *)ip6h + hdr_length); 1128 1129 /* 1130 * Make sure we don't do recursion more than once. 1131 */ 1132 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1133 &unused_len, &nexthdrp) || 1134 *nexthdrp == IPPROTO_IPV6) { 1135 goto drop_pkt; 1136 } 1137 1138 /* 1139 * We are about to modify the packet. Make a copy if 1140 * someone else has a reference to it. 1141 */ 1142 if (DB_REF(mp) > 1) { 1143 mblk_t *mp1; 1144 uint16_t icmp6_offset; 1145 1146 mp1 = copymsg(mp); 1147 if (mp1 == NULL) { 1148 goto drop_pkt; 1149 } 1150 icmp6_offset = (uint16_t) 1151 ((uchar_t *)icmp6 - mp->b_rptr); 1152 freemsg(mp); 1153 mp = mp1; 1154 1155 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1156 ip6h = (ip6_t *)&icmp6[1]; 1157 inner_ip6h = (ip6_t *) 1158 ((uchar_t *)ip6h + hdr_length); 1159 1160 if (mctl_present) 1161 first_mp->b_cont = mp; 1162 else 1163 first_mp = mp; 1164 } 1165 1166 /* 1167 * Need to set db_type back to M_DATA before 1168 * refeeding mp into this function. 
1169 */ 1170 DB_TYPE(mp) = M_DATA; 1171 1172 /* 1173 * Copy the 3rd header + remaining data on top 1174 * of the 2nd header. 1175 */ 1176 bcopy(inner_ip6h, ip6h, 1177 mp->b_wptr - (uchar_t *)inner_ip6h); 1178 1179 /* 1180 * Subtract length of the 2nd header. 1181 */ 1182 mp->b_wptr -= hdr_length; 1183 1184 /* 1185 * Now recurse, and see what I _really_ should be 1186 * doing here. 1187 */ 1188 icmp_inbound_error_fanout_v6(q, first_mp, 1189 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1190 zoneid); 1191 return; 1192 } 1193 /* FALLTHRU */ 1194 default: 1195 /* 1196 * The rip6h header is only used for the lookup and we 1197 * only set the src and dst addresses and nexthdr. 1198 */ 1199 rip6h.ip6_src = ip6h->ip6_dst; 1200 rip6h.ip6_dst = ip6h->ip6_src; 1201 rip6h.ip6_nxt = nexthdr; 1202 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1203 IP6_NO_IPPOLICY, mctl_present, zoneid); 1204 return; 1205 } 1206 /* NOTREACHED */ 1207 drop_pkt: 1208 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1209 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1210 freemsg(first_mp); 1211 } 1212 1213 /* 1214 * Process received IPv6 ICMP Redirect messages. 
1215 */ 1216 /* ARGSUSED */ 1217 static void 1218 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1219 { 1220 ip6_t *ip6h; 1221 uint16_t hdr_length; 1222 nd_redirect_t *rd; 1223 ire_t *ire; 1224 ire_t *prev_ire; 1225 ire_t *redir_ire; 1226 in6_addr_t *src, *dst, *gateway; 1227 nd_opt_hdr_t *opt; 1228 nce_t *nce; 1229 int nce_flags = 0; 1230 int err = 0; 1231 boolean_t redirect_to_router = B_FALSE; 1232 int len; 1233 int optlen; 1234 iulp_t ulp_info = { 0 }; 1235 ill_t *prev_ire_ill; 1236 ipif_t *ipif; 1237 1238 ip6h = (ip6_t *)mp->b_rptr; 1239 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1240 hdr_length = ip_hdr_length_v6(mp, ip6h); 1241 else 1242 hdr_length = IPV6_HDR_LEN; 1243 1244 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1245 len = mp->b_wptr - mp->b_rptr - hdr_length; 1246 src = &ip6h->ip6_src; 1247 dst = &rd->nd_rd_dst; 1248 gateway = &rd->nd_rd_target; 1249 1250 /* Verify if it is a valid redirect */ 1251 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1252 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1253 (rd->nd_rd_code != 0) || 1254 (len < sizeof (nd_redirect_t)) || 1255 (IN6_IS_ADDR_V4MAPPED(dst)) || 1256 (IN6_IS_ADDR_MULTICAST(dst))) { 1257 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1258 freemsg(mp); 1259 return; 1260 } 1261 1262 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1263 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1264 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1265 freemsg(mp); 1266 return; 1267 } 1268 1269 if (len > sizeof (nd_redirect_t)) { 1270 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1271 len - sizeof (nd_redirect_t))) { 1272 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1273 freemsg(mp); 1274 return; 1275 } 1276 } 1277 1278 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1279 redirect_to_router = B_TRUE; 1280 nce_flags |= NCE_F_ISROUTER; 1281 } 1282 1283 /* ipif will be refreleased afterwards */ 1284 ipif = ipif_get_next_ipif(NULL, ill); 1285 if (ipif == NULL) { 1286 freemsg(mp); 1287 return; 1288 } 1289 1290 /* 1291 * Verify that the 
IP source address of the redirect is 1292 * the same as the current first-hop router for the specified 1293 * ICMP destination address. 1294 * Also, Make sure we had a route for the dest in question and 1295 * that route was pointing to the old gateway (the source of the 1296 * redirect packet.) 1297 */ 1298 1299 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1300 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1301 MATCH_IRE_DEFAULT); 1302 1303 /* 1304 * Check that 1305 * the redirect was not from ourselves 1306 * old gateway is still directly reachable 1307 */ 1308 if (prev_ire == NULL || 1309 prev_ire->ire_type == IRE_LOCAL) { 1310 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1311 ipif_refrele(ipif); 1312 goto fail_redirect; 1313 } 1314 prev_ire_ill = ire_to_ill(prev_ire); 1315 ASSERT(prev_ire_ill != NULL); 1316 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1317 nce_flags |= NCE_F_NONUD; 1318 1319 /* 1320 * Should we use the old ULP info to create the new gateway? From 1321 * a user's perspective, we should inherit the info so that it 1322 * is a "smooth" transition. If we do not do that, then new 1323 * connections going thru the new gateway will have no route metrics, 1324 * which is counter-intuitive to user. From a network point of 1325 * view, this may or may not make sense even though the new gateway 1326 * is still directly connected to us so the route metrics should not 1327 * change much. 1328 * 1329 * But if the old ire_uinfo is not initialized, we do another 1330 * recursive lookup on the dest using the new gateway. There may 1331 * be a route to that. If so, use it to initialize the redirect 1332 * route. 1333 */ 1334 if (prev_ire->ire_uinfo.iulp_set) { 1335 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1336 } else if (redirect_to_router) { 1337 /* 1338 * Only do the following if the redirection is really to 1339 * a router. 
1340 */ 1341 ire_t *tmp_ire; 1342 ire_t *sire; 1343 1344 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1345 ALL_ZONES, 0, NULL, 1346 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1347 if (sire != NULL) { 1348 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1349 ASSERT(tmp_ire != NULL); 1350 ire_refrele(tmp_ire); 1351 ire_refrele(sire); 1352 } else if (tmp_ire != NULL) { 1353 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1354 sizeof (iulp_t)); 1355 ire_refrele(tmp_ire); 1356 } 1357 } 1358 1359 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1360 opt = (nd_opt_hdr_t *)&rd[1]; 1361 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1362 if (opt != NULL) { 1363 err = ndp_lookup_then_add(ill, 1364 (uchar_t *)&opt[1], /* Link layer address */ 1365 gateway, 1366 &ipv6_all_ones, /* prefix mask */ 1367 &ipv6_all_zeros, /* Mapping mask */ 1368 0, 1369 nce_flags, 1370 ND_STALE, 1371 &nce, 1372 NULL, 1373 NULL); 1374 switch (err) { 1375 case 0: 1376 NCE_REFRELE(nce); 1377 break; 1378 case EEXIST: 1379 /* 1380 * Check to see if link layer address has changed and 1381 * process the nce_state accordingly. 1382 */ 1383 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1384 NCE_REFRELE(nce); 1385 break; 1386 default: 1387 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1388 err)); 1389 ipif_refrele(ipif); 1390 goto fail_redirect; 1391 } 1392 } 1393 if (redirect_to_router) { 1394 /* icmp_redirect_ok_v6() must have already verified this */ 1395 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1396 1397 /* 1398 * Create a Route Association. This will allow us to remember 1399 * a router told us to use the particular gateway. 
1400 */ 1401 ire = ire_create_v6( 1402 dst, 1403 &ipv6_all_ones, /* mask */ 1404 &prev_ire->ire_src_addr_v6, /* source addr */ 1405 gateway, /* gateway addr */ 1406 &prev_ire->ire_max_frag, /* max frag */ 1407 NULL, /* Fast Path header */ 1408 NULL, /* no rfq */ 1409 NULL, /* no stq */ 1410 IRE_HOST, 1411 NULL, 1412 prev_ire->ire_ipif, 1413 NULL, 1414 0, 1415 0, 1416 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1417 &ulp_info, 1418 NULL, 1419 NULL); 1420 } else { 1421 queue_t *stq; 1422 1423 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1424 ? ipif->ipif_rq : ipif->ipif_wq; 1425 1426 /* 1427 * Just create an on link entry, i.e. interface route. 1428 */ 1429 ire = ire_create_v6( 1430 dst, /* gateway == dst */ 1431 &ipv6_all_ones, /* mask */ 1432 &prev_ire->ire_src_addr_v6, /* source addr */ 1433 &ipv6_all_zeros, /* gateway addr */ 1434 &prev_ire->ire_max_frag, /* max frag */ 1435 NULL, /* Fast Path header */ 1436 NULL, /* ire rfq */ 1437 stq, /* ire stq */ 1438 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1439 NULL, 1440 prev_ire->ire_ipif, 1441 &ipv6_all_ones, 1442 0, 1443 0, 1444 (RTF_DYNAMIC | RTF_HOST), 1445 &ulp_info, 1446 NULL, 1447 NULL); 1448 } 1449 1450 /* Release reference from earlier ipif_get_next_ipif() */ 1451 ipif_refrele(ipif); 1452 1453 if (ire == NULL) 1454 goto fail_redirect; 1455 1456 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1457 1458 /* tell routing sockets that we received a redirect */ 1459 ip_rts_change_v6(RTM_REDIRECT, 1460 &rd->nd_rd_dst, 1461 &rd->nd_rd_target, 1462 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1463 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1464 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1465 1466 /* 1467 * Delete any existing IRE_HOST type ires for this destination. 1468 * This together with the added IRE has the effect of 1469 * modifying an existing redirect. 
1470 */ 1471 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1472 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1473 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1474 1475 ire_refrele(ire); /* Held in ire_add_v6 */ 1476 1477 if (redir_ire != NULL) { 1478 if (redir_ire->ire_flags & RTF_DYNAMIC) 1479 ire_delete(redir_ire); 1480 ire_refrele(redir_ire); 1481 } 1482 } 1483 1484 if (prev_ire->ire_type == IRE_CACHE) 1485 ire_delete(prev_ire); 1486 ire_refrele(prev_ire); 1487 prev_ire = NULL; 1488 1489 fail_redirect: 1490 if (prev_ire != NULL) 1491 ire_refrele(prev_ire); 1492 freemsg(mp); 1493 } 1494 1495 static ill_t * 1496 ip_queue_to_ill_v6(queue_t *q) 1497 { 1498 ill_t *ill; 1499 1500 ASSERT(WR(q) == q); 1501 1502 if (q->q_next != NULL) { 1503 ill = (ill_t *)q->q_ptr; 1504 if (ILL_CAN_LOOKUP(ill)) 1505 ill_refhold(ill); 1506 else 1507 ill = NULL; 1508 } else { 1509 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1510 NULL, NULL, NULL, NULL, NULL); 1511 } 1512 if (ill == NULL) 1513 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1514 return (ill); 1515 } 1516 1517 /* 1518 * Assigns an appropriate source address to the packet. 1519 * If origdst is one of our IP addresses that use it as the source. 1520 * If the queue is an ill queue then select a source from that ill. 1521 * Otherwise pick a source based on a route lookup back to the origsrc. 1522 * 1523 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1524 */ 1525 static in6_addr_t * 1526 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1527 in6_addr_t *src, zoneid_t zoneid) 1528 { 1529 ill_t *ill; 1530 ire_t *ire; 1531 ipif_t *ipif; 1532 1533 ASSERT(!(wq->q_flag & QREADR)); 1534 if (wq->q_next != NULL) { 1535 ill = (ill_t *)wq->q_ptr; 1536 } else { 1537 ill = NULL; 1538 } 1539 1540 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1541 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1542 if (ire != NULL) { 1543 /* Destined to one of our addresses */ 1544 *src = *origdst; 1545 ire_refrele(ire); 1546 return (src); 1547 } 1548 if (ire != NULL) { 1549 ire_refrele(ire); 1550 ire = NULL; 1551 } 1552 if (ill == NULL) { 1553 /* What is the route back to the original source? */ 1554 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1555 NULL, NULL, zoneid, NULL, 1556 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1557 if (ire == NULL) { 1558 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1559 return (NULL); 1560 } 1561 /* 1562 * Does not matter whether we use ire_stq or ire_ipif here. 1563 * Just pick an ill for ICMP replies. 1564 */ 1565 ASSERT(ire->ire_ipif != NULL); 1566 ill = ire->ire_ipif->ipif_ill; 1567 ire_refrele(ire); 1568 } 1569 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1570 IPV6_PREFER_SRC_DEFAULT, zoneid); 1571 if (ipif != NULL) { 1572 *src = ipif->ipif_v6src_addr; 1573 ipif_refrele(ipif); 1574 return (src); 1575 } 1576 /* 1577 * Unusual case - can't find a usable source address to reach the 1578 * original source. Use what in the route to the source. 
1579 */ 1580 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1581 NULL, NULL, zoneid, NULL, 1582 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1583 if (ire == NULL) { 1584 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1585 return (NULL); 1586 } 1587 ASSERT(ire != NULL); 1588 *src = ire->ire_src_addr_v6; 1589 ire_refrele(ire); 1590 return (src); 1591 } 1592 1593 /* 1594 * Build and ship an IPv6 ICMP message using the packet data in mp, 1595 * and the ICMP header pointed to by "stuff". (May be called as 1596 * writer.) 1597 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1598 * verify that an icmp error packet can be sent. 1599 * 1600 * If q is an ill write side queue (which is the case when packets 1601 * arrive from ip_rput) then ip_wput code will ensure that packets to 1602 * link-local destinations are sent out that ill. 1603 * 1604 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1605 * source address (see above function). 1606 */ 1607 static void 1608 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1609 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid) 1610 { 1611 ip6_t *ip6h; 1612 in6_addr_t v6dst; 1613 size_t len_needed; 1614 size_t msg_len; 1615 mblk_t *mp1; 1616 icmp6_t *icmp6; 1617 ill_t *ill; 1618 in6_addr_t v6src; 1619 mblk_t *ipsec_mp; 1620 ipsec_out_t *io; 1621 1622 ill = ip_queue_to_ill_v6(q); 1623 if (ill == NULL) { 1624 freemsg(mp); 1625 return; 1626 } 1627 1628 if (mctl_present) { 1629 /* 1630 * If it is : 1631 * 1632 * 1) a IPSEC_OUT, then this is caused by outbound 1633 * datagram originating on this host. IPSEC processing 1634 * may or may not have been done. Refer to comments above 1635 * icmp_inbound_error_fanout for details. 1636 * 1637 * 2) a IPSEC_IN if we are generating a icmp_message 1638 * for an incoming datagram destined for us i.e called 1639 * from ip_fanout_send_icmp. 
1640 */ 1641 ipsec_info_t *in; 1642 1643 ipsec_mp = mp; 1644 mp = ipsec_mp->b_cont; 1645 1646 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1647 ip6h = (ip6_t *)mp->b_rptr; 1648 1649 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1650 in->ipsec_info_type == IPSEC_IN); 1651 1652 if (in->ipsec_info_type == IPSEC_IN) { 1653 /* 1654 * Convert the IPSEC_IN to IPSEC_OUT. 1655 */ 1656 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1657 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1658 ill_refrele(ill); 1659 return; 1660 } 1661 } else { 1662 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1663 io = (ipsec_out_t *)in; 1664 /* 1665 * Clear out ipsec_out_proc_begin, so we do a fresh 1666 * ire lookup. 1667 */ 1668 io->ipsec_out_proc_begin = B_FALSE; 1669 } 1670 } else { 1671 /* 1672 * This is in clear. The icmp message we are building 1673 * here should go out in clear. 1674 */ 1675 ipsec_in_t *ii; 1676 ASSERT(mp->b_datap->db_type == M_DATA); 1677 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1678 freemsg(mp); 1679 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1680 ill_refrele(ill); 1681 return; 1682 } 1683 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1684 1685 /* This is not a secure packet */ 1686 ii->ipsec_in_secure = B_FALSE; 1687 /* 1688 * For trusted extensions using a shared IP address we can 1689 * send using any zoneid. 1690 */ 1691 if (zoneid == ALL_ZONES) 1692 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1693 else 1694 ii->ipsec_in_zoneid = zoneid; 1695 ipsec_mp->b_cont = mp; 1696 ip6h = (ip6_t *)mp->b_rptr; 1697 /* 1698 * Convert the IPSEC_IN to IPSEC_OUT. 
1699 */ 1700 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1701 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1702 ill_refrele(ill); 1703 return; 1704 } 1705 } 1706 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1707 1708 if (v6src_ptr != NULL) { 1709 v6src = *v6src_ptr; 1710 } else { 1711 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1712 &v6src, zoneid) == NULL) { 1713 freemsg(ipsec_mp); 1714 ill_refrele(ill); 1715 return; 1716 } 1717 } 1718 v6dst = ip6h->ip6_src; 1719 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1720 msg_len = msgdsize(mp); 1721 if (msg_len > len_needed) { 1722 if (!adjmsg(mp, len_needed - msg_len)) { 1723 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1724 freemsg(ipsec_mp); 1725 ill_refrele(ill); 1726 return; 1727 } 1728 msg_len = len_needed; 1729 } 1730 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1731 if (mp1 == NULL) { 1732 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1733 freemsg(ipsec_mp); 1734 ill_refrele(ill); 1735 return; 1736 } 1737 ill_refrele(ill); 1738 mp1->b_cont = mp; 1739 mp = mp1; 1740 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1741 io->ipsec_out_type == IPSEC_OUT); 1742 ipsec_mp->b_cont = mp; 1743 1744 /* 1745 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1746 * node generates be accepted in peace by all on-host destinations. 1747 * If we do NOT assume that all on-host destinations trust 1748 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1749 * (Look for ipsec_out_icmp_loopback). 
1750 */ 1751 io->ipsec_out_icmp_loopback = B_TRUE; 1752 1753 ip6h = (ip6_t *)mp->b_rptr; 1754 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1755 1756 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1757 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1758 ip6h->ip6_hops = ipv6_def_hops; 1759 ip6h->ip6_dst = v6dst; 1760 ip6h->ip6_src = v6src; 1761 msg_len += IPV6_HDR_LEN + len; 1762 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1763 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1764 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1765 } 1766 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1767 icmp6 = (icmp6_t *)&ip6h[1]; 1768 bcopy(stuff, (char *)icmp6, len); 1769 /* 1770 * Prepare for checksum by putting icmp length in the icmp 1771 * checksum field. The checksum is calculated in ip_wput_v6. 1772 */ 1773 icmp6->icmp6_cksum = ip6h->ip6_plen; 1774 if (icmp6->icmp6_type == ND_REDIRECT) { 1775 ip6h->ip6_hops = IPV6_MAX_HOPS; 1776 } 1777 /* Send to V6 writeside put routine */ 1778 put(q, ipsec_mp); 1779 } 1780 1781 /* 1782 * Update the output mib when ICMPv6 packets are sent. 
1783 */ 1784 static void 1785 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1786 { 1787 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1788 1789 switch (icmp6->icmp6_type) { 1790 case ICMP6_DST_UNREACH: 1791 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1792 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1793 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1794 break; 1795 1796 case ICMP6_TIME_EXCEEDED: 1797 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1798 break; 1799 1800 case ICMP6_PARAM_PROB: 1801 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1802 break; 1803 1804 case ICMP6_PACKET_TOO_BIG: 1805 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1806 break; 1807 1808 case ICMP6_ECHO_REQUEST: 1809 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1810 break; 1811 1812 case ICMP6_ECHO_REPLY: 1813 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1814 break; 1815 1816 case ND_ROUTER_SOLICIT: 1817 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1818 break; 1819 1820 case ND_ROUTER_ADVERT: 1821 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1822 break; 1823 1824 case ND_NEIGHBOR_SOLICIT: 1825 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1826 break; 1827 1828 case ND_NEIGHBOR_ADVERT: 1829 BUMP_MIB(ill->ill_icmp6_mib, 1830 ipv6IfIcmpOutNeighborAdvertisements); 1831 break; 1832 1833 case ND_REDIRECT: 1834 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1835 break; 1836 1837 case MLD_LISTENER_QUERY: 1838 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1839 break; 1840 1841 case MLD_LISTENER_REPORT: 1842 case MLD_V2_LISTENER_REPORT: 1843 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1844 break; 1845 1846 case MLD_LISTENER_REDUCTION: 1847 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1848 break; 1849 } 1850 } 1851 1852 /* 1853 * Check if it is ok to send an ICMPv6 error packet in 1854 * response to the IP packet in 
mp. 1855 * Free the message and return null if no 1856 * ICMP error packet should be sent. 1857 */ 1858 static mblk_t * 1859 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1860 boolean_t llbcast, boolean_t mcast_ok) 1861 { 1862 ip6_t *ip6h; 1863 1864 if (!mp) 1865 return (NULL); 1866 1867 ip6h = (ip6_t *)mp->b_rptr; 1868 1869 /* Check if source address uniquely identifies the host */ 1870 1871 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1872 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1873 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1874 freemsg(mp); 1875 return (NULL); 1876 } 1877 1878 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1879 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1880 icmp6_t *icmp6; 1881 1882 if (mp->b_wptr - mp->b_rptr < len_needed) { 1883 if (!pullupmsg(mp, len_needed)) { 1884 ill_t *ill; 1885 1886 ill = ip_queue_to_ill_v6(q); 1887 if (ill == NULL) { 1888 BUMP_MIB(&icmp6_mib, 1889 ipv6IfIcmpInErrors); 1890 } else { 1891 BUMP_MIB(ill->ill_icmp6_mib, 1892 ipv6IfIcmpInErrors); 1893 ill_refrele(ill); 1894 } 1895 freemsg(mp); 1896 return (NULL); 1897 } 1898 ip6h = (ip6_t *)mp->b_rptr; 1899 } 1900 icmp6 = (icmp6_t *)&ip6h[1]; 1901 /* Explicitly do not generate errors in response to redirects */ 1902 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1903 icmp6->icmp6_type == ND_REDIRECT) { 1904 freemsg(mp); 1905 return (NULL); 1906 } 1907 } 1908 /* 1909 * Check that the destination is not multicast and that the packet 1910 * was not sent on link layer broadcast or multicast. (Exception 1911 * is Packet too big message as per the draft - when mcast_ok is set.) 1912 */ 1913 if (!mcast_ok && 1914 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1915 freemsg(mp); 1916 return (NULL); 1917 } 1918 if (icmp_err_rate_limit()) { 1919 /* 1920 * Only send ICMP error packets every so often. 1921 * This should be done on a per port/source basis, 1922 * but for now this will suffice. 
1923 */ 1924 freemsg(mp); 1925 return (NULL); 1926 } 1927 return (mp); 1928 } 1929 1930 /* 1931 * Generate an ICMPv6 redirect message. 1932 * Include target link layer address option if it exits. 1933 * Always include redirect header. 1934 */ 1935 static void 1936 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1937 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1938 { 1939 nd_redirect_t *rd; 1940 nd_opt_rd_hdr_t *rdh; 1941 uchar_t *buf; 1942 nce_t *nce = NULL; 1943 nd_opt_hdr_t *opt; 1944 int len; 1945 int ll_opt_len = 0; 1946 int max_redir_hdr_data_len; 1947 int pkt_len; 1948 in6_addr_t *srcp; 1949 1950 /* 1951 * We are called from ip_rput where we could 1952 * not have attached an IPSEC_IN. 1953 */ 1954 ASSERT(mp->b_datap->db_type == M_DATA); 1955 1956 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1957 if (mp == NULL) 1958 return; 1959 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1960 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1961 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1962 ill->ill_phys_addr_length + 7)/8 * 8; 1963 } 1964 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1965 ASSERT(len % 4 == 0); 1966 buf = kmem_alloc(len, KM_NOSLEEP); 1967 if (buf == NULL) { 1968 if (nce != NULL) 1969 NCE_REFRELE(nce); 1970 freemsg(mp); 1971 return; 1972 } 1973 1974 rd = (nd_redirect_t *)buf; 1975 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1976 rd->nd_rd_code = 0; 1977 rd->nd_rd_reserved = 0; 1978 rd->nd_rd_target = *targetp; 1979 rd->nd_rd_dst = *dest; 1980 1981 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1982 if (nce != NULL && ll_opt_len != 0) { 1983 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1984 opt->nd_opt_len = ll_opt_len/8; 1985 bcopy((char *)nce->nce_res_mp->b_rptr + 1986 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1987 ill->ill_phys_addr_length); 1988 } 1989 if (nce != NULL) 1990 NCE_REFRELE(nce); 1991 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1992 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 1993 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1994 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1995 pkt_len = msgdsize(mp); 1996 /* Make sure mp is 8 byte aligned */ 1997 if (pkt_len > max_redir_hdr_data_len) { 1998 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1999 sizeof (nd_opt_rd_hdr_t))/8; 2000 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 2001 } else { 2002 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 2003 (void) adjmsg(mp, -(pkt_len % 8)); 2004 } 2005 rdh->nd_opt_rh_reserved1 = 0; 2006 rdh->nd_opt_rh_reserved2 = 0; 2007 /* ipif_v6src_addr contains the link-local source address */ 2008 rw_enter(&ill_g_lock, RW_READER); 2009 if (ill->ill_group != NULL) { 2010 /* 2011 * The receiver of the redirect will verify whether it 2012 * had a route through us (srcp that we will use in 2013 * the redirect) or not. As we load spread even link-locals, 2014 * we don't know which source address the receiver of 2015 * redirect has in its route for communicating with us. 2016 * Thus we randomly choose a source here and finally we 2017 * should get to the right one and it will eventually 2018 * accept the redirect from us. We can't call 2019 * ip_lookup_scope_v6 because we don't have the right 2020 * link-local address here. Thus we randomly choose one. 2021 */ 2022 int cnt = ill->ill_group->illgrp_ill_count; 2023 2024 ill = ill->ill_group->illgrp_ill; 2025 cnt = ++icmp_redirect_v6_src_index % cnt; 2026 while (cnt--) 2027 ill = ill->ill_group_next; 2028 srcp = &ill->ill_ipif->ipif_v6src_addr; 2029 } else { 2030 srcp = &ill->ill_ipif->ipif_v6src_addr; 2031 } 2032 rw_exit(&ill_g_lock); 2033 /* Redirects sent by router, and router is global zone */ 2034 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID); 2035 kmem_free(buf, len); 2036 } 2037 2038 2039 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 2040 void 2041 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 2042 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2043 { 2044 icmp6_t icmp6; 2045 boolean_t mctl_present; 2046 mblk_t *first_mp; 2047 2048 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2049 2050 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2051 if (mp == NULL) { 2052 if (mctl_present) 2053 freeb(first_mp); 2054 return; 2055 } 2056 bzero(&icmp6, sizeof (icmp6_t)); 2057 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2058 icmp6.icmp6_code = code; 2059 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2060 zoneid); 2061 } 2062 2063 /* 2064 * Generate an ICMP unreachable message. 2065 */ 2066 void 2067 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2068 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2069 { 2070 icmp6_t icmp6; 2071 boolean_t mctl_present; 2072 mblk_t *first_mp; 2073 2074 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2075 2076 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2077 if (mp == NULL) { 2078 if (mctl_present) 2079 freeb(first_mp); 2080 return; 2081 } 2082 bzero(&icmp6, sizeof (icmp6_t)); 2083 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2084 icmp6.icmp6_code = code; 2085 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2086 zoneid); 2087 } 2088 2089 /* 2090 * Generate an ICMP pkt too big message. 
2091 */ 2092 static void 2093 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2094 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2095 { 2096 icmp6_t icmp6; 2097 mblk_t *first_mp; 2098 boolean_t mctl_present; 2099 2100 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2101 2102 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2103 if (mp == NULL) { 2104 if (mctl_present) 2105 freeb(first_mp); 2106 return; 2107 } 2108 bzero(&icmp6, sizeof (icmp6_t)); 2109 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2110 icmp6.icmp6_code = 0; 2111 icmp6.icmp6_mtu = htonl(mtu); 2112 2113 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2114 zoneid); 2115 } 2116 2117 /* 2118 * Generate an ICMP parameter problem message. (May be called as writer.) 2119 * 'offset' is the offset from the beginning of the packet in error. 2120 */ 2121 static void 2122 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2123 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2124 { 2125 icmp6_t icmp6; 2126 boolean_t mctl_present; 2127 mblk_t *first_mp; 2128 2129 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2130 2131 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2132 if (mp == NULL) { 2133 if (mctl_present) 2134 freeb(first_mp); 2135 return; 2136 } 2137 bzero((char *)&icmp6, sizeof (icmp6_t)); 2138 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2139 icmp6.icmp6_code = code; 2140 icmp6.icmp6_pptr = htonl(offset); 2141 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2142 zoneid); 2143 } 2144 2145 /* 2146 * This code will need to take into account the possibility of binding 2147 * to a link local address on a multi-homed host, in which case the 2148 * outgoing interface (from the conn) will need to be used when getting 2149 * an ire for the dst. 
Going through proper outgoing interface and 2150 * choosing the source address corresponding to the outgoing interface 2151 * is necessary when the destination address is a link-local address and 2152 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2153 * This can happen when active connection is setup; thus ipp pointer 2154 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2155 * pointer is passed as ipp pointer. 2156 */ 2157 mblk_t * 2158 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2159 { 2160 ssize_t len; 2161 int protocol; 2162 struct T_bind_req *tbr; 2163 sin6_t *sin6; 2164 ipa6_conn_t *ac6; 2165 in6_addr_t *v6srcp; 2166 in6_addr_t *v6dstp; 2167 uint16_t lport; 2168 uint16_t fport; 2169 uchar_t *ucp; 2170 mblk_t *mp1; 2171 boolean_t ire_requested; 2172 boolean_t ipsec_policy_set; 2173 int error = 0; 2174 boolean_t local_bind; 2175 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2176 ipa6_conn_x_t *acx6; 2177 boolean_t verify_dst; 2178 2179 ASSERT(connp->conn_af_isv6); 2180 len = mp->b_wptr - mp->b_rptr; 2181 if (len < (sizeof (*tbr) + 1)) { 2182 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2183 "ip_bind_v6: bogus msg, len %ld", len); 2184 goto bad_addr; 2185 } 2186 /* Back up and extract the protocol identifier. */ 2187 mp->b_wptr--; 2188 tbr = (struct T_bind_req *)mp->b_rptr; 2189 /* Reset the message type in preparation for shipping it back. */ 2190 mp->b_datap->db_type = M_PCPROTO; 2191 2192 protocol = *mp->b_wptr & 0xFF; 2193 connp->conn_ulp = (uint8_t)protocol; 2194 2195 /* 2196 * Check for a zero length address. This is from a protocol that 2197 * wants to register to receive all packets of its type. 2198 */ 2199 if (tbr->ADDR_length == 0) { 2200 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2201 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2202 ipcl_proto_fanout_v6[protocol].connf_head != NULL) { 2203 /* 2204 * TCP, SCTP, AH, and ESP have single protocol fanouts. 
2205 * Do not allow others to bind to these. 2206 */ 2207 goto bad_addr; 2208 } 2209 2210 /* 2211 * 2212 * The udp module never sends down a zero-length address, 2213 * and allowing this on a labeled system will break MLP 2214 * functionality. 2215 */ 2216 if (is_system_labeled() && protocol == IPPROTO_UDP) 2217 goto bad_addr; 2218 2219 /* Allow ipsec plumbing */ 2220 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2221 protocol != IPPROTO_ESP) 2222 goto bad_addr; 2223 2224 connp->conn_srcv6 = ipv6_all_zeros; 2225 ipcl_proto_insert_v6(connp, protocol); 2226 2227 tbr->PRIM_type = T_BIND_ACK; 2228 return (mp); 2229 } 2230 2231 /* Extract the address pointer from the message. */ 2232 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2233 tbr->ADDR_length); 2234 if (ucp == NULL) { 2235 ip1dbg(("ip_bind_v6: no address\n")); 2236 goto bad_addr; 2237 } 2238 if (!OK_32PTR(ucp)) { 2239 ip1dbg(("ip_bind_v6: unaligned address\n")); 2240 goto bad_addr; 2241 } 2242 mp1 = mp->b_cont; /* trailing mp if any */ 2243 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2244 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2245 2246 switch (tbr->ADDR_length) { 2247 default: 2248 ip1dbg(("ip_bind_v6: bad address length %d\n", 2249 (int)tbr->ADDR_length)); 2250 goto bad_addr; 2251 2252 case IPV6_ADDR_LEN: 2253 /* Verification of local address only */ 2254 v6srcp = (in6_addr_t *)ucp; 2255 lport = 0; 2256 local_bind = B_TRUE; 2257 break; 2258 2259 case sizeof (sin6_t): 2260 sin6 = (sin6_t *)ucp; 2261 v6srcp = &sin6->sin6_addr; 2262 lport = sin6->sin6_port; 2263 local_bind = B_TRUE; 2264 break; 2265 2266 case sizeof (ipa6_conn_t): 2267 /* 2268 * Verify that both the source and destination addresses 2269 * are valid. 2270 * Note that we allow connect to broadcast and multicast 2271 * addresses when ire_requested is set. Thus the ULP 2272 * has to check for IRE_BROADCAST and multicast. 
2273 */ 2274 ac6 = (ipa6_conn_t *)ucp; 2275 v6srcp = &ac6->ac6_laddr; 2276 v6dstp = &ac6->ac6_faddr; 2277 fport = ac6->ac6_fport; 2278 /* For raw socket, the local port is not set. */ 2279 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2280 connp->conn_lport; 2281 local_bind = B_FALSE; 2282 /* Always verify destination reachability. */ 2283 verify_dst = B_TRUE; 2284 break; 2285 2286 case sizeof (ipa6_conn_x_t): 2287 /* 2288 * Verify that the source address is valid. 2289 * Note that we allow connect to broadcast and multicast 2290 * addresses when ire_requested is set. Thus the ULP 2291 * has to check for IRE_BROADCAST and multicast. 2292 */ 2293 acx6 = (ipa6_conn_x_t *)ucp; 2294 ac6 = &acx6->ac6x_conn; 2295 v6srcp = &ac6->ac6_laddr; 2296 v6dstp = &ac6->ac6_faddr; 2297 fport = ac6->ac6_fport; 2298 lport = ac6->ac6_lport; 2299 local_bind = B_FALSE; 2300 /* 2301 * Client that passed ipa6_conn_x_t to us specifies whether to 2302 * verify destination reachability. 2303 */ 2304 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2305 break; 2306 } 2307 if (local_bind) { 2308 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2309 /* Bind to IPv4 address */ 2310 ipaddr_t v4src; 2311 2312 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2313 2314 error = ip_bind_laddr(connp, mp, v4src, lport, 2315 ire_requested, ipsec_policy_set, 2316 tbr->ADDR_length != IPV6_ADDR_LEN); 2317 if (error != 0) 2318 goto bad_addr; 2319 connp->conn_pkt_isv6 = B_FALSE; 2320 } else { 2321 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2322 error = 0; 2323 goto bad_addr; 2324 } 2325 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2326 ire_requested, ipsec_policy_set, 2327 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2328 if (error != 0) 2329 goto bad_addr; 2330 connp->conn_pkt_isv6 = B_TRUE; 2331 } 2332 if (protocol == IPPROTO_TCP) 2333 connp->conn_recv = tcp_conn_request; 2334 } else { 2335 /* 2336 * Bind to local and remote address. 
Local might be 2337 * unspecified in which case it will be extracted from 2338 * ire_src_addr_v6 2339 */ 2340 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2341 /* Connect to IPv4 address */ 2342 ipaddr_t v4src; 2343 ipaddr_t v4dst; 2344 2345 /* Is the source unspecified or mapped? */ 2346 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2347 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2348 ip1dbg(("ip_bind_v6: " 2349 "dst is mapped, but not the src\n")); 2350 goto bad_addr; 2351 } 2352 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2353 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2354 2355 /* 2356 * XXX Fix needed. Need to pass ipsec_policy_set 2357 * instead of B_FALSE. 2358 */ 2359 2360 /* Always verify destination reachability. */ 2361 error = ip_bind_connected(connp, mp, &v4src, lport, 2362 v4dst, fport, ire_requested, ipsec_policy_set, 2363 B_TRUE, B_TRUE); 2364 if (error != 0) 2365 goto bad_addr; 2366 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2367 connp->conn_pkt_isv6 = B_FALSE; 2368 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2369 ip1dbg(("ip_bind_v6: " 2370 "src is mapped, but not the dst\n")); 2371 goto bad_addr; 2372 } else { 2373 error = ip_bind_connected_v6(connp, mp, v6srcp, 2374 lport, v6dstp, ipp, fport, ire_requested, 2375 ipsec_policy_set, B_TRUE, verify_dst); 2376 if (error != 0) 2377 goto bad_addr; 2378 connp->conn_pkt_isv6 = B_TRUE; 2379 } 2380 if (protocol == IPPROTO_TCP) 2381 connp->conn_recv = tcp_input; 2382 } 2383 /* Update qinfo if v4/v6 changed */ 2384 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2385 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2386 if (connp->conn_pkt_isv6) 2387 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE); 2388 else 2389 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 2390 } 2391 2392 /* 2393 * Pass the IPSEC headers size in ire_ipsec_overhead. 2394 * We can't do this in ip_bind_insert_ire because the policy 2395 * may not have been inherited at that point in time and hence 2396 * conn_out_enforce_policy may not be set. 
2397 */ 2398 mp1 = mp->b_cont; 2399 if (ire_requested && connp->conn_out_enforce_policy && 2400 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2401 ire_t *ire = (ire_t *)mp1->b_rptr; 2402 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2403 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2404 } 2405 2406 /* Send it home. */ 2407 mp->b_datap->db_type = M_PCPROTO; 2408 tbr->PRIM_type = T_BIND_ACK; 2409 return (mp); 2410 2411 bad_addr: 2412 if (error == EINPROGRESS) 2413 return (NULL); 2414 if (error > 0) 2415 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2416 else 2417 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2418 return (mp); 2419 } 2420 2421 /* 2422 * Here address is verified to be a valid local address. 2423 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2424 * address is also considered a valid local address. 2425 * In the case of a multicast address, however, the 2426 * upper protocol is expected to reset the src address 2427 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2428 * no packets are emitted with multicast address as 2429 * source address. 2430 * The addresses valid for bind are: 2431 * (1) - in6addr_any 2432 * (2) - IP address of an UP interface 2433 * (3) - IP address of a DOWN interface 2434 * (4) - a multicast address. In this case 2435 * the conn will only receive packets destined to 2436 * the specified multicast address. Note: the 2437 * application still has to issue an 2438 * IPV6_JOIN_GROUP socket option. 2439 * 2440 * In all the above cases, the bound address must be valid in the current zone. 2441 * When the address is loopback or multicast, there might be many matching IREs 2442 * so bind has to look up based on the zone. 
2443 */ 2444 static int 2445 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2446 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2447 boolean_t fanout_insert) 2448 { 2449 int error = 0; 2450 ire_t *src_ire = NULL; 2451 ipif_t *ipif = NULL; 2452 mblk_t *policy_mp; 2453 zoneid_t zoneid; 2454 2455 if (ipsec_policy_set) 2456 policy_mp = mp->b_cont; 2457 2458 /* 2459 * If it was previously connected, conn_fully_bound would have 2460 * been set. 2461 */ 2462 connp->conn_fully_bound = B_FALSE; 2463 2464 zoneid = connp->conn_zoneid; 2465 2466 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2467 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2468 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2469 /* 2470 * If an address other than in6addr_any is requested, 2471 * we verify that it is a valid address for bind 2472 * Note: Following code is in if-else-if form for 2473 * readability compared to a condition check. 2474 */ 2475 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2476 if (IRE_IS_LOCAL(src_ire)) { 2477 /* 2478 * (2) Bind to address of local UP interface 2479 */ 2480 ipif = src_ire->ire_ipif; 2481 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2482 ipif_t *multi_ipif = NULL; 2483 ire_t *save_ire; 2484 /* 2485 * (4) bind to multicast address. 2486 * Fake out the IRE returned to upper 2487 * layer to be a broadcast IRE in 2488 * ip_bind_insert_ire_v6(). 2489 * Pass other information that matches 2490 * the ipif (e.g. the source address). 
2491 * conn_multicast_ill is only used for 2492 * IPv6 packets 2493 */ 2494 mutex_enter(&connp->conn_lock); 2495 if (connp->conn_multicast_ill != NULL) { 2496 (void) ipif_lookup_zoneid( 2497 connp->conn_multicast_ill, zoneid, 0, 2498 &multi_ipif); 2499 } else { 2500 /* 2501 * Look for default like 2502 * ip_wput_v6 2503 */ 2504 multi_ipif = ipif_lookup_group_v6( 2505 &ipv6_unspecified_group, zoneid); 2506 } 2507 mutex_exit(&connp->conn_lock); 2508 save_ire = src_ire; 2509 src_ire = NULL; 2510 if (multi_ipif == NULL || !ire_requested || 2511 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2512 src_ire = save_ire; 2513 error = EADDRNOTAVAIL; 2514 } else { 2515 ASSERT(src_ire != NULL); 2516 if (save_ire != NULL) 2517 ire_refrele(save_ire); 2518 } 2519 if (multi_ipif != NULL) 2520 ipif_refrele(multi_ipif); 2521 } else { 2522 *mp->b_wptr++ = (char)connp->conn_ulp; 2523 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2524 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error); 2525 if (ipif == NULL) { 2526 if (error == EINPROGRESS) { 2527 if (src_ire != NULL) 2528 ire_refrele(src_ire); 2529 return (error); 2530 } 2531 /* 2532 * Not a valid address for bind 2533 */ 2534 error = EADDRNOTAVAIL; 2535 } else { 2536 ipif_refrele(ipif); 2537 } 2538 /* 2539 * Just to keep it consistent with the processing in 2540 * ip_bind_v6(). 2541 */ 2542 mp->b_wptr--; 2543 } 2544 2545 if (error != 0) { 2546 /* Red Alert! Attempting to be a bogon! */ 2547 if (ip_debug > 2) { 2548 /* ip1dbg */ 2549 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2550 " address %s\n", AF_INET6, v6src); 2551 } 2552 goto bad_addr; 2553 } 2554 } 2555 2556 /* 2557 * Allow setting new policies. For example, disconnects come 2558 * down as ipa_t bind. As we would have set conn_policy_cached 2559 * to B_TRUE before, we should set it to B_FALSE, so that policy 2560 * can change after the disconnect. 
2561 */ 2562 connp->conn_policy_cached = B_FALSE; 2563 2564 /* If not fanout_insert this was just an address verification */ 2565 if (fanout_insert) { 2566 /* 2567 * The addresses have been verified. Time to insert in 2568 * the correct fanout list. 2569 */ 2570 connp->conn_srcv6 = *v6src; 2571 connp->conn_remv6 = ipv6_all_zeros; 2572 connp->conn_lport = lport; 2573 connp->conn_fport = 0; 2574 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2575 } 2576 if (error == 0) { 2577 if (ire_requested) { 2578 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) { 2579 error = -1; 2580 goto bad_addr; 2581 } 2582 } else if (ipsec_policy_set) { 2583 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2584 error = -1; 2585 goto bad_addr; 2586 } 2587 } 2588 } 2589 bad_addr: 2590 if (error != 0) { 2591 if (connp->conn_anon_port) { 2592 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2593 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2594 B_FALSE); 2595 } 2596 connp->conn_mlp_type = mlptSingle; 2597 } 2598 2599 if (src_ire != NULL) 2600 ire_refrele(src_ire); 2601 2602 if (ipsec_policy_set) { 2603 ASSERT(policy_mp != NULL); 2604 freeb(policy_mp); 2605 /* 2606 * As of now assume that nothing else accompanies 2607 * IPSEC_POLICY_SET. 
2608 */ 2609 mp->b_cont = NULL; 2610 } 2611 return (error); 2612 } 2613 2614 /* ARGSUSED */ 2615 static void 2616 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2617 void *dummy_arg) 2618 { 2619 conn_t *connp = NULL; 2620 t_scalar_t prim; 2621 2622 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2623 2624 if (CONN_Q(q)) 2625 connp = Q_TO_CONN(q); 2626 ASSERT(connp != NULL); 2627 2628 prim = ((union T_primitives *)mp->b_rptr)->type; 2629 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2630 2631 if (IPCL_IS_TCP(connp)) { 2632 /* Pass sticky_ipp for scope_id and pktinfo */ 2633 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2634 } else { 2635 /* For UDP and ICMP */ 2636 mp = ip_bind_v6(q, mp, connp, NULL); 2637 } 2638 if (mp != NULL) { 2639 if (IPCL_IS_TCP(connp)) { 2640 CONN_INC_REF(connp); 2641 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2642 connp, SQTAG_TCP_RPUTOTHER); 2643 } else if (IPCL_IS_UDP(connp)) { 2644 udp_resume_bind(connp, mp); 2645 } else { 2646 qreply(q, mp); 2647 CONN_OPER_PENDING_DONE(connp); 2648 } 2649 } 2650 } 2651 2652 /* 2653 * Verify that both the source and destination addresses 2654 * are valid. If verify_dst, then destination address must also be reachable, 2655 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2656 * It takes ip6_pkt_t * as one of the arguments to determine correct 2657 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2658 * destination address. Note that parameter ipp is only useful for TCP connect 2659 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2660 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2661 * 2662 */ 2663 static int 2664 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2665 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2666 boolean_t ire_requested, boolean_t ipsec_policy_set, 2667 boolean_t fanout_insert, boolean_t verify_dst) 2668 { 2669 ire_t *src_ire; 2670 ire_t *dst_ire; 2671 int error = 0; 2672 int protocol; 2673 mblk_t *policy_mp; 2674 ire_t *sire = NULL; 2675 ire_t *md_dst_ire = NULL; 2676 ill_t *md_ill = NULL; 2677 ill_t *dst_ill = NULL; 2678 ipif_t *src_ipif = NULL; 2679 zoneid_t zoneid; 2680 boolean_t ill_held = B_FALSE; 2681 2682 src_ire = dst_ire = NULL; 2683 /* 2684 * NOTE: The protocol is beyond the wptr because that's how 2685 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2686 */ 2687 protocol = *mp->b_wptr & 0xFF; 2688 2689 /* 2690 * If we never got a disconnect before, clear it now. 2691 */ 2692 connp->conn_fully_bound = B_FALSE; 2693 2694 if (ipsec_policy_set) { 2695 policy_mp = mp->b_cont; 2696 } 2697 2698 zoneid = connp->conn_zoneid; 2699 2700 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2701 ipif_t *ipif; 2702 2703 /* 2704 * Use an "emulated" IRE_BROADCAST to tell the transport it 2705 * is a multicast. 2706 * Pass other information that matches 2707 * the ipif (e.g. the source address). 
2708 * 2709 * conn_multicast_ill is only used for IPv6 packets 2710 */ 2711 mutex_enter(&connp->conn_lock); 2712 if (connp->conn_multicast_ill != NULL) { 2713 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2714 zoneid, 0, &ipif); 2715 } else { 2716 /* Look for default like ip_wput_v6 */ 2717 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2718 } 2719 mutex_exit(&connp->conn_lock); 2720 if (ipif == NULL || !ire_requested || 2721 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2722 if (ipif != NULL) 2723 ipif_refrele(ipif); 2724 if (ip_debug > 2) { 2725 /* ip1dbg */ 2726 pr_addr_dbg("ip_bind_connected_v6: bad " 2727 "connected multicast %s\n", AF_INET6, 2728 v6dst); 2729 } 2730 error = ENETUNREACH; 2731 goto bad_addr; 2732 } 2733 if (ipif != NULL) 2734 ipif_refrele(ipif); 2735 } else { 2736 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2737 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2738 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2739 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR); 2740 /* 2741 * We also prevent ire's with src address INADDR_ANY to 2742 * be used, which are created temporarily for 2743 * sending out packets from endpoints that have 2744 * conn_unspec_src set. 2745 */ 2746 if (dst_ire == NULL || 2747 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2748 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2749 /* 2750 * When verifying destination reachability, we always 2751 * complain. 2752 * 2753 * When not verifying destination reachability but we 2754 * found an IRE, i.e. the destination is reachable, 2755 * then the other tests still apply and we complain. 
2756 */ 2757 if (verify_dst || (dst_ire != NULL)) { 2758 if (ip_debug > 2) { 2759 /* ip1dbg */ 2760 pr_addr_dbg("ip_bind_connected_v6: bad" 2761 " connected dst %s\n", AF_INET6, 2762 v6dst); 2763 } 2764 if (dst_ire == NULL || 2765 !(dst_ire->ire_type & IRE_HOST)) { 2766 error = ENETUNREACH; 2767 } else { 2768 error = EHOSTUNREACH; 2769 } 2770 goto bad_addr; 2771 } 2772 } 2773 } 2774 2775 /* 2776 * We now know that routing will allow us to reach the destination. 2777 * Check whether Trusted Solaris policy allows communication with this 2778 * host, and pretend that the destination is unreachable if not. 2779 * 2780 * This is never a problem for TCP, since that transport is known to 2781 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2782 * handling. If the remote is unreachable, it will be detected at that 2783 * point, so there's no reason to check it here. 2784 * 2785 * Note that for sendto (and other datagram-oriented friends), this 2786 * check is done as part of the data path label computation instead. 2787 * The check here is just to make non-TCP connect() report the right 2788 * error. 2789 */ 2790 if (dst_ire != NULL && is_system_labeled() && 2791 !IPCL_IS_TCP(connp) && 2792 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2793 connp->conn_mac_exempt) != 0) { 2794 error = EHOSTUNREACH; 2795 if (ip_debug > 2) { 2796 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2797 AF_INET6, v6dst); 2798 } 2799 goto bad_addr; 2800 } 2801 2802 /* 2803 * If the app does a connect(), it means that it will most likely 2804 * send more than 1 packet to the destination. It makes sense 2805 * to clear the temporary flag. 
2806 */ 2807 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2808 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2809 irb_t *irb = dst_ire->ire_bucket; 2810 2811 rw_enter(&irb->irb_lock, RW_WRITER); 2812 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2813 irb->irb_tmp_ire_cnt--; 2814 rw_exit(&irb->irb_lock); 2815 } 2816 2817 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2818 2819 /* 2820 * See if we should notify ULP about MDT; we do this whether or not 2821 * ire_requested is TRUE, in order to handle active connects; MDT 2822 * eligibility tests for passive connects are handled separately 2823 * through tcp_adapt_ire(). We do this before the source address 2824 * selection, because dst_ire may change after a call to 2825 * ipif_select_source_v6(). This is a best-effort check, as the 2826 * packet for this connection may not actually go through 2827 * dst_ire->ire_stq, and the exact IRE can only be known after 2828 * calling ip_newroute_v6(). This is why we further check on the 2829 * IRE during Multidata packet transmission in tcp_multisend(). 
2830 */ 2831 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2832 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2833 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2834 ILL_MDT_CAPABLE(md_ill)) { 2835 md_dst_ire = dst_ire; 2836 IRE_REFHOLD(md_dst_ire); 2837 } 2838 2839 if (dst_ire != NULL && 2840 dst_ire->ire_type == IRE_LOCAL && 2841 dst_ire->ire_zoneid != zoneid && 2842 dst_ire->ire_zoneid != ALL_ZONES) { 2843 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2844 zoneid, 0, NULL, 2845 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2846 MATCH_IRE_RJ_BHOLE); 2847 if (src_ire == NULL) { 2848 error = EHOSTUNREACH; 2849 goto bad_addr; 2850 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2851 if (!(src_ire->ire_type & IRE_HOST)) 2852 error = ENETUNREACH; 2853 else 2854 error = EHOSTUNREACH; 2855 goto bad_addr; 2856 } 2857 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2858 src_ipif = src_ire->ire_ipif; 2859 ipif_refhold(src_ipif); 2860 *v6src = src_ipif->ipif_v6lcl_addr; 2861 } 2862 ire_refrele(src_ire); 2863 src_ire = NULL; 2864 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2865 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2866 *v6src = sire->ire_src_addr_v6; 2867 ire_refrele(dst_ire); 2868 dst_ire = sire; 2869 sire = NULL; 2870 } else if (dst_ire->ire_type == IRE_CACHE && 2871 (dst_ire->ire_flags & RTF_SETSRC)) { 2872 ASSERT(dst_ire->ire_zoneid == zoneid || 2873 dst_ire->ire_zoneid == ALL_ZONES); 2874 *v6src = dst_ire->ire_src_addr_v6; 2875 } else { 2876 /* 2877 * Pick a source address so that a proper inbound load 2878 * spreading would happen. Use dst_ill specified by the 2879 * app. when socket option or scopeid is set. 
2880 */ 2881 int err; 2882 2883 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2884 uint_t if_index; 2885 2886 /* 2887 * Scope id or IPV6_PKTINFO 2888 */ 2889 2890 if_index = ipp->ipp_ifindex; 2891 dst_ill = ill_lookup_on_ifindex( 2892 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2893 if (dst_ill == NULL) { 2894 ip1dbg(("ip_bind_connected_v6:" 2895 " bad ifindex %d\n", if_index)); 2896 error = EADDRNOTAVAIL; 2897 goto bad_addr; 2898 } 2899 ill_held = B_TRUE; 2900 } else if (connp->conn_outgoing_ill != NULL) { 2901 /* 2902 * For IPV6_BOUND_IF socket option, 2903 * conn_outgoing_ill should be set 2904 * already in TCP or UDP/ICMP. 2905 */ 2906 dst_ill = conn_get_held_ill(connp, 2907 &connp->conn_outgoing_ill, &err); 2908 if (err == ILL_LOOKUP_FAILED) { 2909 ip1dbg(("ip_bind_connected_v6:" 2910 "no ill for bound_if\n")); 2911 error = EADDRNOTAVAIL; 2912 goto bad_addr; 2913 } 2914 ill_held = B_TRUE; 2915 } else if (dst_ire->ire_stq != NULL) { 2916 /* No need to hold ill here */ 2917 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2918 } else { 2919 /* No need to hold ill here */ 2920 dst_ill = dst_ire->ire_ipif->ipif_ill; 2921 } 2922 if (!ip6_asp_can_lookup()) { 2923 *mp->b_wptr++ = (char)protocol; 2924 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2925 ip_bind_connected_resume_v6); 2926 error = EINPROGRESS; 2927 goto refrele_and_quit; 2928 } 2929 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2930 RESTRICT_TO_NONE, connp->conn_src_preferences, 2931 zoneid); 2932 ip6_asp_table_refrele(); 2933 if (src_ipif == NULL) { 2934 pr_addr_dbg("ip_bind_connected_v6: " 2935 "no usable source address for " 2936 "connection to %s\n", AF_INET6, v6dst); 2937 error = EADDRNOTAVAIL; 2938 goto bad_addr; 2939 } 2940 *v6src = src_ipif->ipif_v6lcl_addr; 2941 } 2942 } 2943 2944 /* 2945 * We do ire_route_lookup_v6() here (and not an interface lookup) 2946 * as we assert that v6src should only come from an 2947 * UP interface for hard binding. 
2948 */ 2949 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2950 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2951 2952 /* src_ire must be a local|loopback */ 2953 if (!IRE_IS_LOCAL(src_ire)) { 2954 if (ip_debug > 2) { 2955 /* ip1dbg */ 2956 pr_addr_dbg("ip_bind_connected_v6: bad " 2957 "connected src %s\n", AF_INET6, v6src); 2958 } 2959 error = EADDRNOTAVAIL; 2960 goto bad_addr; 2961 } 2962 2963 /* 2964 * If the source address is a loopback address, the 2965 * destination had best be local or multicast. 2966 * The transports that can't handle multicast will reject 2967 * those addresses. 2968 */ 2969 if (src_ire->ire_type == IRE_LOOPBACK && 2970 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2971 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2972 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2973 error = -1; 2974 goto bad_addr; 2975 } 2976 /* 2977 * Allow setting new policies. For example, disconnects come 2978 * down as ipa_t bind. As we would have set conn_policy_cached 2979 * to B_TRUE before, we should set it to B_FALSE, so that policy 2980 * can change after the disconnect. 2981 */ 2982 connp->conn_policy_cached = B_FALSE; 2983 2984 /* 2985 * The addresses have been verified. Initialize the conn 2986 * before calling the policy as they expect the conns 2987 * initialized. 2988 */ 2989 connp->conn_srcv6 = *v6src; 2990 connp->conn_remv6 = *v6dst; 2991 connp->conn_lport = lport; 2992 connp->conn_fport = fport; 2993 2994 ASSERT(!(ipsec_policy_set && ire_requested)); 2995 if (ire_requested) { 2996 iulp_t *ulp_info = NULL; 2997 2998 /* 2999 * Note that sire will not be NULL if this is an off-link 3000 * connection and there is not cache for that dest yet. 3001 * 3002 * XXX Because of an existing bug, if there are multiple 3003 * default routes, the IRE returned now may not be the actual 3004 * default route used (default routes are chosen in a 3005 * round robin fashion). 
So if the metrics for different 3006 * default routes are different, we may return the wrong 3007 * metrics. This will not be a problem if the existing 3008 * bug is fixed. 3009 */ 3010 if (sire != NULL) 3011 ulp_info = &(sire->ire_uinfo); 3012 3013 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 3014 error = -1; 3015 goto bad_addr; 3016 } 3017 } else if (ipsec_policy_set) { 3018 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 3019 error = -1; 3020 goto bad_addr; 3021 } 3022 } 3023 3024 /* 3025 * Cache IPsec policy in this conn. If we have per-socket policy, 3026 * we'll cache that. If we don't, we'll inherit global policy. 3027 * 3028 * We can't insert until the conn reflects the policy. Note that 3029 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 3030 * connections where we don't have a policy. This is to prevent 3031 * global policy lookups in the inbound path. 3032 * 3033 * If we insert before we set conn_policy_cached, 3034 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3035 * because global policy cound be non-empty. We normally call 3036 * ipsec_check_policy() for conn_policy_cached connections only if 3037 * conn_in_enforce_policy is set. But in this case, 3038 * conn_policy_cached can get set anytime since we made the 3039 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3040 * is called, which will make the above assumption false. Thus, we 3041 * need to insert after we set conn_policy_cached. 3042 */ 3043 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3044 goto bad_addr; 3045 3046 /* If not fanout_insert this was just an address verification */ 3047 if (fanout_insert) { 3048 /* 3049 * The addresses have been verified. Time to insert in 3050 * the correct fanout list. 3051 */ 3052 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3053 connp->conn_ports, 3054 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 3055 } 3056 if (error == 0) { 3057 connp->conn_fully_bound = B_TRUE; 3058 /* 3059 * Our initial checks for MDT have passed; the IRE is not 3060 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3061 * be supporting MDT. Pass the IRE, IPC and ILL into 3062 * ip_mdinfo_return(), which performs further checks 3063 * against them and upon success, returns the MDT info 3064 * mblk which we will attach to the bind acknowledgment. 3065 */ 3066 if (md_dst_ire != NULL) { 3067 mblk_t *mdinfo_mp; 3068 3069 ASSERT(md_ill != NULL); 3070 ASSERT(md_ill->ill_mdt_capab != NULL); 3071 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3072 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3073 linkb(mp, mdinfo_mp); 3074 } 3075 } 3076 bad_addr: 3077 if (ipsec_policy_set) { 3078 ASSERT(policy_mp != NULL); 3079 freeb(policy_mp); 3080 /* 3081 * As of now assume that nothing else accompanies 3082 * IPSEC_POLICY_SET. 3083 */ 3084 mp->b_cont = NULL; 3085 } 3086 refrele_and_quit: 3087 if (src_ire != NULL) 3088 IRE_REFRELE(src_ire); 3089 if (dst_ire != NULL) 3090 IRE_REFRELE(dst_ire); 3091 if (sire != NULL) 3092 IRE_REFRELE(sire); 3093 if (src_ipif != NULL) 3094 ipif_refrele(src_ipif); 3095 if (md_dst_ire != NULL) 3096 IRE_REFRELE(md_dst_ire); 3097 if (ill_held && dst_ill != NULL) 3098 ill_refrele(dst_ill); 3099 return (error); 3100 } 3101 3102 /* 3103 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3104 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3105 */ 3106 static boolean_t 3107 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3108 iulp_t *ulp_info) 3109 { 3110 mblk_t *mp1; 3111 ire_t *ret_ire; 3112 3113 mp1 = mp->b_cont; 3114 ASSERT(mp1 != NULL); 3115 3116 if (ire != NULL) { 3117 /* 3118 * mp1 initialized above to IRE_DB_REQ_TYPE 3119 * appended mblk. Its <upper protocol>'s 3120 * job to make sure there is room. 
3121 */ 3122 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3123 return (B_FALSE); 3124 3125 mp1->b_datap->db_type = IRE_DB_TYPE; 3126 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3127 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3128 ret_ire = (ire_t *)mp1->b_rptr; 3129 if (IN6_IS_ADDR_MULTICAST(dst) || 3130 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3131 ret_ire->ire_type = IRE_BROADCAST; 3132 ret_ire->ire_addr_v6 = *dst; 3133 } 3134 if (ulp_info != NULL) { 3135 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3136 sizeof (iulp_t)); 3137 } 3138 ret_ire->ire_mp = mp1; 3139 } else { 3140 /* 3141 * No IRE was found. Remove IRE mblk. 3142 */ 3143 mp->b_cont = mp1->b_cont; 3144 freeb(mp1); 3145 } 3146 return (B_TRUE); 3147 } 3148 3149 /* 3150 * Add an ip6i_t header to the front of the mblk. 3151 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3152 * Returns NULL if allocation fails (and frees original message). 3153 * Used in outgoing path when going through ip_newroute_*v6(). 3154 * Used in incoming path to pass ifindex to transports. 
3155 */ 3156 mblk_t * 3157 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3158 { 3159 mblk_t *mp1; 3160 ip6i_t *ip6i; 3161 ip6_t *ip6h; 3162 3163 ip6h = (ip6_t *)mp->b_rptr; 3164 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3165 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3166 mp->b_datap->db_ref > 1) { 3167 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3168 if (mp1 == NULL) { 3169 freemsg(mp); 3170 return (NULL); 3171 } 3172 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3173 mp1->b_cont = mp; 3174 mp = mp1; 3175 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3176 } 3177 mp->b_rptr = (uchar_t *)ip6i; 3178 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3179 ip6i->ip6i_nxt = IPPROTO_RAW; 3180 if (ill != NULL) { 3181 ip6i->ip6i_flags = IP6I_IFINDEX; 3182 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3183 } else { 3184 ip6i->ip6i_flags = 0; 3185 } 3186 ip6i->ip6i_nexthop = *dst; 3187 return (mp); 3188 } 3189 3190 /* 3191 * Handle protocols with which IP is less intimate. There 3192 * can be more than one stream bound to a particular 3193 * protocol. When this is the case, normally each one gets a copy 3194 * of any incoming packets. 3195 * However, if the packet was tunneled and not multicast we only send to it 3196 * the first match. 3197 * 3198 * Zones notes: 3199 * Packets will be distributed to streams in all zones. This is really only 3200 * useful for ICMPv6 as only applications in the global zone can create raw 3201 * sockets for other protocols. 
3202 */ 3203 static void 3204 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3205 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3206 boolean_t mctl_present, zoneid_t zoneid) 3207 { 3208 queue_t *rq; 3209 mblk_t *mp1, *first_mp1; 3210 in6_addr_t dst = ip6h->ip6_dst; 3211 in6_addr_t src = ip6h->ip6_src; 3212 boolean_t one_only; 3213 mblk_t *first_mp = mp; 3214 boolean_t secure, shared_addr; 3215 conn_t *connp, *first_connp, *next_connp; 3216 connf_t *connfp; 3217 3218 if (mctl_present) { 3219 mp = first_mp->b_cont; 3220 secure = ipsec_in_is_secure(first_mp); 3221 ASSERT(mp != NULL); 3222 } else { 3223 secure = B_FALSE; 3224 } 3225 3226 /* 3227 * If the packet was tunneled and not multicast we only send to it 3228 * the first match. 3229 */ 3230 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3231 !IN6_IS_ADDR_MULTICAST(&dst)); 3232 3233 shared_addr = (zoneid == ALL_ZONES); 3234 if (shared_addr) { 3235 /* 3236 * We don't allow multilevel ports for raw IP, so no need to 3237 * check for that here. 3238 */ 3239 zoneid = tsol_packet_to_zoneid(mp); 3240 } 3241 3242 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3243 mutex_enter(&connfp->connf_lock); 3244 connp = connfp->connf_head; 3245 for (connp = connfp->connf_head; connp != NULL; 3246 connp = connp->conn_next) { 3247 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3248 zoneid) && 3249 (!is_system_labeled() || 3250 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3251 connp))) 3252 break; 3253 } 3254 3255 if (connp == NULL || connp->conn_upq == NULL) { 3256 /* 3257 * No one bound to this port. Is 3258 * there a client that wants all 3259 * unclaimed datagrams? 
3260 */ 3261 mutex_exit(&connfp->connf_lock); 3262 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3263 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3264 nexthdr_offset, mctl_present, zoneid)) { 3265 BUMP_MIB(ill->ill_ip6_mib, ipv6InUnknownProtos); 3266 } 3267 3268 return; 3269 } 3270 3271 CONN_INC_REF(connp); 3272 first_connp = connp; 3273 3274 /* 3275 * XXX: Fix the multiple protocol listeners case. We should not 3276 * be walking the conn->next list here. 3277 */ 3278 if (one_only) { 3279 /* 3280 * Only send message to one tunnel driver by immediately 3281 * terminating the loop. 3282 */ 3283 connp = NULL; 3284 } else { 3285 connp = connp->conn_next; 3286 3287 } 3288 for (;;) { 3289 while (connp != NULL) { 3290 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3291 flags, zoneid) && 3292 (!is_system_labeled() || 3293 tsol_receive_local(mp, &dst, IPV6_VERSION, 3294 shared_addr, connp))) 3295 break; 3296 connp = connp->conn_next; 3297 } 3298 3299 /* 3300 * Just copy the data part alone. The mctl part is 3301 * needed just for verifying policy and it is never 3302 * sent up. 3303 */ 3304 if (connp == NULL || connp->conn_upq == NULL || 3305 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3306 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3307 /* 3308 * No more intested clients or memory 3309 * allocation failed 3310 */ 3311 connp = first_connp; 3312 break; 3313 } 3314 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3315 CONN_INC_REF(connp); 3316 mutex_exit(&connfp->connf_lock); 3317 rq = connp->conn_rq; 3318 /* 3319 * For link-local always add ifindex so that transport can set 3320 * sin6_scope_id. Avoid it for ICMP error fanout. 
3321 */ 3322 if ((connp->conn_ipv6_recvpktinfo || 3323 IN6_IS_ADDR_LINKLOCAL(&src)) && 3324 (flags & IP_FF_IP6INFO)) { 3325 /* Add header */ 3326 mp1 = ip_add_info_v6(mp1, inill, &dst); 3327 } 3328 if (mp1 == NULL) { 3329 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3330 } else if (!canputnext(rq)) { 3331 if (flags & IP_FF_RAWIP) { 3332 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3333 } else { 3334 BUMP_MIB(ill->ill_icmp6_mib, 3335 ipv6IfIcmpInOverflows); 3336 } 3337 3338 freemsg(mp1); 3339 } else { 3340 /* 3341 * Don't enforce here if we're a tunnel - let "tun" do 3342 * it instead. 3343 */ 3344 if (!IPCL_IS_IPTUN(connp) && 3345 (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure)) { 3346 first_mp1 = ipsec_check_inbound_policy 3347 (first_mp1, connp, NULL, ip6h, 3348 mctl_present); 3349 } 3350 if (first_mp1 != NULL) { 3351 if (mctl_present) 3352 freeb(first_mp1); 3353 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3354 putnext(rq, mp1); 3355 } 3356 } 3357 mutex_enter(&connfp->connf_lock); 3358 /* Follow the next pointer before releasing the conn. */ 3359 next_connp = connp->conn_next; 3360 CONN_DEC_REF(connp); 3361 connp = next_connp; 3362 } 3363 3364 /* Last one. Send it upstream. */ 3365 mutex_exit(&connfp->connf_lock); 3366 3367 /* Initiate IPPF processing */ 3368 if (IP6_IN_IPP(flags)) { 3369 uint_t ifindex; 3370 3371 mutex_enter(&ill->ill_lock); 3372 ifindex = ill->ill_phyint->phyint_ifindex; 3373 mutex_exit(&ill->ill_lock); 3374 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3375 if (mp == NULL) { 3376 CONN_DEC_REF(connp); 3377 if (mctl_present) 3378 freeb(first_mp); 3379 return; 3380 } 3381 } 3382 3383 /* 3384 * For link-local always add ifindex so that transport can set 3385 * sin6_scope_id. Avoid it for ICMP error fanout. 
3386 */ 3387 if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3388 (flags & IP_FF_IP6INFO)) { 3389 /* Add header */ 3390 mp = ip_add_info_v6(mp, inill, &dst); 3391 if (mp == NULL) { 3392 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3393 CONN_DEC_REF(connp); 3394 if (mctl_present) 3395 freeb(first_mp); 3396 return; 3397 } else if (mctl_present) { 3398 first_mp->b_cont = mp; 3399 } else { 3400 first_mp = mp; 3401 } 3402 } 3403 3404 rq = connp->conn_rq; 3405 if (!canputnext(rq)) { 3406 if (flags & IP_FF_RAWIP) { 3407 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3408 } else { 3409 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3410 } 3411 3412 freemsg(first_mp); 3413 } else { 3414 if (IPCL_IS_IPTUN(connp)) { 3415 /* 3416 * Tunneled packet. We enforce policy in the tunnel 3417 * module itself. 3418 * 3419 * Send the WHOLE packet up (incl. IPSEC_IN) without 3420 * a policy check. 3421 */ 3422 putnext(rq, first_mp); 3423 CONN_DEC_REF(connp); 3424 return; 3425 } 3426 /* 3427 * Don't enforce here if we're a tunnel - let "tun" do 3428 * it instead. 3429 */ 3430 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3431 (CONN_INBOUND_POLICY_PRESENT(connp) || secure)) { 3432 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3433 NULL, ip6h, mctl_present); 3434 if (first_mp == NULL) { 3435 CONN_DEC_REF(connp); 3436 return; 3437 } 3438 } 3439 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3440 putnext(rq, mp); 3441 if (mctl_present) 3442 freeb(first_mp); 3443 } 3444 CONN_DEC_REF(connp); 3445 } 3446 3447 /* 3448 * Send an ICMP error after patching up the packet appropriately. Returns 3449 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3450 */ 3451 int 3452 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3453 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3454 boolean_t mctl_present, zoneid_t zoneid) 3455 { 3456 ip6_t *ip6h; 3457 mblk_t *first_mp; 3458 boolean_t secure; 3459 unsigned char db_type; 3460 3461 first_mp = mp; 3462 if (mctl_present) { 3463 mp = mp->b_cont; 3464 secure = ipsec_in_is_secure(first_mp); 3465 ASSERT(mp != NULL); 3466 } else { 3467 /* 3468 * If this is an ICMP error being reported - which goes 3469 * up as M_CTLs, we need to convert them to M_DATA till 3470 * we finish checking with global policy because 3471 * ipsec_check_global_policy() assumes M_DATA as clear 3472 * and M_CTL as secure. 3473 */ 3474 db_type = mp->b_datap->db_type; 3475 mp->b_datap->db_type = M_DATA; 3476 secure = B_FALSE; 3477 } 3478 /* 3479 * We are generating an icmp error for some inbound packet. 3480 * Called from all ip_fanout_(udp, tcp, proto) functions. 3481 * Before we generate an error, check with global policy 3482 * to see whether this is allowed to enter the system. As 3483 * there is no "conn", we are checking with global policy. 
3484 */ 3485 ip6h = (ip6_t *)mp->b_rptr; 3486 if (secure || ipsec_inbound_v6_policy_present) { 3487 first_mp = ipsec_check_global_policy(first_mp, NULL, 3488 NULL, ip6h, mctl_present); 3489 if (first_mp == NULL) 3490 return (0); 3491 } 3492 3493 if (!mctl_present) 3494 mp->b_datap->db_type = db_type; 3495 3496 if (flags & IP_FF_SEND_ICMP) { 3497 if (flags & IP_FF_HDR_COMPLETE) { 3498 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3499 freemsg(first_mp); 3500 return (1); 3501 } 3502 } 3503 switch (icmp_type) { 3504 case ICMP6_DST_UNREACH: 3505 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3506 B_FALSE, B_FALSE, zoneid); 3507 break; 3508 case ICMP6_PARAM_PROB: 3509 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3510 nexthdr_offset, B_FALSE, B_FALSE, zoneid); 3511 break; 3512 default: 3513 #ifdef DEBUG 3514 panic("ip_fanout_send_icmp_v6: wrong type"); 3515 /*NOTREACHED*/ 3516 #else 3517 freemsg(first_mp); 3518 break; 3519 #endif 3520 } 3521 } else { 3522 freemsg(first_mp); 3523 return (0); 3524 } 3525 3526 return (1); 3527 } 3528 3529 3530 /* 3531 * Fanout for TCP packets 3532 * The caller puts <fport, lport> in the ports parameter. 3533 */ 3534 static void 3535 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3536 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3537 { 3538 mblk_t *first_mp; 3539 boolean_t secure; 3540 conn_t *connp; 3541 tcph_t *tcph; 3542 boolean_t syn_present = B_FALSE; 3543 3544 first_mp = mp; 3545 if (mctl_present) { 3546 mp = first_mp->b_cont; 3547 secure = ipsec_in_is_secure(first_mp); 3548 ASSERT(mp != NULL); 3549 } else { 3550 secure = B_FALSE; 3551 } 3552 3553 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3554 3555 if (connp == NULL || 3556 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3557 /* 3558 * No hard-bound match. Send Reset. 
3559 */ 3560 dblk_t *dp = mp->b_datap; 3561 uint32_t ill_index; 3562 3563 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3564 3565 /* Initiate IPPf processing, if needed. */ 3566 if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) { 3567 ill_index = ill->ill_phyint->phyint_ifindex; 3568 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3569 if (first_mp == NULL) { 3570 if (connp != NULL) 3571 CONN_DEC_REF(connp); 3572 return; 3573 } 3574 } 3575 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3576 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); 3577 if (connp != NULL) 3578 CONN_DEC_REF(connp); 3579 return; 3580 } 3581 3582 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3583 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3584 if (connp->conn_flags & IPCL_TCP) { 3585 squeue_t *sqp; 3586 3587 /* 3588 * For fused tcp loopback, assign the eager's 3589 * squeue to be that of the active connect's. 3590 */ 3591 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3592 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3593 !IP6_IN_IPP(flags)) { 3594 ASSERT(Q_TO_CONN(q) != NULL); 3595 sqp = Q_TO_CONN(q)->conn_sqp; 3596 } else { 3597 sqp = IP_SQUEUE_GET(lbolt); 3598 } 3599 3600 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3601 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3602 3603 /* 3604 * db_cksumstuff is unused in the incoming 3605 * path; Thus store the ifindex here. It will 3606 * be cleared in tcp_conn_create_v6(). 
3607 */ 3608 DB_CKSUMSTUFF(mp) = 3609 (intptr_t)ill->ill_phyint->phyint_ifindex; 3610 syn_present = B_TRUE; 3611 } 3612 } 3613 3614 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3615 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3616 if ((flags & TH_RST) || (flags & TH_URG)) { 3617 CONN_DEC_REF(connp); 3618 freemsg(first_mp); 3619 return; 3620 } 3621 if (flags & TH_ACK) { 3622 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); 3623 CONN_DEC_REF(connp); 3624 return; 3625 } 3626 3627 CONN_DEC_REF(connp); 3628 freemsg(first_mp); 3629 return; 3630 } 3631 3632 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3633 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3634 NULL, ip6h, mctl_present); 3635 if (first_mp == NULL) { 3636 CONN_DEC_REF(connp); 3637 return; 3638 } 3639 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3640 ASSERT(syn_present); 3641 if (mctl_present) { 3642 ASSERT(first_mp != mp); 3643 first_mp->b_datap->db_struioflag |= 3644 STRUIO_POLICY; 3645 } else { 3646 ASSERT(first_mp == mp); 3647 mp->b_datap->db_struioflag &= 3648 ~STRUIO_EAGER; 3649 mp->b_datap->db_struioflag |= 3650 STRUIO_POLICY; 3651 } 3652 } else { 3653 /* 3654 * Discard first_mp early since we're dealing with a 3655 * fully-connected conn_t and tcp doesn't do policy in 3656 * this case. Also, if someone is bound to IPPROTO_TCP 3657 * over raw IP, they don't expect to see a M_CTL. 
3658 */ 3659 if (mctl_present) { 3660 freeb(first_mp); 3661 mctl_present = B_FALSE; 3662 } 3663 first_mp = mp; 3664 } 3665 } 3666 3667 /* Initiate IPPF processing */ 3668 if (IP6_IN_IPP(flags)) { 3669 uint_t ifindex; 3670 3671 mutex_enter(&ill->ill_lock); 3672 ifindex = ill->ill_phyint->phyint_ifindex; 3673 mutex_exit(&ill->ill_lock); 3674 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3675 if (mp == NULL) { 3676 CONN_DEC_REF(connp); 3677 if (mctl_present) { 3678 freeb(first_mp); 3679 } 3680 return; 3681 } else if (mctl_present) { 3682 /* 3683 * ip_add_info_v6 might return a new mp. 3684 */ 3685 ASSERT(first_mp != mp); 3686 first_mp->b_cont = mp; 3687 } else { 3688 first_mp = mp; 3689 } 3690 } 3691 3692 /* 3693 * For link-local always add ifindex so that TCP can bind to that 3694 * interface. Avoid it for ICMP error fanout. 3695 */ 3696 if (!syn_present && ((connp->conn_ipv6_recvpktinfo || 3697 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3698 (flags & IP_FF_IP6INFO))) { 3699 /* Add header */ 3700 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3701 if (mp == NULL) { 3702 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3703 CONN_DEC_REF(connp); 3704 if (mctl_present) 3705 freeb(first_mp); 3706 return; 3707 } else if (mctl_present) { 3708 ASSERT(first_mp != mp); 3709 first_mp->b_cont = mp; 3710 } else { 3711 first_mp = mp; 3712 } 3713 } 3714 3715 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3716 if (IPCL_IS_TCP(connp)) { 3717 (*ip_input_proc)(connp->conn_sqp, first_mp, 3718 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3719 } else { 3720 putnext(connp->conn_rq, first_mp); 3721 CONN_DEC_REF(connp); 3722 } 3723 } 3724 3725 /* 3726 * Fanout for UDP packets. 3727 * The caller puts <fport, lport> in the ports parameter. 3728 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3729 * 3730 * If SO_REUSEADDR is set all multicast and broadcast packets 3731 * will be delivered to all streams bound to the same port. 
*
 * Zones notes:
 * Multicast packets will be distributed to streams in all zones.
 *
 * mp is the inbound message (an M_CTL followed by the data when
 * mctl_present).  A non-multicast destination is delivered to the first
 * matching conn only; a multicast destination is delivered to every
 * matching conn, each extra receiver getting a duplicate.  When nothing
 * matches, the packet goes to the raw IPPROTO_UDP listeners if there are
 * any, otherwise an ICMPv6 port-unreachable is attempted.
 * The message is always consumed.
 */
static void
ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports,
    ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present,
    zoneid_t zoneid)
{
	uint32_t	dstport, srcport;
	in6_addr_t	dst;
	mblk_t		*first_mp;
	boolean_t	secure;
	conn_t		*connp;
	connf_t		*connfp;
	conn_t		*first_conn;
	conn_t		*next_conn;
	mblk_t		*mp1, *first_mp1;
	in6_addr_t	src;
	boolean_t	shared_addr;

	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		secure = ipsec_in_is_secure(first_mp);
		ASSERT(mp != NULL);
	} else {
		secure = B_FALSE;
	}

	/*
	 * Extract ports in net byte order: the destination port is taken
	 * from the low 16 bits of ntohl(ports), the source port from the
	 * high 16 bits.
	 */
	dstport = htons(ntohl(ports) & 0xFFFF);
	srcport = htons(ntohl(ports) >> 16);
	dst = ip6h->ip6_dst;
	src = ip6h->ip6_src;

	shared_addr = (zoneid == ALL_ZONES);
	if (shared_addr) {
		zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport);
		/*
		 * If no shared MLP is found, tsol_mlp_findzone returns
		 * ALL_ZONES.  In that case, we assume it's SLP, and
		 * search for the zone based on the packet label.
		 * That will also return ALL_ZONES on failure, but
		 * we never allow conn_zoneid to be set to ALL_ZONES.
		 */
		if (zoneid == ALL_ZONES)
			zoneid = tsol_packet_to_zoneid(mp);
	}

	/* Attempt to find a client stream based on destination port. */
	connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)];
	mutex_enter(&connfp->connf_lock);
	connp = connfp->connf_head;
	if (!IN6_IS_ADDR_MULTICAST(&dst)) {
		/*
		 * Not multicast. Send to the one (first) client we find.
		 */
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && IPCL_ZONE_MATCH(connp, zoneid) &&
			    conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid)) {
				break;
			}
			connp = connp->conn_next;
		}
		if (connp == NULL || connp->conn_upq == NULL)
			goto notfound;

		if (is_system_labeled() &&
		    !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp))
			goto notfound;

		/* Found a client */
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);

		/* Receiver is flow-controlled: drop the packet. */
		if (CONN_UDP_FLOWCTLD(connp)) {
			freemsg(first_mp);
			CONN_DEC_REF(connp);
			return;
		}
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				CONN_DEC_REF(connp);
				return;
			}
		}
		/* Initiate IPPF processing */
		if (IP6_IN_IPP(flags)) {
			uint_t	ifindex;

			mutex_enter(&ill->ill_lock);
			ifindex = ill->ill_phyint->phyint_ifindex;
			mutex_exit(&ill->ill_lock);
			ip_process(IPP_LOCAL_IN, &mp, ifindex);
			if (mp == NULL) {
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			}
		}
		/*
		 * For link-local always add ifindex so that
		 * transport can set sin6_scope_id. Avoid it for
		 * ICMP error fanout.
		 */
		if ((connp->conn_ipv6_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IP6INFO)) {
			/* Add header */
			mp = ip_add_info_v6(mp, inill, &dst);
			if (mp == NULL) {
				BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			} else if (mctl_present) {
				first_mp->b_cont = mp;
			} else {
				first_mp = mp;
			}
		}
		BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);

		/* Send it upstream */
		CONN_UDP_RECV(connp, mp);

		IP6_STAT(ip6_udp_fannorm);
		CONN_DEC_REF(connp);
		/* Only the data went up; the M_CTL is released here. */
		if (mctl_present)
			freeb(first_mp);
		return;
	}

	/* Multicast: find the first interested conn. */
	while (connp != NULL) {
		if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) &&
		    conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) &&
		    (!is_system_labeled() ||
		    tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp)))
			break;
		connp = connp->conn_next;
	}

	if (connp == NULL || connp->conn_upq == NULL)
		goto notfound;

	/* The first match is held and served last, with the original. */
	first_conn = connp;

	CONN_INC_REF(connp);
	connp = connp->conn_next;
	for (;;) {
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid) &&
			    (!is_system_labeled() ||
			    tsol_receive_local(mp, &dst, IPV6_VERSION,
			    shared_addr, connp)))
				break;
			connp = connp->conn_next;
		}
		/*
		 * Just copy the data part alone. The mctl part is
		 * needed just for verifying policy and it is never
		 * sent up.
		 */
		if (connp == NULL ||
		    (((first_mp1 = dupmsg(first_mp)) == NULL) &&
		    ((first_mp1 = ip_copymsg(first_mp))
		    == NULL))) {
			/*
			 * No more interested clients or memory
			 * allocation failed
			 */
			connp = first_conn;
			break;
		}
		mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);
		/*
		 * For link-local always add ifindex so that transport
		 * can set sin6_scope_id. Avoid it for ICMP error
		 * fanout.
		 */
		if ((connp->conn_ipv6_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IP6INFO)) {
			/* Add header */
			mp1 = ip_add_info_v6(mp1, inill, &dst);
		}
		/* mp1 could have changed */
		if (mctl_present)
			first_mp1->b_cont = mp1;
		else
			first_mp1 = mp1;
		if (mp1 == NULL) {
			/* The data is gone; release the M_CTL copy as well. */
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
			goto next_one;
		}
		if (CONN_UDP_FLOWCTLD(connp)) {
			BUMP_MIB(ill->ill_ip6_mib, udpInOverflows);
			freemsg(first_mp1);
			goto next_one;
		}

		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) ||
		    secure) {
			first_mp1 = ipsec_check_inbound_policy
			    (first_mp1, connp, NULL, ip6h,
			    mctl_present);
		}
		if (first_mp1 != NULL) {
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);

			/* Send it upstream */
			CONN_UDP_RECV(connp, mp1);
		}
next_one:
		mutex_enter(&connfp->connf_lock);
		/* Follow the next pointer before releasing the conn. */
		next_conn = connp->conn_next;
		IP6_STAT(ip6_udp_fanmb);
		CONN_DEC_REF(connp);
		connp = next_conn;
	}

	/* Last one.  Send it upstream. */
	mutex_exit(&connfp->connf_lock);

	/* Initiate IPPF processing */
	if (IP6_IN_IPP(flags)) {
		uint_t	ifindex;

		mutex_enter(&ill->ill_lock);
		ifindex = ill->ill_phyint->phyint_ifindex;
		mutex_exit(&ill->ill_lock);
		ip_process(IPP_LOCAL_IN, &mp, ifindex);
		if (mp == NULL) {
			CONN_DEC_REF(connp);
			if (mctl_present) {
				freeb(first_mp);
			}
			return;
		}
	}

	/*
	 * For link-local always add ifindex so that transport can set
	 * sin6_scope_id. Avoid it for ICMP error fanout.
	 */
	if ((connp->conn_ipv6_recvpktinfo ||
	    IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) {
		/* Add header */
		mp = ip_add_info_v6(mp, inill, &dst);
		if (mp == NULL) {
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}
	if (CONN_UDP_FLOWCTLD(connp)) {
		BUMP_MIB(ill->ill_ip6_mib, udpInOverflows);
		/* Data only; any M_CTL is released by the freeb() below. */
		freemsg(mp);
	} else {
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
				CONN_DEC_REF(connp);
				return;
			}
		}
		BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);

		/* Send it upstream */
		CONN_UDP_RECV(connp, mp);
	}
	IP6_STAT(ip6_udp_fanmb);
	CONN_DEC_REF(connp);
	if (mctl_present)
		freeb(first_mp);
	return;

notfound:
	mutex_exit(&connfp->connf_lock);
	/*
	 * No one bound to this port.  Is
	 * there a client that wants all
	 * unclaimed datagrams?
	 */
	if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
		ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP,
		    0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present,
		    zoneid);
	} else {
		if (ip_fanout_send_icmp_v6(q, first_mp, flags,
		    ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0,
		    mctl_present, zoneid)) {
			BUMP_MIB(&ip_mib, udpNoPorts);
		}
	}
}

/*
 * int ip_find_hdr_v6()
 *
 * This routine is used by the upper layer protocols and the IP tunnel
 * module to:
 *	- Set extension header pointers to appropriate locations
 *	- Determine IPv6 header length and return it
 *	- Return a pointer to the last nexthdr value
 *
 * The caller must initialize ipp_fields.
 *
 * NOTE: If multiple extension headers of the same type are present,
 * ip_find_hdr_v6() will set the respective extension header pointers
 * to the first one that it encounters in the IPv6 header.  The first
 * fragment header is recorded in ipp_fraghdr and parsing continues past
 * it.  This routine deals with malformed packets of various sorts in
 * which case the returned length is up to the malformed part.
 *
 * Only the bytes up to mp->b_wptr are examined.  The last next-header
 * value seen is stored through nexthdrp when nexthdrp is non-NULL.
 */
int
ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp)
{
	uint_t	length, ehdrlen;
	uint8_t nexthdr;
	uint8_t *whereptr, *endptr;
	ip6_dest_t *tmpdstopts;
	ip6_rthdr_t *tmprthdr;
	ip6_hbh_t *tmphopopts;
	ip6_frag_t *tmpfraghdr;

	length = IPV6_HDR_LEN;
	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
	endptr = mp->b_wptr;

	nexthdr = ip6h->ip6_nxt;
	while (whereptr < endptr) {
		/* Is there enough left for len + nexthdr? */
		if (whereptr + MIN_EHDR_LEN > endptr)
			goto done;

		switch (nexthdr) {
		case IPPROTO_HOPOPTS:
			tmphopopts = (ip6_hbh_t *)whereptr;
			/* Length field counts 8-byte units beyond the first. */
			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
			if ((uchar_t *)tmphopopts + ehdrlen > endptr)
				goto done;
			nexthdr = tmphopopts->ip6h_nxt;
			/* return only 1st hbh */
			if (!(ipp->ipp_fields & IPPF_HOPOPTS)) {
				ipp->ipp_fields |= IPPF_HOPOPTS;
				ipp->ipp_hopopts = tmphopopts;
				ipp->ipp_hopoptslen = ehdrlen;
			}
			break;
		case IPPROTO_DSTOPTS:
			tmpdstopts = (ip6_dest_t *)whereptr;
			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
			if ((uchar_t *)tmpdstopts + ehdrlen > endptr)
				goto done;
			nexthdr = tmpdstopts->ip6d_nxt;
			/*
			 * ipp_dstopts is set to the destination header after a
			 * routing header.
			 * Assume it is a post-rthdr destination header
			 * and adjust when we find an rthdr.
			 */
			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
				ipp->ipp_fields |= IPPF_DSTOPTS;
				ipp->ipp_dstopts = tmpdstopts;
				ipp->ipp_dstoptslen = ehdrlen;
			}
			break;
		case IPPROTO_ROUTING:
			tmprthdr = (ip6_rthdr_t *)whereptr;
			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
			if ((uchar_t *)tmprthdr + ehdrlen > endptr)
				goto done;
			nexthdr = tmprthdr->ip6r_nxt;
			/* return only 1st rthdr */
			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
				ipp->ipp_fields |= IPPF_RTHDR;
				ipp->ipp_rthdr = tmprthdr;
				ipp->ipp_rthdrlen = ehdrlen;
			}
			/*
			 * Make any destination header we've seen be a
			 * pre-rthdr destination header.
			 */
			if (ipp->ipp_fields & IPPF_DSTOPTS) {
				ipp->ipp_fields &= ~IPPF_DSTOPTS;
				ipp->ipp_fields |= IPPF_RTDSTOPTS;
				ipp->ipp_rtdstopts = ipp->ipp_dstopts;
				ipp->ipp_dstopts = NULL;
				ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen;
				ipp->ipp_dstoptslen = 0;
			}
			break;
		case IPPROTO_FRAGMENT:
			tmpfraghdr = (ip6_frag_t *)whereptr;
			/* The fragment header has a fixed size. */
			ehdrlen = sizeof (ip6_frag_t);
			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
				goto done;
			nexthdr = tmpfraghdr->ip6f_nxt;
			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
				ipp->ipp_fields |= IPPF_FRAGHDR;
				ipp->ipp_fraghdr = tmpfraghdr;
				ipp->ipp_fraghdrlen = ehdrlen;
			}
			break;
		case IPPROTO_NONE:
		default:
			/* Not an extension header handled here: stop. */
			goto done;
		}
		length += ehdrlen;
		whereptr += ehdrlen;
	}
done:
	if (nexthdrp != NULL)
		*nexthdrp = nexthdr;
	return (length);
}

/*
 * Fill in the fields of an IPv6 header that may have been left incomplete:
 * an unspecified source address is replaced with the address of the IRE
 * returned by ire_lookup_local_v6() for the zone, and the version/flow
 * word and hop limit are set to IPV6_DEFAULT_VERS_AND_FLOW and
 * ipv6_def_hops.
 *
 * Returns 0 on success, or 1 if a source address was needed but no local
 * IRE could be found (the header is left untouched in that case).
 */
int
ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid)
{
	ire_t	*ire;

	if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
		ire = ire_lookup_local_v6(zoneid);
		if (ire == NULL) {
			ip1dbg(("ip_hdr_complete_v6: no source IRE\n"));
			return (1);
		}
		ip6h->ip6_src = ire->ire_addr_v6;
		ire_refrele(ire);
	}
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_hops = ipv6_def_hops;
	return (0);
}

/*
 * Try to determine where and what are the IPv6 header length and
 * pointer to nexthdr value for the upper layer protocol (or an
 * unknown next hdr).
 *
 * Parameters returns a pointer to the nexthdr value;
 * Must handle malformed packets of various sorts.
 * Function returns failure for malformed cases.
4203 */ 4204 boolean_t 4205 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4206 uint8_t **nexthdrpp) 4207 { 4208 uint16_t length; 4209 uint_t ehdrlen; 4210 uint8_t *nexthdrp; 4211 uint8_t *whereptr; 4212 uint8_t *endptr; 4213 ip6_dest_t *desthdr; 4214 ip6_rthdr_t *rthdr; 4215 ip6_frag_t *fraghdr; 4216 4217 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 4218 length = IPV6_HDR_LEN; 4219 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4220 endptr = mp->b_wptr; 4221 4222 nexthdrp = &ip6h->ip6_nxt; 4223 while (whereptr < endptr) { 4224 /* Is there enough left for len + nexthdr? */ 4225 if (whereptr + MIN_EHDR_LEN > endptr) 4226 break; 4227 4228 switch (*nexthdrp) { 4229 case IPPROTO_HOPOPTS: 4230 case IPPROTO_DSTOPTS: 4231 /* Assumes the headers are identical for hbh and dst */ 4232 desthdr = (ip6_dest_t *)whereptr; 4233 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4234 if ((uchar_t *)desthdr + ehdrlen > endptr) 4235 return (B_FALSE); 4236 nexthdrp = &desthdr->ip6d_nxt; 4237 break; 4238 case IPPROTO_ROUTING: 4239 rthdr = (ip6_rthdr_t *)whereptr; 4240 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4241 if ((uchar_t *)rthdr + ehdrlen > endptr) 4242 return (B_FALSE); 4243 nexthdrp = &rthdr->ip6r_nxt; 4244 break; 4245 case IPPROTO_FRAGMENT: 4246 fraghdr = (ip6_frag_t *)whereptr; 4247 ehdrlen = sizeof (ip6_frag_t); 4248 if ((uchar_t *)&fraghdr[1] > endptr) 4249 return (B_FALSE); 4250 nexthdrp = &fraghdr->ip6f_nxt; 4251 break; 4252 case IPPROTO_NONE: 4253 /* No next header means we're finished */ 4254 default: 4255 *hdr_length_ptr = length; 4256 *nexthdrpp = nexthdrp; 4257 return (B_TRUE); 4258 } 4259 length += ehdrlen; 4260 whereptr += ehdrlen; 4261 *hdr_length_ptr = length; 4262 *nexthdrpp = nexthdrp; 4263 } 4264 switch (*nexthdrp) { 4265 case IPPROTO_HOPOPTS: 4266 case IPPROTO_DSTOPTS: 4267 case IPPROTO_ROUTING: 4268 case IPPROTO_FRAGMENT: 4269 /* 4270 * If any know extension headers are still to be processed, 4271 * the packet's malformed (or at 
least all the IP header(s) are 4272 * not in the same mblk - and that should never happen. 4273 */ 4274 return (B_FALSE); 4275 4276 default: 4277 /* 4278 * If we get here, we know that all of the IP headers were in 4279 * the same mblk, even if the ULP header is in the next mblk. 4280 */ 4281 *hdr_length_ptr = length; 4282 *nexthdrpp = nexthdrp; 4283 return (B_TRUE); 4284 } 4285 } 4286 4287 /* 4288 * Return the length of the IPv6 related headers (including extension headers) 4289 * Returns a length even if the packet is malformed. 4290 */ 4291 int 4292 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4293 { 4294 uint16_t hdr_len; 4295 uint8_t *nexthdrp; 4296 4297 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4298 return (hdr_len); 4299 } 4300 4301 /* 4302 * Select an ill for the packet by considering load spreading across 4303 * a different ill in the group if dst_ill is part of some group. 4304 */ 4305 static ill_t * 4306 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4307 { 4308 ill_t *ill; 4309 4310 /* 4311 * We schedule irrespective of whether the source address is 4312 * INADDR_UNSPECIED or not. 4313 */ 4314 ill = illgrp_scheduler(dst_ill); 4315 if (ill == NULL) 4316 return (NULL); 4317 4318 /* 4319 * For groups with names ip_sioctl_groupname ensures that all 4320 * ills are of same type. For groups without names, ifgrp_insert 4321 * ensures this. 4322 */ 4323 ASSERT(dst_ill->ill_type == ill->ill_type); 4324 4325 return (ill); 4326 } 4327 4328 /* 4329 * IPv6 - 4330 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4331 * to send out a packet to a destination address for which we do not have 4332 * specific routing information. 4333 * 4334 * Handle non-multicast packets. If ill is non-NULL the match is done 4335 * for that ill. 
4336 * 4337 * When a specific ill is specified (using IPV6_PKTINFO, 4338 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4339 * on routing entries (ftable and ctable) that have a matching 4340 * ire->ire_ipif->ipif_ill. Thus this can only be used 4341 * for destinations that are on-link for the specific ill 4342 * and that can appear on multiple links. Thus it is useful 4343 * for multicast destinations, link-local destinations, and 4344 * at some point perhaps for site-local destinations (if the 4345 * node sits at a site boundary). 4346 * We create the cache entries in the regular ctable since 4347 * it can not "confuse" things for other destinations. 4349 * 4350 * When ill is part of an ill group, we subject the packets 4351 * to load spreading even if the ill is specified by the 4352 * means described above. We disable only for IPV6_BOUND_PIF 4353 * and for the cases where IP6I_ATTACH_IF is set i.e NS/NA/ 4354 * Echo replies to link-local destinations have IP6I_ATTACH_IF 4355 * set. 4356 * 4357 * NOTE : These are the scopes of some of the variables that point at IRE, 4358 * which needs to be followed while making any future modifications 4359 * to avoid memory leaks. 4360 * 4361 * - ire and sire are the entries looked up initially by 4362 * ire_ftable_lookup_v6. 4363 * - ipif_ire is used to hold the interface ire associated with 4364 * the new cache ire. But its scope is limited, so we always REFRELE 4365 * it before branching out to error paths. 4366 * - save_ire is initialized before ire_create, so that ire returned 4367 * by ire_create will not over-write the ire. We REFRELE save_ire 4368 * before breaking out of the switch. 4369 * 4370 * Thus on failures, we have to REFRELE only ire and sire, if they 4371 * are not NULL. 4372 * 4373 * v6srcp may be used in the future. Currently unused. 
4374 */ 4375 /* ARGSUSED */ 4376 void 4377 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4378 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4379 { 4380 in6_addr_t v6gw; 4381 in6_addr_t dst; 4382 ire_t *ire = NULL; 4383 ipif_t *src_ipif = NULL; 4384 ill_t *dst_ill = NULL; 4385 ire_t *sire = NULL; 4386 ire_t *save_ire; 4387 mblk_t *dlureq_mp; 4388 ip6_t *ip6h; 4389 int err = 0; 4390 mblk_t *first_mp; 4391 ipsec_out_t *io; 4392 ill_t *attach_ill = NULL; 4393 ushort_t ire_marks = 0; 4394 int match_flags; 4395 boolean_t ip6i_present; 4396 ire_t *first_sire = NULL; 4397 mblk_t *copy_mp = NULL; 4398 mblk_t *xmit_mp = NULL; 4399 in6_addr_t save_dst; 4400 uint32_t multirt_flags = 4401 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4402 boolean_t multirt_is_resolvable; 4403 boolean_t multirt_resolve_next; 4404 boolean_t need_rele = B_FALSE; 4405 boolean_t do_attach_ill = B_FALSE; 4406 boolean_t ip6_asp_table_held = B_FALSE; 4407 tsol_ire_gw_secattr_t *attrp = NULL; 4408 tsol_gcgrp_t *gcgrp = NULL; 4409 tsol_gcgrp_addr_t ga; 4410 4411 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4412 4413 first_mp = mp; 4414 if (mp->b_datap->db_type == M_CTL) { 4415 mp = mp->b_cont; 4416 io = (ipsec_out_t *)first_mp->b_rptr; 4417 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4418 } else { 4419 io = NULL; 4420 } 4421 4422 /* 4423 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4424 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4425 * could be NULL. 4426 * 4427 * This information can appear either in an ip6i_t or an IPSEC_OUT 4428 * message. 4429 */ 4430 ip6h = (ip6_t *)mp->b_rptr; 4431 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4432 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4433 if (!ip6i_present || 4434 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4435 attach_ill = ip_grab_attach_ill(ill, first_mp, 4436 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4437 io->ipsec_out_ill_index), B_TRUE); 4438 /* Failure case frees things for us. */ 4439 if (attach_ill == NULL) 4440 return; 4441 4442 /* 4443 * Check if we need an ire that will not be 4444 * looked up by anybody else i.e. HIDDEN. 4445 */ 4446 if (ill_is_probeonly(attach_ill)) 4447 ire_marks = IRE_MARK_HIDDEN; 4448 } 4449 } 4450 4451 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4452 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4453 goto icmp_err_ret; 4454 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4455 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4456 goto icmp_err_ret; 4457 } 4458 4459 /* 4460 * If this IRE is created for forwarding or it is not for 4461 * TCP traffic, mark it as temporary. 4462 * 4463 * Is it sufficient just to check the next header?? 4464 */ 4465 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4466 ire_marks |= IRE_MARK_TEMPORARY; 4467 4468 /* 4469 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4470 * chain until it gets the most specific information available. 4471 * For example, we know that there is no IRE_CACHE for this dest, 4472 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4473 * ire_ftable_lookup_v6 will look up the gateway, etc. 4474 */ 4475 4476 if (ill == NULL) { 4477 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4478 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4479 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4480 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4481 match_flags); 4482 /* 4483 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4484 * in a NULL ill, but the packet could be a neighbor 4485 * solicitation/advertisment and could have a valid attach_ill. 4486 */ 4487 if (attach_ill != NULL) 4488 ill_refrele(attach_ill); 4489 } else { 4490 if (attach_ill != NULL) { 4491 /* 4492 * attach_ill is set only for communicating with 4493 * on-link hosts. So, don't look for DEFAULT. 
4494 * ip_wput_v6 passes the right ill in this case and 4495 * hence we can assert. 4496 */ 4497 ASSERT(ill == attach_ill); 4498 ill_refrele(attach_ill); 4499 do_attach_ill = B_TRUE; 4500 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4501 } else { 4502 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4503 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4504 } 4505 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4506 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4507 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags); 4508 } 4509 4510 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4511 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4512 4513 if (zoneid == ALL_ZONES && ire != NULL) { 4514 /* 4515 * In the forwarding case, we can use a route from any zone 4516 * since we won't change the source address. We can easily 4517 * assert that the source address is already set when there's no 4518 * ip6_info header - otherwise we'd have to call pullupmsg(). 4519 */ 4520 ASSERT(ip6i_present || 4521 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4522 zoneid = ire->ire_zoneid; 4523 } 4524 4525 /* 4526 * We enter a loop that will be run only once in most cases. 4527 * The loop is re-entered in the case where the destination 4528 * can be reached through multiple RTF_MULTIRT-flagged routes. 4529 * The intention is to compute multiple routes to a single 4530 * destination in a single ip_newroute_v6 call. 4531 * The information is contained in sire->ire_flags. 
4532 */ 4533 do { 4534 multirt_resolve_next = B_FALSE; 4535 4536 if (dst_ill != NULL) { 4537 ill_refrele(dst_ill); 4538 dst_ill = NULL; 4539 } 4540 if (src_ipif != NULL) { 4541 ipif_refrele(src_ipif); 4542 src_ipif = NULL; 4543 } 4544 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4545 ip3dbg(("ip_newroute_v6: starting new resolution " 4546 "with first_mp %p, tag %d\n", 4547 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4548 4549 /* 4550 * We check if there are trailing unresolved routes for 4551 * the destination contained in sire. 4552 */ 4553 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4554 &sire, multirt_flags, MBLK_GETLABEL(mp)); 4555 4556 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4557 "ire %p, sire %p\n", 4558 multirt_is_resolvable, (void *)ire, (void *)sire)); 4559 4560 if (!multirt_is_resolvable) { 4561 /* 4562 * No more multirt routes to resolve; give up 4563 * (all routes resolved or no more resolvable 4564 * routes). 4565 */ 4566 if (ire != NULL) { 4567 ire_refrele(ire); 4568 ire = NULL; 4569 } 4570 } else { 4571 ASSERT(sire != NULL); 4572 ASSERT(ire != NULL); 4573 /* 4574 * We simply use first_sire as a flag that 4575 * indicates if a resolvable multirt route has 4576 * already been found during the preceding 4577 * loops. If it is not the case, we may have 4578 * to send an ICMP error to report that the 4579 * destination is unreachable. We do not 4580 * IRE_REFHOLD first_sire. 4581 */ 4582 if (first_sire == NULL) { 4583 first_sire = sire; 4584 } 4585 } 4586 } 4587 if ((ire == NULL) || (ire == sire)) { 4588 /* 4589 * either ire == NULL (the destination cannot be 4590 * resolved) or ire == sire (the gateway cannot be 4591 * resolved). At this point, there are no more routes 4592 * to resolve for the destination, thus we exit. 
4593 */ 4594 if (ip_debug > 3) { 4595 /* ip2dbg */ 4596 pr_addr_dbg("ip_newroute_v6: " 4597 "can't resolve %s\n", AF_INET6, v6dstp); 4598 } 4599 ip3dbg(("ip_newroute_v6: " 4600 "ire %p, sire %p, first_sire %p\n", 4601 (void *)ire, (void *)sire, (void *)first_sire)); 4602 4603 if (sire != NULL) { 4604 ire_refrele(sire); 4605 sire = NULL; 4606 } 4607 4608 if (first_sire != NULL) { 4609 /* 4610 * At least one multirt route has been found 4611 * in the same ip_newroute() call; there is no 4612 * need to report an ICMP error. 4613 * first_sire was not IRE_REFHOLDed. 4614 */ 4615 MULTIRT_DEBUG_UNTAG(first_mp); 4616 freemsg(first_mp); 4617 return; 4618 } 4619 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4620 RTA_DST); 4621 goto icmp_err_ret; 4622 } 4623 4624 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4625 4626 /* 4627 * Verify that the returned IRE does not have either the 4628 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4629 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4630 */ 4631 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4632 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4633 goto icmp_err_ret; 4634 4635 /* 4636 * Increment the ire_ob_pkt_count field for ire if it is an 4637 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4638 * increment the same for the parent IRE, sire, if it is some 4639 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4640 * and HOST_REDIRECT). 4641 */ 4642 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4643 UPDATE_OB_PKT_COUNT(ire); 4644 ire->ire_last_used_time = lbolt; 4645 } 4646 4647 if (sire != NULL) { 4648 mutex_enter(&sire->ire_lock); 4649 v6gw = sire->ire_gateway_addr_v6; 4650 mutex_exit(&sire->ire_lock); 4651 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4652 IRE_INTERFACE)) == 0); 4653 UPDATE_OB_PKT_COUNT(sire); 4654 sire->ire_last_used_time = lbolt; 4655 } else { 4656 v6gw = ipv6_all_zeros; 4657 } 4658 4659 /* 4660 * We have a route to reach the destination. 
4661 * 4662 * 1) If the interface is part of ill group, try to get a new 4663 * ill taking load spreading into account. 4664 * 4665 * 2) After selecting the ill, get a source address that might 4666 * create good inbound load spreading and that matches the 4667 * right scope. ipif_select_source_v6 does this for us. 4668 * 4669 * If the application specified the ill (ifindex), we still 4670 * load spread. Only if the packets needs to go out specifically 4671 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4672 * IPV6_BOUND_PIF we don't try to use a different ill for load 4673 * spreading. 4674 */ 4675 if (!do_attach_ill) { 4676 /* 4677 * If the interface belongs to an interface group, 4678 * make sure the next possible interface in the group 4679 * is used. This encourages load spreading among 4680 * peers in an interface group. However, in the case 4681 * of multirouting, load spreading is not used, as we 4682 * actually want to replicate outgoing packets through 4683 * particular interfaces. 4684 * 4685 * Note: While we pick a dst_ill we are really only 4686 * interested in the ill for load spreading. 4687 * The source ipif is determined by source address 4688 * selection below. 4689 */ 4690 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4691 dst_ill = ire->ire_ipif->ipif_ill; 4692 /* For uniformity do a refhold */ 4693 ill_refhold(dst_ill); 4694 } else { 4695 /* 4696 * If we are here trying to create an IRE_CACHE 4697 * for an offlink destination and have the 4698 * IRE_CACHE for the next hop and the latter is 4699 * using virtual IP source address selection i.e 4700 * it's ire->ire_ipif is pointing to a virtual 4701 * network interface (vni) then 4702 * ip_newroute_get_dst_ll() will return the vni 4703 * interface as the dst_ill. 
Since the vni is 4704 * virtual i.e not associated with any physical 4705 * interface, it cannot be the dst_ill, hence 4706 * in such a case call ip_newroute_get_dst_ll() 4707 * with the stq_ill instead of the ire_ipif ILL. 4708 * The function returns a refheld ill. 4709 */ 4710 if ((ire->ire_type == IRE_CACHE) && 4711 IS_VNI(ire->ire_ipif->ipif_ill)) 4712 dst_ill = ip_newroute_get_dst_ill_v6( 4713 ire->ire_stq->q_ptr); 4714 else 4715 dst_ill = ip_newroute_get_dst_ill_v6( 4716 ire->ire_ipif->ipif_ill); 4717 } 4718 if (dst_ill == NULL) { 4719 if (ip_debug > 2) { 4720 pr_addr_dbg("ip_newroute_v6 : no dst " 4721 "ill for dst %s\n", 4722 AF_INET6, v6dstp); 4723 } 4724 goto icmp_err_ret; 4725 } else if (dst_ill->ill_group == NULL && ill != NULL && 4726 dst_ill != ill) { 4727 /* 4728 * If "ill" is not part of any group, we should 4729 * have found a route matching "ill" as we 4730 * called ire_ftable_lookup_v6 with 4731 * MATCH_IRE_ILL_GROUP. 4732 * Rather than asserting when there is a 4733 * mismatch, we just drop the packet. 4734 */ 4735 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4736 "dst_ill %s ill %s\n", 4737 dst_ill->ill_name, 4738 ill->ill_name)); 4739 goto icmp_err_ret; 4740 } 4741 } else { 4742 dst_ill = ire->ire_ipif->ipif_ill; 4743 /* For uniformity do refhold */ 4744 ill_refhold(dst_ill); 4745 /* 4746 * We should have found a route matching ill as we 4747 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4748 * Rather than asserting, while there is a mismatch, 4749 * we just drop the packet. 4750 */ 4751 if (dst_ill != ill) { 4752 ip0dbg(("ip_newroute_v6: Packet dropped as " 4753 "IP6I_ATTACH_IF ill is %s, " 4754 "ire->ire_ipif->ipif_ill is %s\n", 4755 ill->ill_name, 4756 dst_ill->ill_name)); 4757 goto icmp_err_ret; 4758 } 4759 } 4760 /* 4761 * Pick a source address which matches the scope of the 4762 * destination address. 4763 * For RTF_SETSRC routes, the source address is imposed by the 4764 * parent ire (sire). 
4765 */ 4766 ASSERT(src_ipif == NULL); 4767 if (ire->ire_type == IRE_IF_RESOLVER && 4768 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4769 ip6_asp_can_lookup()) { 4770 /* 4771 * The ire cache entry we're adding is for the 4772 * gateway itself. The source address in this case 4773 * is relative to the gateway's address. 4774 */ 4775 ip6_asp_table_held = B_TRUE; 4776 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4777 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4778 if (src_ipif != NULL) 4779 ire_marks |= IRE_MARK_USESRC_CHECK; 4780 } else { 4781 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4782 /* 4783 * Check that the ipif matching the requested 4784 * source address still exists. 4785 */ 4786 src_ipif = ipif_lookup_addr_v6( 4787 &sire->ire_src_addr_v6, NULL, zoneid, 4788 NULL, NULL, NULL, NULL); 4789 } 4790 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4791 uint_t restrict_ill = RESTRICT_TO_NONE; 4792 4793 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4794 & IP6I_ATTACH_IF) 4795 restrict_ill = RESTRICT_TO_ILL; 4796 ip6_asp_table_held = B_TRUE; 4797 src_ipif = ipif_select_source_v6(dst_ill, 4798 v6dstp, restrict_ill, 4799 IPV6_PREFER_SRC_DEFAULT, zoneid); 4800 if (src_ipif != NULL) 4801 ire_marks |= IRE_MARK_USESRC_CHECK; 4802 } 4803 } 4804 4805 if (src_ipif == NULL) { 4806 if (ip_debug > 2) { 4807 /* ip1dbg */ 4808 pr_addr_dbg("ip_newroute_v6: no src for " 4809 "dst %s\n, ", AF_INET6, v6dstp); 4810 printf("ip_newroute_v6: interface name %s\n", 4811 dst_ill->ill_name); 4812 } 4813 goto icmp_err_ret; 4814 } 4815 4816 if (ip_debug > 3) { 4817 /* ip2dbg */ 4818 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4819 AF_INET6, &v6gw); 4820 } 4821 ip2dbg(("\tire type %s (%d)\n", 4822 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4823 4824 /* 4825 * At this point in ip_newroute_v6(), ire is either the 4826 * IRE_CACHE of the next-hop gateway for an off-subnet 4827 * destination or an IRE_INTERFACE type that should be used 4828 * to resolve an 
on-subnet destination or an on-subnet 4829 * next-hop gateway. 4830 * 4831 * In the IRE_CACHE case, we have the following : 4832 * 4833 * 1) src_ipif - used for getting a source address. 4834 * 4835 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4836 * means packets using this IRE_CACHE will go out on dst_ill. 4837 * 4838 * 3) The IRE sire will point to the prefix that is the longest 4839 * matching route for the destination. These prefix types 4840 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4841 * 4842 * The newly created IRE_CACHE entry for the off-subnet 4843 * destination is tied to both the prefix route and the 4844 * interface route used to resolve the next-hop gateway 4845 * via the ire_phandle and ire_ihandle fields, respectively. 4846 * 4847 * In the IRE_INTERFACE case, we have the following : 4848 * 4849 * 1) src_ipif - used for getting a source address. 4850 * 4851 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4852 * means packets using the IRE_CACHE that we will build 4853 * here will go out on dst_ill. 4854 * 4855 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4856 * to be created will only be tied to the IRE_INTERFACE that 4857 * was derived from the ire_ihandle field. 4858 * 4859 * If sire is non-NULL, it means the destination is off-link 4860 * and we will first create the IRE_CACHE for the gateway. 4861 * Next time through ip_newroute_v6, we will create the 4862 * IRE_CACHE for the final destination as described above. 4863 */ 4864 save_ire = ire; 4865 switch (ire->ire_type) { 4866 case IRE_CACHE: { 4867 ire_t *ipif_ire; 4868 4869 ASSERT(sire != NULL); 4870 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4871 mutex_enter(&ire->ire_lock); 4872 v6gw = ire->ire_gateway_addr_v6; 4873 mutex_exit(&ire->ire_lock); 4874 } 4875 /* 4876 * We need 3 ire's to create a new cache ire for an 4877 * off-link destination from the cache ire of the 4878 * gateway. 4879 * 4880 * 1. The prefix ire 'sire' 4881 * 2. 
The cache ire of the gateway 'ire' 4882 * 3. The interface ire 'ipif_ire' 4883 * 4884 * We have (1) and (2). We lookup (3) below. 4885 * 4886 * If there is no interface route to the gateway, 4887 * it is a race condition, where we found the cache 4888 * but the inteface route has been deleted. 4889 */ 4890 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4891 if (ipif_ire == NULL) { 4892 ip1dbg(("ip_newroute_v6:" 4893 "ire_ihandle_lookup_offlink_v6 failed\n")); 4894 goto icmp_err_ret; 4895 } 4896 /* 4897 * Assume DL_UNITDATA_REQ is same for all physical 4898 * interfaces in the ifgrp. If it isn't, this code will 4899 * have to be seriously rewhacked to allow the 4900 * fastpath probing (such that I cache the link 4901 * header in the IRE_CACHE) to work over ifgrps. 4902 * We have what we need to build an IRE_CACHE. 4903 */ 4904 /* 4905 * Note: the new ire inherits RTF_SETSRC 4906 * and RTF_MULTIRT to propagate these flags from prefix 4907 * to cache. 4908 */ 4909 4910 /* 4911 * Check cached gateway IRE for any security 4912 * attributes; if found, associate the gateway 4913 * credentials group to the destination IRE. 
4914 */ 4915 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4916 mutex_enter(&attrp->igsa_lock); 4917 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4918 GCGRP_REFHOLD(gcgrp); 4919 mutex_exit(&attrp->igsa_lock); 4920 } 4921 4922 ire = ire_create_v6( 4923 v6dstp, /* dest address */ 4924 &ipv6_all_ones, /* mask */ 4925 &src_ipif->ipif_v6src_addr, /* source address */ 4926 &v6gw, /* gateway address */ 4927 &save_ire->ire_max_frag, 4928 NULL, /* Fast Path header */ 4929 dst_ill->ill_rq, /* recv-from queue */ 4930 dst_ill->ill_wq, /* send-to queue */ 4931 IRE_CACHE, 4932 NULL, 4933 src_ipif, 4934 &sire->ire_mask_v6, /* Parent mask */ 4935 sire->ire_phandle, /* Parent handle */ 4936 ipif_ire->ire_ihandle, /* Interface handle */ 4937 sire->ire_flags & /* flags if any */ 4938 (RTF_SETSRC | RTF_MULTIRT), 4939 &(sire->ire_uinfo), 4940 NULL, 4941 gcgrp); 4942 4943 if (ire == NULL) { 4944 if (gcgrp != NULL) { 4945 GCGRP_REFRELE(gcgrp); 4946 gcgrp = NULL; 4947 } 4948 ire_refrele(save_ire); 4949 ire_refrele(ipif_ire); 4950 break; 4951 } 4952 4953 /* reference now held by IRE */ 4954 gcgrp = NULL; 4955 4956 ire->ire_marks |= ire_marks; 4957 4958 /* 4959 * Prevent sire and ipif_ire from getting deleted. The 4960 * newly created ire is tied to both of them via the 4961 * phandle and ihandle respectively. 4962 */ 4963 IRB_REFHOLD(sire->ire_bucket); 4964 /* Has it been removed already ? */ 4965 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4966 IRB_REFRELE(sire->ire_bucket); 4967 ire_refrele(ipif_ire); 4968 ire_refrele(save_ire); 4969 break; 4970 } 4971 4972 IRB_REFHOLD(ipif_ire->ire_bucket); 4973 /* Has it been removed already ? 
*/ 4974 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4975 IRB_REFRELE(ipif_ire->ire_bucket); 4976 IRB_REFRELE(sire->ire_bucket); 4977 ire_refrele(ipif_ire); 4978 ire_refrele(save_ire); 4979 break; 4980 } 4981 4982 xmit_mp = first_mp; 4983 if (ire->ire_flags & RTF_MULTIRT) { 4984 copy_mp = copymsg(first_mp); 4985 if (copy_mp != NULL) { 4986 xmit_mp = copy_mp; 4987 MULTIRT_DEBUG_TAG(first_mp); 4988 } 4989 } 4990 ire_add_then_send(q, ire, xmit_mp); 4991 if (ip6_asp_table_held) { 4992 ip6_asp_table_refrele(); 4993 ip6_asp_table_held = B_FALSE; 4994 } 4995 ire_refrele(save_ire); 4996 4997 /* Assert that sire is not deleted yet. */ 4998 ASSERT(sire->ire_ptpn != NULL); 4999 IRB_REFRELE(sire->ire_bucket); 5000 5001 /* Assert that ipif_ire is not deleted yet. */ 5002 ASSERT(ipif_ire->ire_ptpn != NULL); 5003 IRB_REFRELE(ipif_ire->ire_bucket); 5004 ire_refrele(ipif_ire); 5005 5006 if (copy_mp != NULL) { 5007 /* 5008 * Search for the next unresolved 5009 * multirt route. 5010 */ 5011 copy_mp = NULL; 5012 ipif_ire = NULL; 5013 ire = NULL; 5014 /* re-enter the loop */ 5015 multirt_resolve_next = B_TRUE; 5016 continue; 5017 } 5018 ire_refrele(sire); 5019 ill_refrele(dst_ill); 5020 ipif_refrele(src_ipif); 5021 return; 5022 } 5023 case IRE_IF_NORESOLVER: 5024 /* 5025 * We have what we need to build an IRE_CACHE. 5026 * 5027 * Create a new dlureq_mp with the IPv6 gateway 5028 * address in destination address in the DLPI hdr 5029 * if the physical length is exactly 16 bytes. 
5030 */ 5031 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5032 const in6_addr_t *addr; 5033 5034 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5035 addr = &v6gw; 5036 else 5037 addr = v6dstp; 5038 5039 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 5040 dst_ill->ill_phys_addr_length, 5041 dst_ill->ill_sap, 5042 dst_ill->ill_sap_length); 5043 } else { 5044 dlureq_mp = ill_dlur_gen(NULL, 5045 dst_ill->ill_phys_addr_length, 5046 dst_ill->ill_sap, 5047 dst_ill->ill_sap_length); 5048 } 5049 if (dlureq_mp == NULL) 5050 break; 5051 /* 5052 * TSol note: We are creating the ire cache for the 5053 * destination 'dst'. If 'dst' is offlink, going 5054 * through the first hop 'gw', the security attributes 5055 * of 'dst' must be set to point to the gateway 5056 * credentials of gateway 'gw'. If 'dst' is onlink, it 5057 * is possible that 'dst' is a potential gateway that is 5058 * referenced by some route that has some security 5059 * attributes. Thus in the former case, we need to do a 5060 * gcgrp_lookup of 'gw' while in the latter case we 5061 * need to do gcgrp_lookup of 'dst' itself. 5062 */ 5063 ga.ga_af = AF_INET6; 5064 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5065 ga.ga_addr = v6gw; 5066 else 5067 ga.ga_addr = *v6dstp; 5068 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5069 5070 /* 5071 * Note: the new ire inherits sire flags RTF_SETSRC 5072 * and RTF_MULTIRT to propagate those rules from prefix 5073 * to cache. 5074 */ 5075 ire = ire_create_v6( 5076 v6dstp, /* dest address */ 5077 &ipv6_all_ones, /* mask */ 5078 &src_ipif->ipif_v6src_addr, /* source address */ 5079 &v6gw, /* gateway address */ 5080 &save_ire->ire_max_frag, 5081 NULL, /* Fast Path header */ 5082 dst_ill->ill_rq, /* recv-from queue */ 5083 dst_ill->ill_wq, /* send-to queue */ 5084 IRE_CACHE, 5085 dlureq_mp, 5086 src_ipif, 5087 &save_ire->ire_mask_v6, /* Parent mask */ 5088 (sire != NULL) ? /* Parent handle */ 5089 sire->ire_phandle : 0, 5090 save_ire->ire_ihandle, /* Interface handle */ 5091 (sire != NULL) ? 
/* flags if any */ 5092 sire->ire_flags & 5093 (RTF_SETSRC | RTF_MULTIRT) : 0, 5094 &(save_ire->ire_uinfo), 5095 NULL, 5096 gcgrp); 5097 5098 freeb(dlureq_mp); 5099 5100 if (ire == NULL) { 5101 if (gcgrp != NULL) { 5102 GCGRP_REFRELE(gcgrp); 5103 gcgrp = NULL; 5104 } 5105 ire_refrele(save_ire); 5106 break; 5107 } 5108 5109 /* reference now held by IRE */ 5110 gcgrp = NULL; 5111 5112 ire->ire_marks |= ire_marks; 5113 5114 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5115 dst = v6gw; 5116 else 5117 dst = *v6dstp; 5118 err = ndp_noresolver(dst_ill, &dst); 5119 if (err != 0) { 5120 ire_refrele(save_ire); 5121 break; 5122 } 5123 5124 /* Prevent save_ire from getting deleted */ 5125 IRB_REFHOLD(save_ire->ire_bucket); 5126 /* Has it been removed already ? */ 5127 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5128 IRB_REFRELE(save_ire->ire_bucket); 5129 ire_refrele(save_ire); 5130 break; 5131 } 5132 5133 xmit_mp = first_mp; 5134 /* 5135 * In case of MULTIRT, a copy of the current packet 5136 * to send is made to further re-enter the 5137 * loop and attempt another route resolution 5138 */ 5139 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5140 copy_mp = copymsg(first_mp); 5141 if (copy_mp != NULL) { 5142 xmit_mp = copy_mp; 5143 MULTIRT_DEBUG_TAG(first_mp); 5144 } 5145 } 5146 ire_add_then_send(q, ire, xmit_mp); 5147 if (ip6_asp_table_held) { 5148 ip6_asp_table_refrele(); 5149 ip6_asp_table_held = B_FALSE; 5150 } 5151 5152 /* Assert that it is not deleted yet. */ 5153 ASSERT(save_ire->ire_ptpn != NULL); 5154 IRB_REFRELE(save_ire->ire_bucket); 5155 ire_refrele(save_ire); 5156 5157 if (copy_mp != NULL) { 5158 /* 5159 * If we found a (no)resolver, we ignore any 5160 * trailing top priority IRE_CACHE in 5161 * further loops. This ensures that we do not 5162 * omit any (no)resolver despite the priority 5163 * in this call. 5164 * IRE_CACHE, if any, will be processed 5165 * by another thread entering ip_newroute(), 5166 * (on resolver response, for example). 
5167 * We use this to force multiple parallel 5168 * resolution as soon as a packet needs to be 5169 * sent. The result is, after one packet 5170 * emission all reachable routes are generally 5171 * resolved. 5172 * Otherwise, complete resolution of MULTIRT 5173 * routes would require several emissions as 5174 * side effect. 5175 */ 5176 multirt_flags &= ~MULTIRT_CACHEGW; 5177 5178 /* 5179 * Search for the next unresolved multirt 5180 * route. 5181 */ 5182 copy_mp = NULL; 5183 save_ire = NULL; 5184 ire = NULL; 5185 /* re-enter the loop */ 5186 multirt_resolve_next = B_TRUE; 5187 continue; 5188 } 5189 5190 /* Don't need sire anymore */ 5191 if (sire != NULL) 5192 ire_refrele(sire); 5193 ill_refrele(dst_ill); 5194 ipif_refrele(src_ipif); 5195 return; 5196 5197 case IRE_IF_RESOLVER: 5198 /* 5199 * We can't build an IRE_CACHE yet, but at least we 5200 * found a resolver that can help. 5201 */ 5202 dst = *v6dstp; 5203 5204 /* 5205 * To be at this point in the code with a non-zero gw 5206 * means that dst is reachable through a gateway that 5207 * we have never resolved. By changing dst to the gw 5208 * addr we resolve the gateway first. When 5209 * ire_add_then_send() tries to put the IP dg to dst, 5210 * it will reenter ip_newroute() at which time we will 5211 * find the IRE_CACHE for the gw and create another 5212 * IRE_CACHE above (for dst itself). 5213 */ 5214 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5215 save_dst = dst; 5216 dst = v6gw; 5217 v6gw = ipv6_all_zeros; 5218 } 5219 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5220 /* 5221 * Ask the external resolver to do its thing. 
5222 * Make an mblk chain in the following form: 5223 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5224 */ 5225 mblk_t *ire_mp; 5226 mblk_t *areq_mp; 5227 areq_t *areq; 5228 in6_addr_t *addrp; 5229 5230 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5231 if (ip6_asp_table_held) { 5232 ip6_asp_table_refrele(); 5233 ip6_asp_table_held = B_FALSE; 5234 } 5235 ire = ire_create_mp_v6( 5236 &dst, /* dest address */ 5237 &ipv6_all_ones, /* mask */ 5238 &src_ipif->ipif_v6src_addr, 5239 /* source address */ 5240 &v6gw, /* gateway address */ 5241 NULL, /* Fast Path header */ 5242 dst_ill->ill_rq, /* recv-from queue */ 5243 dst_ill->ill_wq, /* send-to queue */ 5244 IRE_CACHE, 5245 NULL, 5246 src_ipif, 5247 &save_ire->ire_mask_v6, 5248 /* Parent mask */ 5249 0, 5250 save_ire->ire_ihandle, 5251 /* Interface handle */ 5252 0, /* flags if any */ 5253 &(save_ire->ire_uinfo), 5254 NULL, 5255 NULL); 5256 5257 ire_refrele(save_ire); 5258 if (ire == NULL) { 5259 ip1dbg(("ip_newroute_v6:" 5260 "ire is NULL\n")); 5261 break; 5262 } 5263 5264 if ((sire != NULL) && 5265 (sire->ire_flags & RTF_MULTIRT)) { 5266 /* 5267 * processing a copy of the packet to 5268 * send for further resolution loops 5269 */ 5270 copy_mp = copymsg(first_mp); 5271 if (copy_mp != NULL) 5272 MULTIRT_DEBUG_TAG(copy_mp); 5273 } 5274 ire->ire_marks |= ire_marks; 5275 ire_mp = ire->ire_mp; 5276 /* 5277 * Now create or find an nce for this interface. 5278 * The hw addr will need to to be set from 5279 * the reply to the AR_ENTRY_QUERY that 5280 * we're about to send. This will be done in 5281 * ire_add_v6(). 5282 */ 5283 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5284 switch (err) { 5285 case 0: 5286 /* 5287 * New cache entry created. 5288 * Break, then ask the external 5289 * resolver. 5290 */ 5291 break; 5292 case EINPROGRESS: 5293 /* 5294 * Resolution in progress; 5295 * packet has been queued by 5296 * ndp_resolver(). 
5297 */ 5298 ire_delete(ire); 5299 ire = NULL; 5300 /* 5301 * Check if another multirt 5302 * route must be resolved. 5303 */ 5304 if (copy_mp != NULL) { 5305 /* 5306 * If we found a resolver, we 5307 * ignore any trailing top 5308 * priority IRE_CACHE in 5309 * further loops. The reason is 5310 * the same as for noresolver. 5311 */ 5312 multirt_flags &= 5313 ~MULTIRT_CACHEGW; 5314 /* 5315 * Search for the next 5316 * unresolved multirt route. 5317 */ 5318 first_mp = copy_mp; 5319 copy_mp = NULL; 5320 mp = first_mp; 5321 if (mp->b_datap->db_type == 5322 M_CTL) { 5323 mp = mp->b_cont; 5324 } 5325 ASSERT(sire != NULL); 5326 dst = save_dst; 5327 /* 5328 * re-enter the loop 5329 */ 5330 multirt_resolve_next = 5331 B_TRUE; 5332 continue; 5333 } 5334 5335 if (sire != NULL) 5336 ire_refrele(sire); 5337 ill_refrele(dst_ill); 5338 ipif_refrele(src_ipif); 5339 return; 5340 default: 5341 /* 5342 * Transient error; packet will be 5343 * freed. 5344 */ 5345 ire_delete(ire); 5346 ire = NULL; 5347 break; 5348 } 5349 if (err != 0) 5350 break; 5351 /* 5352 * Now set up the AR_ENTRY_QUERY and send it. 5353 */ 5354 areq_mp = ill_arp_alloc(dst_ill, 5355 (uchar_t *)&ipv6_areq_template, 5356 (caddr_t)&dst); 5357 if (areq_mp == NULL) { 5358 ip1dbg(("ip_newroute_v6:" 5359 "areq_mp is NULL\n")); 5360 freemsg(ire_mp); 5361 break; 5362 } 5363 areq = (areq_t *)areq_mp->b_rptr; 5364 addrp = (in6_addr_t *)((char *)areq + 5365 areq->areq_target_addr_offset); 5366 *addrp = dst; 5367 addrp = (in6_addr_t *)((char *)areq + 5368 areq->areq_sender_addr_offset); 5369 *addrp = src_ipif->ipif_v6src_addr; 5370 /* 5371 * link the chain, then send up to the resolver. 5372 */ 5373 linkb(areq_mp, ire_mp); 5374 linkb(areq_mp, mp); 5375 ip1dbg(("ip_newroute_v6:" 5376 "putnext to resolver\n")); 5377 putnext(dst_ill->ill_rq, areq_mp); 5378 /* 5379 * Check if another multirt route 5380 * must be resolved. 
5381 */ 5382 ire = NULL; 5383 if (copy_mp != NULL) { 5384 /* 5385 * If we find a resolver, we ignore any 5386 * trailing top priority IRE_CACHE in 5387 * further loops. The reason is the 5388 * same as for noresolver. 5389 */ 5390 multirt_flags &= ~MULTIRT_CACHEGW; 5391 /* 5392 * Search for the next unresolved 5393 * multirt route. 5394 */ 5395 first_mp = copy_mp; 5396 copy_mp = NULL; 5397 mp = first_mp; 5398 if (mp->b_datap->db_type == M_CTL) { 5399 mp = mp->b_cont; 5400 } 5401 ASSERT(sire != NULL); 5402 dst = save_dst; 5403 /* 5404 * re-enter the loop 5405 */ 5406 multirt_resolve_next = B_TRUE; 5407 continue; 5408 } 5409 5410 if (sire != NULL) 5411 ire_refrele(sire); 5412 ill_refrele(dst_ill); 5413 ipif_refrele(src_ipif); 5414 return; 5415 } 5416 /* 5417 * Non-external resolver case. 5418 * 5419 * TSol note: Please see the note above the 5420 * IRE_IF_NORESOLVER case. 5421 */ 5422 ga.ga_af = AF_INET6; 5423 ga.ga_addr = dst; 5424 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5425 5426 ire = ire_create_v6( 5427 &dst, /* dest address */ 5428 &ipv6_all_ones, /* mask */ 5429 &src_ipif->ipif_v6src_addr, /* source address */ 5430 &v6gw, /* gateway address */ 5431 &save_ire->ire_max_frag, 5432 NULL, /* Fast Path header */ 5433 dst_ill->ill_rq, /* recv-from queue */ 5434 dst_ill->ill_wq, /* send-to queue */ 5435 IRE_CACHE, 5436 NULL, 5437 src_ipif, 5438 &save_ire->ire_mask_v6, /* Parent mask */ 5439 0, 5440 save_ire->ire_ihandle, /* Interface handle */ 5441 0, /* flags if any */ 5442 &(save_ire->ire_uinfo), 5443 NULL, 5444 gcgrp); 5445 5446 if (ire == NULL) { 5447 if (gcgrp != NULL) { 5448 GCGRP_REFRELE(gcgrp); 5449 gcgrp = NULL; 5450 } 5451 ire_refrele(save_ire); 5452 break; 5453 } 5454 5455 /* reference now held by IRE */ 5456 gcgrp = NULL; 5457 5458 if ((sire != NULL) && 5459 (sire->ire_flags & RTF_MULTIRT)) { 5460 copy_mp = copymsg(first_mp); 5461 if (copy_mp != NULL) 5462 MULTIRT_DEBUG_TAG(copy_mp); 5463 } 5464 5465 ire->ire_marks |= ire_marks; 5466 err = ndp_resolver(dst_ill, 
&dst, first_mp, zoneid); 5467 switch (err) { 5468 case 0: 5469 /* Prevent save_ire from getting deleted */ 5470 IRB_REFHOLD(save_ire->ire_bucket); 5471 /* Has it been removed already ? */ 5472 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5473 IRB_REFRELE(save_ire->ire_bucket); 5474 ire_refrele(save_ire); 5475 break; 5476 } 5477 5478 /* 5479 * We have a resolved cache entry, 5480 * add in the IRE. 5481 */ 5482 ire_add_then_send(q, ire, first_mp); 5483 if (ip6_asp_table_held) { 5484 ip6_asp_table_refrele(); 5485 ip6_asp_table_held = B_FALSE; 5486 } 5487 5488 /* Assert that it is not deleted yet. */ 5489 ASSERT(save_ire->ire_ptpn != NULL); 5490 IRB_REFRELE(save_ire->ire_bucket); 5491 ire_refrele(save_ire); 5492 /* 5493 * Check if another multirt route 5494 * must be resolved. 5495 */ 5496 ire = NULL; 5497 if (copy_mp != NULL) { 5498 /* 5499 * If we find a resolver, we ignore any 5500 * trailing top priority IRE_CACHE in 5501 * further loops. The reason is the 5502 * same as for noresolver. 5503 */ 5504 multirt_flags &= ~MULTIRT_CACHEGW; 5505 /* 5506 * Search for the next unresolved 5507 * multirt route. 5508 */ 5509 first_mp = copy_mp; 5510 copy_mp = NULL; 5511 mp = first_mp; 5512 if (mp->b_datap->db_type == M_CTL) { 5513 mp = mp->b_cont; 5514 } 5515 ASSERT(sire != NULL); 5516 dst = save_dst; 5517 /* 5518 * re-enter the loop 5519 */ 5520 multirt_resolve_next = B_TRUE; 5521 continue; 5522 } 5523 5524 if (sire != NULL) 5525 ire_refrele(sire); 5526 ill_refrele(dst_ill); 5527 ipif_refrele(src_ipif); 5528 return; 5529 5530 case EINPROGRESS: 5531 /* 5532 * mp was consumed - presumably queued. 5533 * No need for ire, presumably resolution is 5534 * in progress, and ire will be added when the 5535 * address is resolved. 
5536 */ 5537 if (ip6_asp_table_held) { 5538 ip6_asp_table_refrele(); 5539 ip6_asp_table_held = B_FALSE; 5540 } 5541 ASSERT(ire->ire_nce == NULL); 5542 ire_delete(ire); 5543 ire_refrele(save_ire); 5544 /* 5545 * Check if another multirt route 5546 * must be resolved. 5547 */ 5548 ire = NULL; 5549 if (copy_mp != NULL) { 5550 /* 5551 * If we find a resolver, we ignore any 5552 * trailing top priority IRE_CACHE in 5553 * further loops. The reason is the 5554 * same as for noresolver. 5555 */ 5556 multirt_flags &= ~MULTIRT_CACHEGW; 5557 /* 5558 * Search for the next unresolved 5559 * multirt route. 5560 */ 5561 first_mp = copy_mp; 5562 copy_mp = NULL; 5563 mp = first_mp; 5564 if (mp->b_datap->db_type == M_CTL) { 5565 mp = mp->b_cont; 5566 } 5567 ASSERT(sire != NULL); 5568 dst = save_dst; 5569 /* 5570 * re-enter the loop 5571 */ 5572 multirt_resolve_next = B_TRUE; 5573 continue; 5574 } 5575 if (sire != NULL) 5576 ire_refrele(sire); 5577 ill_refrele(dst_ill); 5578 ipif_refrele(src_ipif); 5579 return; 5580 default: 5581 /* Some transient error */ 5582 ASSERT(ire->ire_nce == NULL); 5583 ire_refrele(save_ire); 5584 break; 5585 } 5586 break; 5587 default: 5588 break; 5589 } 5590 if (ip6_asp_table_held) { 5591 ip6_asp_table_refrele(); 5592 ip6_asp_table_held = B_FALSE; 5593 } 5594 } while (multirt_resolve_next); 5595 5596 err_ret: 5597 ip1dbg(("ip_newroute_v6: dropped\n")); 5598 if (src_ipif != NULL) 5599 ipif_refrele(src_ipif); 5600 if (dst_ill != NULL) { 5601 need_rele = B_TRUE; 5602 ill = dst_ill; 5603 } 5604 if (ill != NULL) { 5605 if (mp->b_prev != NULL) { 5606 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 5607 } else { 5608 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 5609 } 5610 5611 if (need_rele) 5612 ill_refrele(ill); 5613 } else { 5614 if (mp->b_prev != NULL) { 5615 BUMP_MIB(&ip6_mib, ipv6InDiscards); 5616 } else { 5617 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 5618 } 5619 } 5620 /* Did this packet originate externally? 
*/ 5621 if (mp->b_prev) { 5622 mp->b_next = NULL; 5623 mp->b_prev = NULL; 5624 } 5625 if (copy_mp != NULL) { 5626 MULTIRT_DEBUG_UNTAG(copy_mp); 5627 freemsg(copy_mp); 5628 } 5629 MULTIRT_DEBUG_UNTAG(first_mp); 5630 freemsg(first_mp); 5631 if (ire != NULL) 5632 ire_refrele(ire); 5633 if (sire != NULL) 5634 ire_refrele(sire); 5635 return; 5636 5637 icmp_err_ret: 5638 if (ip6_asp_table_held) 5639 ip6_asp_table_refrele(); 5640 if (src_ipif != NULL) 5641 ipif_refrele(src_ipif); 5642 if (dst_ill != NULL) { 5643 need_rele = B_TRUE; 5644 ill = dst_ill; 5645 } 5646 ip1dbg(("ip_newroute_v6: no route\n")); 5647 if (sire != NULL) 5648 ire_refrele(sire); 5649 /* 5650 * We need to set sire to NULL to avoid double freeing if we 5651 * ever goto err_ret from below. 5652 */ 5653 sire = NULL; 5654 ip6h = (ip6_t *)mp->b_rptr; 5655 /* Skip ip6i_t header if present */ 5656 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5657 /* Make sure the IPv6 header is present */ 5658 if ((mp->b_wptr - (uchar_t *)ip6h) < 5659 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5660 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5661 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5662 goto err_ret; 5663 } 5664 } 5665 mp->b_rptr += sizeof (ip6i_t); 5666 ip6h = (ip6_t *)mp->b_rptr; 5667 } 5668 /* Did this packet originate externally? 
*/ 5669 if (mp->b_prev) { 5670 if (ill != NULL) { 5671 BUMP_MIB(ill->ill_ip6_mib, ipv6InNoRoutes); 5672 } else { 5673 BUMP_MIB(&ip6_mib, ipv6InNoRoutes); 5674 } 5675 mp->b_next = NULL; 5676 mp->b_prev = NULL; 5677 q = WR(q); 5678 } else { 5679 if (ill != NULL) { 5680 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 5681 } else { 5682 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 5683 } 5684 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5685 /* Failed */ 5686 if (copy_mp != NULL) { 5687 MULTIRT_DEBUG_UNTAG(copy_mp); 5688 freemsg(copy_mp); 5689 } 5690 MULTIRT_DEBUG_UNTAG(first_mp); 5691 freemsg(first_mp); 5692 if (ire != NULL) 5693 ire_refrele(ire); 5694 if (need_rele) 5695 ill_refrele(ill); 5696 return; 5697 } 5698 } 5699 5700 if (need_rele) 5701 ill_refrele(ill); 5702 5703 /* 5704 * At this point we will have ire only if RTF_BLACKHOLE 5705 * or RTF_REJECT flags are set on the IRE. It will not 5706 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5707 */ 5708 if (ire != NULL) { 5709 if (ire->ire_flags & RTF_BLACKHOLE) { 5710 ire_refrele(ire); 5711 if (copy_mp != NULL) { 5712 MULTIRT_DEBUG_UNTAG(copy_mp); 5713 freemsg(copy_mp); 5714 } 5715 MULTIRT_DEBUG_UNTAG(first_mp); 5716 freemsg(first_mp); 5717 return; 5718 } 5719 ire_refrele(ire); 5720 } 5721 if (ip_debug > 3) { 5722 /* ip2dbg */ 5723 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5724 AF_INET6, v6dstp); 5725 } 5726 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5727 B_FALSE, B_FALSE, zoneid); 5728 } 5729 5730 /* 5731 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5732 * we need to send out a packet to a destination address for which we do not 5733 * have specific routing information. It is only used for multicast packets. 5734 * 5735 * If unspec_src we allow creating an IRE with source address zero. 5736 * ire_send_v6() will delete it after the packet is sent. 
5737 */ 5738 void 5739 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5740 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5741 { 5742 ire_t *ire = NULL; 5743 ipif_t *src_ipif = NULL; 5744 int err = 0; 5745 ill_t *dst_ill = NULL; 5746 ire_t *save_ire; 5747 ushort_t ire_marks = 0; 5748 ipsec_out_t *io; 5749 ill_t *attach_ill = NULL; 5750 ill_t *ill; 5751 ip6_t *ip6h; 5752 mblk_t *first_mp; 5753 boolean_t ip6i_present; 5754 ire_t *fire = NULL; 5755 mblk_t *copy_mp = NULL; 5756 boolean_t multirt_resolve_next; 5757 in6_addr_t *v6dstp = &v6dst; 5758 boolean_t ipif_held = B_FALSE; 5759 boolean_t ill_held = B_FALSE; 5760 boolean_t ip6_asp_table_held = B_FALSE; 5761 5762 /* 5763 * This loop is run only once in most cases. 5764 * We loop to resolve further routes only when the destination 5765 * can be reached through multiple RTF_MULTIRT-flagged ires. 5766 */ 5767 do { 5768 multirt_resolve_next = B_FALSE; 5769 if (dst_ill != NULL) { 5770 ill_refrele(dst_ill); 5771 dst_ill = NULL; 5772 } 5773 5774 if (src_ipif != NULL) { 5775 ipif_refrele(src_ipif); 5776 src_ipif = NULL; 5777 } 5778 ASSERT(ipif != NULL); 5779 ill = ipif->ipif_ill; 5780 5781 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5782 if (ip_debug > 2) { 5783 /* ip1dbg */ 5784 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5785 AF_INET6, v6dstp); 5786 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5787 ill->ill_name, ipif->ipif_isv6); 5788 } 5789 5790 first_mp = mp; 5791 if (mp->b_datap->db_type == M_CTL) { 5792 mp = mp->b_cont; 5793 io = (ipsec_out_t *)first_mp->b_rptr; 5794 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5795 } else { 5796 io = NULL; 5797 } 5798 5799 /* 5800 * If the interface is a pt-pt interface we look for an 5801 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5802 * local_address and the pt-pt destination address. 5803 * Otherwise we just match the local address. 
5804 */ 5805 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5806 goto err_ret; 5807 } 5808 /* 5809 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5810 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5811 * as it could be NULL. 5812 * 5813 * This information can appear either in an ip6i_t or an 5814 * IPSEC_OUT message. 5815 */ 5816 ip6h = (ip6_t *)mp->b_rptr; 5817 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5818 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5819 if (!ip6i_present || 5820 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5821 attach_ill = ip_grab_attach_ill(ill, first_mp, 5822 (ip6i_present ? 5823 ((ip6i_t *)ip6h)->ip6i_ifindex : 5824 io->ipsec_out_ill_index), B_TRUE); 5825 /* Failure case frees things for us. */ 5826 if (attach_ill == NULL) 5827 return; 5828 5829 /* 5830 * Check if we need an ire that will not be 5831 * looked up by anybody else i.e. HIDDEN. 5832 */ 5833 if (ill_is_probeonly(attach_ill)) 5834 ire_marks = IRE_MARK_HIDDEN; 5835 } 5836 } 5837 5838 /* 5839 * We check if an IRE_OFFSUBNET for the addr that goes through 5840 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5841 * RTF_MULTIRT flags must be honored. 5842 */ 5843 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5844 ip2dbg(("ip_newroute_ipif_v6: " 5845 "ipif_lookup_multi_ire_v6(" 5846 "ipif %p, dst %08x) = fire %p\n", 5847 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5848 (void *)fire)); 5849 5850 /* 5851 * If the application specified the ill (ifindex), we still 5852 * load spread. Only if the packets needs to go out specifically 5853 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5854 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5855 * multirouting, then we don't try to use a different ill for 5856 * load spreading. 
5857 */ 5858 if (attach_ill == NULL) { 5859 /* 5860 * If the interface belongs to an interface group, 5861 * make sure the next possible interface in the group 5862 * is used. This encourages load spreading among peers 5863 * in an interface group. 5864 * 5865 * Note: While we pick a dst_ill we are really only 5866 * interested in the ill for load spreading. The source 5867 * ipif is determined by source address selection below. 5868 */ 5869 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5870 dst_ill = ipif->ipif_ill; 5871 /* For uniformity do a refhold */ 5872 ill_refhold(dst_ill); 5873 } else { 5874 /* refheld by ip_newroute_get_dst_ill_v6 */ 5875 dst_ill = 5876 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5877 } 5878 if (dst_ill == NULL) { 5879 if (ip_debug > 2) { 5880 pr_addr_dbg("ip_newroute_ipif_v6: " 5881 "no dst ill for dst %s\n", 5882 AF_INET6, v6dstp); 5883 } 5884 goto err_ret; 5885 } 5886 } else { 5887 dst_ill = ipif->ipif_ill; 5888 /* 5889 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5890 * and IPV6_BOUND_PIF case. 5891 */ 5892 ASSERT(dst_ill == attach_ill); 5893 /* attach_ill is already refheld */ 5894 } 5895 /* 5896 * Pick a source address which matches the scope of the 5897 * destination address. 5898 * For RTF_SETSRC routes, the source address is imposed by the 5899 * parent ire (fire). 5900 */ 5901 ASSERT(src_ipif == NULL); 5902 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5903 /* 5904 * Check that the ipif matching the requested source 5905 * address still exists. 
5906 */ 5907 src_ipif = 5908 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5909 NULL, zoneid, NULL, NULL, NULL, NULL); 5910 } 5911 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5912 ip6_asp_table_held = B_TRUE; 5913 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5914 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5915 } 5916 5917 if (src_ipif == NULL) { 5918 if (!unspec_src) { 5919 if (ip_debug > 2) { 5920 /* ip1dbg */ 5921 pr_addr_dbg("ip_newroute_ipif_v6: " 5922 "no src for dst %s\n,", 5923 AF_INET6, v6dstp); 5924 printf(" through interface %s\n", 5925 dst_ill->ill_name); 5926 } 5927 goto err_ret; 5928 } 5929 src_ipif = ipif; 5930 ipif_refhold(src_ipif); 5931 } 5932 ire = ipif_to_ire_v6(ipif); 5933 if (ire == NULL) { 5934 if (ip_debug > 2) { 5935 /* ip1dbg */ 5936 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5937 AF_INET6, &ipif->ipif_v6lcl_addr); 5938 printf("ip_newroute_ipif_v6: " 5939 "if %s\n", dst_ill->ill_name); 5940 } 5941 goto err_ret; 5942 } 5943 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5944 goto err_ret; 5945 5946 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5947 5948 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5949 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5950 if (ip_debug > 2) { 5951 /* ip1dbg */ 5952 pr_addr_dbg(" address %s\n", 5953 AF_INET6, &ire->ire_src_addr_v6); 5954 } 5955 save_ire = ire; 5956 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5957 (void *)ire, (void *)ipif)); 5958 5959 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5960 /* 5961 * an IRE_OFFSUBET was looked up 5962 * on that interface. 5963 * this ire has RTF_MULTIRT flag, 5964 * so the resolution loop 5965 * will be re-entered to resolve 5966 * additional routes on other 5967 * interfaces. For that purpose, 5968 * a copy of the packet is 5969 * made at this point. 
5970 */ 5971 fire->ire_last_used_time = lbolt; 5972 copy_mp = copymsg(first_mp); 5973 if (copy_mp) { 5974 MULTIRT_DEBUG_TAG(copy_mp); 5975 } 5976 } 5977 5978 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5979 switch (ire->ire_type) { 5980 case IRE_IF_NORESOLVER: { 5981 /* We have what we need to build an IRE_CACHE. */ 5982 mblk_t *dlureq_mp; 5983 5984 /* 5985 * Create a new dlureq_mp with the 5986 * IPv6 gateway address in destination address in the 5987 * DLPI hdr if the physical length is exactly 16 bytes. 5988 */ 5989 ASSERT(dst_ill->ill_isv6); 5990 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5991 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5992 dst_ill->ill_phys_addr_length, 5993 dst_ill->ill_sap, 5994 dst_ill->ill_sap_length); 5995 } else { 5996 dlureq_mp = ill_dlur_gen(NULL, 5997 dst_ill->ill_phys_addr_length, 5998 dst_ill->ill_sap, 5999 dst_ill->ill_sap_length); 6000 } 6001 6002 if (dlureq_mp == NULL) 6003 break; 6004 /* 6005 * The newly created ire will inherit the flags of the 6006 * parent ire, if any. 6007 */ 6008 ire = ire_create_v6( 6009 v6dstp, /* dest address */ 6010 &ipv6_all_ones, /* mask */ 6011 &src_ipif->ipif_v6src_addr, /* source address */ 6012 NULL, /* gateway address */ 6013 &save_ire->ire_max_frag, 6014 NULL, /* Fast Path header */ 6015 dst_ill->ill_rq, /* recv-from queue */ 6016 dst_ill->ill_wq, /* send-to queue */ 6017 IRE_CACHE, 6018 dlureq_mp, 6019 src_ipif, 6020 NULL, 6021 (fire != NULL) ? /* Parent handle */ 6022 fire->ire_phandle : 0, 6023 save_ire->ire_ihandle, /* Interface handle */ 6024 (fire != NULL) ? 
6025 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6026 0, 6027 &ire_uinfo_null, 6028 NULL, 6029 NULL); 6030 6031 freeb(dlureq_mp); 6032 6033 if (ire == NULL) { 6034 ire_refrele(save_ire); 6035 break; 6036 } 6037 6038 ire->ire_marks |= ire_marks; 6039 6040 err = ndp_noresolver(dst_ill, v6dstp); 6041 if (err != 0) { 6042 ire_refrele(save_ire); 6043 break; 6044 } 6045 6046 /* Prevent save_ire from getting deleted */ 6047 IRB_REFHOLD(save_ire->ire_bucket); 6048 /* Has it been removed already ? */ 6049 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6050 IRB_REFRELE(save_ire->ire_bucket); 6051 ire_refrele(save_ire); 6052 break; 6053 } 6054 6055 ire_add_then_send(q, ire, first_mp); 6056 if (ip6_asp_table_held) { 6057 ip6_asp_table_refrele(); 6058 ip6_asp_table_held = B_FALSE; 6059 } 6060 6061 /* Assert that it is not deleted yet. */ 6062 ASSERT(save_ire->ire_ptpn != NULL); 6063 IRB_REFRELE(save_ire->ire_bucket); 6064 ire_refrele(save_ire); 6065 if (fire != NULL) { 6066 ire_refrele(fire); 6067 fire = NULL; 6068 } 6069 6070 /* 6071 * The resolution loop is re-entered if we 6072 * actually are in a multirouting case. 6073 */ 6074 if (copy_mp != NULL) { 6075 boolean_t need_resolve = 6076 ire_multirt_need_resolve_v6(v6dstp, 6077 MBLK_GETLABEL(copy_mp)); 6078 if (!need_resolve) { 6079 MULTIRT_DEBUG_UNTAG(copy_mp); 6080 freemsg(copy_mp); 6081 copy_mp = NULL; 6082 } else { 6083 /* 6084 * ipif_lookup_group_v6() calls 6085 * ire_lookup_multi_v6() that uses 6086 * ire_ftable_lookup_v6() to find 6087 * an IRE_INTERFACE for the group. 6088 * In the multirt case, 6089 * ire_lookup_multi_v6() then invokes 6090 * ire_multirt_lookup_v6() to find 6091 * the next resolvable ire. 6092 * As a result, we obtain a new 6093 * interface, derived from the 6094 * next ire. 
6095 */ 6096 if (ipif_held) { 6097 ipif_refrele(ipif); 6098 ipif_held = B_FALSE; 6099 } 6100 ipif = ipif_lookup_group_v6(v6dstp, 6101 zoneid); 6102 ip2dbg(("ip_newroute_ipif: " 6103 "multirt dst %08x, ipif %p\n", 6104 ntohl(V4_PART_OF_V6((*v6dstp))), 6105 (void *)ipif)); 6106 if (ipif != NULL) { 6107 ipif_held = B_TRUE; 6108 mp = copy_mp; 6109 copy_mp = NULL; 6110 multirt_resolve_next = 6111 B_TRUE; 6112 continue; 6113 } else { 6114 freemsg(copy_mp); 6115 } 6116 } 6117 } 6118 ill_refrele(dst_ill); 6119 if (ipif_held) { 6120 ipif_refrele(ipif); 6121 ipif_held = B_FALSE; 6122 } 6123 if (src_ipif != NULL) 6124 ipif_refrele(src_ipif); 6125 return; 6126 } 6127 case IRE_IF_RESOLVER: { 6128 6129 ASSERT(dst_ill->ill_isv6); 6130 6131 /* 6132 * We obtain a partial IRE_CACHE which we will pass 6133 * along with the resolver query. When the response 6134 * comes back it will be there ready for us to add. 6135 */ 6136 /* 6137 * the newly created ire will inherit the flags of the 6138 * parent ire, if any. 6139 */ 6140 ire = ire_create_v6( 6141 v6dstp, /* dest address */ 6142 &ipv6_all_ones, /* mask */ 6143 &src_ipif->ipif_v6src_addr, /* source address */ 6144 NULL, /* gateway address */ 6145 &save_ire->ire_max_frag, 6146 NULL, /* Fast Path header */ 6147 dst_ill->ill_rq, /* recv-from queue */ 6148 dst_ill->ill_wq, /* send-to queue */ 6149 IRE_CACHE, 6150 NULL, 6151 src_ipif, 6152 NULL, 6153 (fire != NULL) ? /* Parent handle */ 6154 fire->ire_phandle : 0, 6155 save_ire->ire_ihandle, /* Interface handle */ 6156 (fire != NULL) ? 
6157 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6158 0, 6159 &ire_uinfo_null, 6160 NULL, 6161 NULL); 6162 6163 if (ire == NULL) { 6164 ire_refrele(save_ire); 6165 break; 6166 } 6167 6168 ire->ire_marks |= ire_marks; 6169 6170 /* Resolve and add ire to the ctable */ 6171 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6172 switch (err) { 6173 case 0: 6174 /* Prevent save_ire from getting deleted */ 6175 IRB_REFHOLD(save_ire->ire_bucket); 6176 /* Has it been removed already ? */ 6177 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6178 IRB_REFRELE(save_ire->ire_bucket); 6179 ire_refrele(save_ire); 6180 break; 6181 } 6182 /* 6183 * We have a resolved cache entry, 6184 * add in the IRE. 6185 */ 6186 ire_add_then_send(q, ire, first_mp); 6187 if (ip6_asp_table_held) { 6188 ip6_asp_table_refrele(); 6189 ip6_asp_table_held = B_FALSE; 6190 } 6191 6192 /* Assert that it is not deleted yet. */ 6193 ASSERT(save_ire->ire_ptpn != NULL); 6194 IRB_REFRELE(save_ire->ire_bucket); 6195 ire_refrele(save_ire); 6196 if (fire != NULL) { 6197 ire_refrele(fire); 6198 fire = NULL; 6199 } 6200 6201 /* 6202 * The resolution loop is re-entered if we 6203 * actually are in a multirouting case. 6204 */ 6205 if (copy_mp != NULL) { 6206 boolean_t need_resolve = 6207 ire_multirt_need_resolve_v6(v6dstp, 6208 MBLK_GETLABEL(copy_mp)); 6209 if (!need_resolve) { 6210 MULTIRT_DEBUG_UNTAG(copy_mp); 6211 freemsg(copy_mp); 6212 copy_mp = NULL; 6213 } else { 6214 /* 6215 * ipif_lookup_group_v6() calls 6216 * ire_lookup_multi_v6() that 6217 * uses ire_ftable_lookup_v6() 6218 * to find an IRE_INTERFACE for 6219 * the group. In the multirt 6220 * case, ire_lookup_multi_v6() 6221 * then invokes 6222 * ire_multirt_lookup_v6() to 6223 * find the next resolvable ire. 6224 * As a result, we obtain a new 6225 * interface, derived from the 6226 * next ire. 
6227 */ 6228 if (ipif_held) { 6229 ipif_refrele(ipif); 6230 ipif_held = B_FALSE; 6231 } 6232 ipif = ipif_lookup_group_v6( 6233 v6dstp, zoneid); 6234 ip2dbg(("ip_newroute_ipif: " 6235 "multirt dst %08x, " 6236 "ipif %p\n", 6237 ntohl(V4_PART_OF_V6( 6238 (*v6dstp))), 6239 (void *)ipif)); 6240 if (ipif != NULL) { 6241 ipif_held = B_TRUE; 6242 mp = copy_mp; 6243 copy_mp = NULL; 6244 multirt_resolve_next = 6245 B_TRUE; 6246 continue; 6247 } else { 6248 freemsg(copy_mp); 6249 } 6250 } 6251 } 6252 ill_refrele(dst_ill); 6253 if (ipif_held) { 6254 ipif_refrele(ipif); 6255 ipif_held = B_FALSE; 6256 } 6257 if (src_ipif != NULL) 6258 ipif_refrele(src_ipif); 6259 return; 6260 6261 case EINPROGRESS: 6262 /* 6263 * mp was consumed - presumably queued. 6264 * No need for ire, presumably resolution is 6265 * in progress, and ire will be added when the 6266 * address is resolved. 6267 */ 6268 if (ip6_asp_table_held) { 6269 ip6_asp_table_refrele(); 6270 ip6_asp_table_held = B_FALSE; 6271 } 6272 ire_delete(ire); 6273 ire_refrele(save_ire); 6274 if (fire != NULL) { 6275 ire_refrele(fire); 6276 fire = NULL; 6277 } 6278 6279 /* 6280 * The resolution loop is re-entered if we 6281 * actually are in a multirouting case. 6282 */ 6283 if (copy_mp != NULL) { 6284 boolean_t need_resolve = 6285 ire_multirt_need_resolve_v6(v6dstp, 6286 MBLK_GETLABEL(copy_mp)); 6287 if (!need_resolve) { 6288 MULTIRT_DEBUG_UNTAG(copy_mp); 6289 freemsg(copy_mp); 6290 copy_mp = NULL; 6291 } else { 6292 /* 6293 * ipif_lookup_group_v6() calls 6294 * ire_lookup_multi_v6() that 6295 * uses ire_ftable_lookup_v6() 6296 * to find an IRE_INTERFACE for 6297 * the group. In the multirt 6298 * case, ire_lookup_multi_v6() 6299 * then invokes 6300 * ire_multirt_lookup_v6() to 6301 * find the next resolvable ire. 6302 * As a result, we obtain a new 6303 * interface, derived from the 6304 * next ire. 
6305 */ 6306 if (ipif_held) { 6307 ipif_refrele(ipif); 6308 ipif_held = B_FALSE; 6309 } 6310 ipif = ipif_lookup_group_v6( 6311 v6dstp, zoneid); 6312 ip2dbg(("ip_newroute_ipif: " 6313 "multirt dst %08x, " 6314 "ipif %p\n", 6315 ntohl(V4_PART_OF_V6( 6316 (*v6dstp))), 6317 (void *)ipif)); 6318 if (ipif != NULL) { 6319 ipif_held = B_TRUE; 6320 mp = copy_mp; 6321 copy_mp = NULL; 6322 multirt_resolve_next = 6323 B_TRUE; 6324 continue; 6325 } else { 6326 freemsg(copy_mp); 6327 } 6328 } 6329 } 6330 ill_refrele(dst_ill); 6331 if (ipif_held) { 6332 ipif_refrele(ipif); 6333 ipif_held = B_FALSE; 6334 } 6335 if (src_ipif != NULL) 6336 ipif_refrele(src_ipif); 6337 return; 6338 default: 6339 /* Some transient error */ 6340 ire_refrele(save_ire); 6341 break; 6342 } 6343 break; 6344 } 6345 default: 6346 break; 6347 } 6348 if (ip6_asp_table_held) { 6349 ip6_asp_table_refrele(); 6350 ip6_asp_table_held = B_FALSE; 6351 } 6352 } while (multirt_resolve_next); 6353 6354 err_ret: 6355 if (ip6_asp_table_held) 6356 ip6_asp_table_refrele(); 6357 if (ire != NULL) 6358 ire_refrele(ire); 6359 if (fire != NULL) 6360 ire_refrele(fire); 6361 if (ipif != NULL && ipif_held) 6362 ipif_refrele(ipif); 6363 if (src_ipif != NULL) 6364 ipif_refrele(src_ipif); 6365 /* Multicast - no point in trying to generate ICMP error */ 6366 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6367 if (dst_ill != NULL) { 6368 ill = dst_ill; 6369 ill_held = B_TRUE; 6370 } 6371 if (mp->b_prev || mp->b_next) { 6372 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6373 } else { 6374 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 6375 } 6376 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6377 mp->b_next = NULL; 6378 mp->b_prev = NULL; 6379 freemsg(first_mp); 6380 if (ill_held) 6381 ill_refrele(ill); 6382 } 6383 6384 /* 6385 * Parse and process any hop-by-hop or destination options. 6386 * 6387 * Assumes that q is an ill read queue so that ICMP errors for link-local 6388 * destinations are sent out the correct interface. 
6389 * 6390 * Returns -1 if there was an error and mp has been consumed. 6391 * Returns 0 if no special action is needed. 6392 * Returns 1 if the packet contained a router alert option for this node 6393 * which is verified to be "interesting/known" for our implementation. 6394 * 6395 * XXX Note: In future as more hbh or dest options are defined, 6396 * it may be better to have different routines for hbh and dest 6397 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6398 * may have same value in different namespaces. Or is it same namespace ?? 6399 * Current code checks for each opt_type (other than pads) if it is in 6400 * the expected nexthdr (hbh or dest) 6401 */ 6402 static int 6403 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6404 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6405 { 6406 uint8_t opt_type; 6407 uint_t optused; 6408 int ret = 0; 6409 mblk_t *first_mp; 6410 const char *errtype; 6411 zoneid_t zoneid; 6412 ill_t *ill = q->q_ptr; 6413 6414 first_mp = mp; 6415 if (mp->b_datap->db_type == M_CTL) { 6416 mp = mp->b_cont; 6417 } 6418 6419 while (optlen != 0) { 6420 opt_type = *optptr; 6421 if (opt_type == IP6OPT_PAD1) { 6422 optused = 1; 6423 } else { 6424 if (optlen < 2) 6425 goto bad_opt; 6426 errtype = "malformed"; 6427 if (opt_type == ip6opt_ls) { 6428 optused = 2 + optptr[1]; 6429 if (optused > optlen) 6430 goto bad_opt; 6431 } else switch (opt_type) { 6432 case IP6OPT_PADN: 6433 /* 6434 * Note:We don't verify that (N-2) pad octets 6435 * are zero as required by spec. Adhere to 6436 * "be liberal in what you accept..." part of 6437 * implementation philosophy (RFC791,RFC1122) 6438 */ 6439 optused = 2 + optptr[1]; 6440 if (optused > optlen) 6441 goto bad_opt; 6442 break; 6443 6444 case IP6OPT_JUMBO: 6445 if (hdr_type != IPPROTO_HOPOPTS) 6446 goto opt_error; 6447 goto opt_error; /* XXX Not implemented! 
*/ 6448 6449 case IP6OPT_ROUTER_ALERT: { 6450 struct ip6_opt_router *or; 6451 6452 if (hdr_type != IPPROTO_HOPOPTS) 6453 goto opt_error; 6454 optused = 2 + optptr[1]; 6455 if (optused > optlen) 6456 goto bad_opt; 6457 or = (struct ip6_opt_router *)optptr; 6458 /* Check total length and alignment */ 6459 if (optused != sizeof (*or) || 6460 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6461 goto opt_error; 6462 /* Check value */ 6463 switch (*((uint16_t *)or->ip6or_value)) { 6464 case IP6_ALERT_MLD: 6465 case IP6_ALERT_RSVP: 6466 ret = 1; 6467 } 6468 break; 6469 } 6470 case IP6OPT_HOME_ADDRESS: { 6471 /* 6472 * Minimal support for the home address option 6473 * (which is required by all IPv6 nodes). 6474 * Implement by just swapping the home address 6475 * and source address. 6476 * XXX Note: this has IPsec implications since 6477 * AH needs to take this into account. 6478 * Also, when IPsec is used we need to ensure 6479 * that this is only processed once 6480 * in the received packet (to avoid swapping 6481 * back and forth). 6482 * NOTE:This option processing is considered 6483 * to be unsafe and prone to a denial of 6484 * service attack. 6485 * The current processing is not safe even with 6486 * IPsec secured IP packets. Since the home 6487 * address option processing requirement still 6488 * is in the IETF draft and in the process of 6489 * being redefined for its usage, it has been 6490 * decided to turn off the option by default. 6491 * If this section of code needs to be executed, 6492 * ndd variable ip6_ignore_home_address_opt 6493 * should be set to 0 at the user's own risk. 6494 */ 6495 struct ip6_opt_home_address *oh; 6496 in6_addr_t tmp; 6497 6498 if (ipv6_ignore_home_address_opt) 6499 goto opt_error; 6500 6501 if (hdr_type != IPPROTO_DSTOPTS) 6502 goto opt_error; 6503 optused = 2 + optptr[1]; 6504 if (optused > optlen) 6505 goto bad_opt; 6506 6507 /* 6508 * We did this dest. opt the first time 6509 * around (i.e. before AH processing). 
6510 * If we've done AH... stop now. 6511 */ 6512 if (first_mp != mp) { 6513 ipsec_in_t *ii; 6514 6515 ii = (ipsec_in_t *)first_mp->b_rptr; 6516 if (ii->ipsec_in_ah_sa != NULL) 6517 break; 6518 } 6519 6520 oh = (struct ip6_opt_home_address *)optptr; 6521 /* Check total length and alignment */ 6522 if (optused < sizeof (*oh) || 6523 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6524 goto opt_error; 6525 /* Swap ip6_src and the home address */ 6526 tmp = ip6h->ip6_src; 6527 /* XXX Note: only 8 byte alignment option */ 6528 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6529 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6530 break; 6531 } 6532 6533 case IP6OPT_TUNNEL_LIMIT: 6534 if (hdr_type != IPPROTO_DSTOPTS) { 6535 goto opt_error; 6536 } 6537 optused = 2 + optptr[1]; 6538 if (optused > optlen) { 6539 goto bad_opt; 6540 } 6541 if (optused != 3) { 6542 goto opt_error; 6543 } 6544 break; 6545 6546 default: 6547 errtype = "unknown"; 6548 /* FALLTHROUGH */ 6549 opt_error: 6550 /* Determine which zone should send error */ 6551 zoneid = ipif_lookup_addr_zoneid_v6( 6552 &ip6h->ip6_dst, ill); 6553 switch (IP6OPT_TYPE(opt_type)) { 6554 case IP6OPT_TYPE_SKIP: 6555 optused = 2 + optptr[1]; 6556 if (optused > optlen) 6557 goto bad_opt; 6558 ip1dbg(("ip_process_options_v6: %s " 6559 "opt 0x%x skipped\n", 6560 errtype, opt_type)); 6561 break; 6562 case IP6OPT_TYPE_DISCARD: 6563 ip1dbg(("ip_process_options_v6: %s " 6564 "opt 0x%x; packet dropped\n", 6565 errtype, opt_type)); 6566 freemsg(first_mp); 6567 return (-1); 6568 case IP6OPT_TYPE_ICMP: 6569 if (zoneid == ALL_ZONES) { 6570 freemsg(first_mp); 6571 return (-1); 6572 } 6573 icmp_param_problem_v6(WR(q), first_mp, 6574 ICMP6_PARAMPROB_OPTION, 6575 (uint32_t)(optptr - 6576 (uint8_t *)ip6h), 6577 B_FALSE, B_FALSE, zoneid); 6578 return (-1); 6579 case IP6OPT_TYPE_FORCEICMP: 6580 if (zoneid == ALL_ZONES) { 6581 freemsg(first_mp); 6582 return (-1); 6583 } 6584 icmp_param_problem_v6(WR(q), first_mp, 6585 ICMP6_PARAMPROB_OPTION, 6586 
(uint32_t)(optptr - 6587 (uint8_t *)ip6h), 6588 B_FALSE, B_TRUE, zoneid); 6589 return (-1); 6590 default: 6591 ASSERT(0); 6592 } 6593 } 6594 } 6595 optlen -= optused; 6596 optptr += optused; 6597 } 6598 return (ret); 6599 6600 bad_opt: 6601 /* Determine which zone should send error */ 6602 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 6603 if (zoneid == ALL_ZONES) { 6604 freemsg(first_mp); 6605 } else { 6606 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6607 (uint32_t)(optptr - (uint8_t *)ip6h), 6608 B_FALSE, B_FALSE, zoneid); 6609 } 6610 return (-1); 6611 } 6612 6613 /* 6614 * Process a routing header that is not yet empty. 6615 * Only handles type 0 routing headers. 6616 */ 6617 static void 6618 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6619 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6620 { 6621 ip6_rthdr0_t *rthdr; 6622 uint_t ehdrlen; 6623 uint_t numaddr; 6624 in6_addr_t *addrptr; 6625 in6_addr_t tmp; 6626 6627 ASSERT(rth->ip6r_segleft != 0); 6628 6629 if (!ipv6_forward_src_routed) { 6630 /* XXX Check for source routed out same interface? 
*/ 6631 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 6632 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 6633 freemsg(hada_mp); 6634 freemsg(mp); 6635 return; 6636 } 6637 6638 if (rth->ip6r_type != 0) { 6639 if (hada_mp != NULL) 6640 goto hada_drop; 6641 /* Sent by forwarding path, and router is global zone */ 6642 icmp_param_problem_v6(WR(q), mp, 6643 ICMP6_PARAMPROB_HEADER, 6644 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6645 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6646 return; 6647 } 6648 rthdr = (ip6_rthdr0_t *)rth; 6649 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6650 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6651 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6652 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6653 if (rthdr->ip6r0_len & 0x1) { 6654 /* An odd length is impossible */ 6655 if (hada_mp != NULL) 6656 goto hada_drop; 6657 /* Sent by forwarding path, and router is global zone */ 6658 icmp_param_problem_v6(WR(q), mp, 6659 ICMP6_PARAMPROB_HEADER, 6660 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6661 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6662 return; 6663 } 6664 numaddr = rthdr->ip6r0_len / 2; 6665 if (rthdr->ip6r0_segleft > numaddr) { 6666 /* segleft exceeds number of addresses in routing header */ 6667 if (hada_mp != NULL) 6668 goto hada_drop; 6669 /* Sent by forwarding path, and router is global zone */ 6670 icmp_param_problem_v6(WR(q), mp, 6671 ICMP6_PARAMPROB_HEADER, 6672 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6673 (uchar_t *)ip6h), 6674 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6675 return; 6676 } 6677 addrptr += (numaddr - rthdr->ip6r0_segleft); 6678 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6679 IN6_IS_ADDR_MULTICAST(addrptr)) { 6680 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6681 freemsg(hada_mp); 6682 freemsg(mp); 6683 return; 6684 } 6685 /* Swap */ 6686 tmp = *addrptr; 6687 *addrptr = ip6h->ip6_dst; 6688 ip6h->ip6_dst = tmp; 6689 rthdr->ip6r0_segleft--; 6690 /* Don't allow any mapped 
addresses - ip_wput_v6 can't handle them */ 6691 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6692 if (hada_mp != NULL) 6693 goto hada_drop; 6694 /* Sent by forwarding path, and router is global zone */ 6695 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6696 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6697 return; 6698 } 6699 if (ip_check_v6_mblk(mp, ill) == 0) { 6700 ip6h = (ip6_t *)mp->b_rptr; 6701 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6702 } 6703 return; 6704 hada_drop: 6705 /* IPsec kstats: bean counter? */ 6706 freemsg(hada_mp); 6707 freemsg(mp); 6708 } 6709 6710 /* 6711 * Read side put procedure for IPv6 module. 6712 */ 6713 void 6714 ip_rput_v6(queue_t *q, mblk_t *mp) 6715 { 6716 mblk_t *first_mp; 6717 mblk_t *hada_mp = NULL; 6718 ip6_t *ip6h; 6719 boolean_t ll_multicast = B_FALSE; 6720 boolean_t mctl_present = B_FALSE; 6721 ill_t *ill; 6722 struct iocblk *iocp; 6723 uint_t flags = 0; 6724 mblk_t *dl_mp; 6725 6726 ill = (ill_t *)q->q_ptr; 6727 if (ill->ill_state_flags & ILL_CONDEMNED) { 6728 union DL_primitives *dl; 6729 6730 dl = (union DL_primitives *)mp->b_rptr; 6731 /* 6732 * Things are opening or closing - only accept DLPI 6733 * ack messages. If the stream is closing and ip_wsrv 6734 * has completed, ip_close is out of the qwait, but has 6735 * not yet completed qprocsoff. Don't proceed any further 6736 * because the ill has been cleaned up and things hanging 6737 * off the ill have been freed. 6738 */ 6739 if ((mp->b_datap->db_type != M_PCPROTO) || 6740 (dl->dl_primitive == DL_UNITDATA_IND)) { 6741 inet_freemsg(mp); 6742 return; 6743 } 6744 } 6745 6746 dl_mp = NULL; 6747 switch (mp->b_datap->db_type) { 6748 case M_DATA: { 6749 int hlen; 6750 uchar_t *ucp; 6751 struct ether_header *eh; 6752 dl_unitdata_ind_t *dui; 6753 6754 /* 6755 * This is a work-around for CR 6451644, a bug in Nemo. It 6756 * should be removed when that problem is fixed. 
6757 */ 6758 if (ill->ill_mactype == DL_ETHER && 6759 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6760 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6761 ucp[-2] == (IP6_DL_SAP >> 8)) { 6762 if (hlen >= sizeof (struct ether_vlan_header) && 6763 ucp[-5] == 0 && ucp[-6] == 0x81) 6764 ucp -= sizeof (struct ether_vlan_header); 6765 else 6766 ucp -= sizeof (struct ether_header); 6767 /* 6768 * If it's a group address, then fabricate a 6769 * DL_UNITDATA_IND message. 6770 */ 6771 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6772 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6773 BPRI_HI)) != NULL) { 6774 eh = (struct ether_header *)ucp; 6775 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6776 DB_TYPE(dl_mp) = M_PROTO; 6777 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6778 dui->dl_primitive = DL_UNITDATA_IND; 6779 dui->dl_dest_addr_length = 8; 6780 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6781 dui->dl_src_addr_length = 8; 6782 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6783 8; 6784 dui->dl_group_address = 1; 6785 ucp = (uchar_t *)(dui + 1); 6786 if (ill->ill_sap_length > 0) 6787 ucp += ill->ill_sap_length; 6788 bcopy(&eh->ether_dhost, ucp, 6); 6789 bcopy(&eh->ether_shost, ucp + 8, 6); 6790 ucp = (uchar_t *)(dui + 1); 6791 if (ill->ill_sap_length < 0) 6792 ucp += 8 + ill->ill_sap_length; 6793 bcopy(&eh->ether_type, ucp, 2); 6794 bcopy(&eh->ether_type, ucp + 8, 2); 6795 } 6796 } 6797 break; 6798 } 6799 6800 case M_PROTO: 6801 case M_PCPROTO: 6802 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6803 DL_UNITDATA_IND) { 6804 /* Go handle anything other than data elsewhere. */ 6805 ip_rput_dlpi(q, mp); 6806 return; 6807 } 6808 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6809 ll_multicast = dlur->dl_group_address; 6810 #undef dlur 6811 /* Save the DLPI header. 
*/ 6812 dl_mp = mp; 6813 mp = mp->b_cont; 6814 dl_mp->b_cont = NULL; 6815 break; 6816 case M_BREAK: 6817 panic("ip_rput_v6: got an M_BREAK"); 6818 /*NOTREACHED*/ 6819 case M_IOCACK: 6820 iocp = (struct iocblk *)mp->b_rptr; 6821 switch (iocp->ioc_cmd) { 6822 case DL_IOC_HDR_INFO: 6823 ill = (ill_t *)q->q_ptr; 6824 ill_fastpath_ack(ill, mp); 6825 return; 6826 case SIOCSTUNPARAM: 6827 case SIOCGTUNPARAM: 6828 case OSIOCSTUNPARAM: 6829 case OSIOCGTUNPARAM: 6830 /* Go through qwriter */ 6831 break; 6832 default: 6833 putnext(q, mp); 6834 return; 6835 } 6836 /* FALLTHRU */ 6837 case M_ERROR: 6838 case M_HANGUP: 6839 mutex_enter(&ill->ill_lock); 6840 if (ill->ill_state_flags & ILL_CONDEMNED) { 6841 mutex_exit(&ill->ill_lock); 6842 freemsg(mp); 6843 return; 6844 } 6845 ill_refhold_locked(ill); 6846 mutex_exit(&ill->ill_lock); 6847 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6848 return; 6849 case M_CTL: 6850 if ((MBLKL(mp) > sizeof (int)) && 6851 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6852 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6853 mctl_present = B_TRUE; 6854 break; 6855 } 6856 putnext(q, mp); 6857 return; 6858 case M_IOCNAK: 6859 iocp = (struct iocblk *)mp->b_rptr; 6860 switch (iocp->ioc_cmd) { 6861 case DL_IOC_HDR_INFO: 6862 case SIOCSTUNPARAM: 6863 case SIOCGTUNPARAM: 6864 case OSIOCSTUNPARAM: 6865 case OSIOCGTUNPARAM: 6866 mutex_enter(&ill->ill_lock); 6867 if (ill->ill_state_flags & ILL_CONDEMNED) { 6868 mutex_exit(&ill->ill_lock); 6869 freemsg(mp); 6870 return; 6871 } 6872 ill_refhold_locked(ill); 6873 mutex_exit(&ill->ill_lock); 6874 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6875 B_FALSE); 6876 return; 6877 default: 6878 break; 6879 } 6880 /* FALLTHRU */ 6881 default: 6882 putnext(q, mp); 6883 return; 6884 } 6885 6886 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 6887 /* 6888 * if db_ref > 1 then copymsg and free original. 
Packet may be 6889 * changed and do not want other entity who has a reference to this 6890 * message to trip over the changes. This is a blind change because 6891 * trying to catch all places that might change packet is too 6892 * difficult (since it may be a module above this one). 6893 */ 6894 if (mp->b_datap->db_ref > 1) { 6895 mblk_t *mp1; 6896 6897 mp1 = copymsg(mp); 6898 freemsg(mp); 6899 if (mp1 == NULL) { 6900 first_mp = NULL; 6901 goto discard; 6902 } 6903 mp = mp1; 6904 } 6905 first_mp = mp; 6906 if (mctl_present) { 6907 hada_mp = first_mp; 6908 mp = first_mp->b_cont; 6909 } 6910 6911 if (ip_check_v6_mblk(mp, ill) == -1) 6912 return; 6913 6914 ip6h = (ip6_t *)mp->b_rptr; 6915 6916 DTRACE_PROBE4(ip6__physical__in__start, 6917 ill_t *, ill, ill_t *, NULL, 6918 ip6_t *, ip6h, mblk_t *, first_mp); 6919 6920 FW_HOOKS6(ip6_physical_in_event, ipv6firewall_physical_in, 6921 ill, NULL, ip6h, first_mp, mp); 6922 6923 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6924 6925 if (first_mp == NULL) 6926 return; 6927 6928 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6929 IPV6_DEFAULT_VERS_AND_FLOW) { 6930 /* 6931 * It may be a bit too expensive to do this mapped address 6932 * check here, but in the interest of robustness, it seems 6933 * like the correct place. 6934 * TODO: Avoid this check for e.g. connected TCP sockets 6935 */ 6936 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6937 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6938 goto discard; 6939 } 6940 6941 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6942 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6943 goto discard; 6944 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6945 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6946 goto discard; 6947 } 6948 6949 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6950 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6951 } else { 6952 BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); 6953 goto discard; 6954 } 6955 freemsg(dl_mp); 6956 return; 6957 6958 discard: 6959 if (dl_mp != NULL) 6960 freeb(dl_mp); 6961 freemsg(first_mp); 6962 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6963 } 6964 6965 /* 6966 * Walk through the IPv6 packet in mp and see if there's an AH header 6967 * in it. See if the AH header needs to get done before other headers in 6968 * the packet. (Worker function for ipsec_early_ah_v6().) 6969 */ 6970 #define IPSEC_HDR_DONT_PROCESS 0 6971 #define IPSEC_HDR_PROCESS 1 6972 #define IPSEC_MEMORY_ERROR 2 6973 static int 6974 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6975 { 6976 uint_t length; 6977 uint_t ehdrlen; 6978 uint8_t *whereptr; 6979 uint8_t *endptr; 6980 uint8_t *nexthdrp; 6981 ip6_dest_t *desthdr; 6982 ip6_rthdr_t *rthdr; 6983 ip6_t *ip6h; 6984 6985 /* 6986 * For now just pullup everything. In general, the less pullups, 6987 * the better, but there's so much squirrelling through anyway, 6988 * it's just easier this way. 6989 */ 6990 if (!pullupmsg(mp, -1)) { 6991 return (IPSEC_MEMORY_ERROR); 6992 } 6993 6994 ip6h = (ip6_t *)mp->b_rptr; 6995 length = IPV6_HDR_LEN; 6996 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6997 endptr = mp->b_wptr; 6998 6999 /* 7000 * We can't just use the argument nexthdr in the place 7001 * of nexthdrp becaue we don't dereference nexthdrp 7002 * till we confirm whether it is a valid address. 7003 */ 7004 nexthdrp = &ip6h->ip6_nxt; 7005 while (whereptr < endptr) { 7006 /* Is there enough left for len + nexthdr? 
*/ 7007 if (whereptr + MIN_EHDR_LEN > endptr) 7008 return (IPSEC_MEMORY_ERROR); 7009 7010 switch (*nexthdrp) { 7011 case IPPROTO_HOPOPTS: 7012 case IPPROTO_DSTOPTS: 7013 /* Assumes the headers are identical for hbh and dst */ 7014 desthdr = (ip6_dest_t *)whereptr; 7015 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7016 if ((uchar_t *)desthdr + ehdrlen > endptr) 7017 return (IPSEC_MEMORY_ERROR); 7018 /* 7019 * Return DONT_PROCESS because of potential Mobile IPv6 7020 * cruft for destination options. 7021 */ 7022 if (*nexthdrp == IPPROTO_DSTOPTS) 7023 return (IPSEC_HDR_DONT_PROCESS); 7024 nexthdrp = &desthdr->ip6d_nxt; 7025 break; 7026 case IPPROTO_ROUTING: 7027 rthdr = (ip6_rthdr_t *)whereptr; 7028 7029 /* 7030 * If there's more hops left on the routing header, 7031 * return now with DON'T PROCESS. 7032 */ 7033 if (rthdr->ip6r_segleft > 0) 7034 return (IPSEC_HDR_DONT_PROCESS); 7035 7036 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7037 if ((uchar_t *)rthdr + ehdrlen > endptr) 7038 return (IPSEC_MEMORY_ERROR); 7039 nexthdrp = &rthdr->ip6r_nxt; 7040 break; 7041 case IPPROTO_FRAGMENT: 7042 /* Wait for reassembly */ 7043 return (IPSEC_HDR_DONT_PROCESS); 7044 case IPPROTO_AH: 7045 *nexthdr = IPPROTO_AH; 7046 return (IPSEC_HDR_PROCESS); 7047 case IPPROTO_NONE: 7048 /* No next header means we're finished */ 7049 default: 7050 return (IPSEC_HDR_DONT_PROCESS); 7051 } 7052 length += ehdrlen; 7053 whereptr += ehdrlen; 7054 } 7055 panic("ipsec_needs_processing_v6"); 7056 /*NOTREACHED*/ 7057 } 7058 7059 /* 7060 * Path for AH if options are present. If this is the first time we are 7061 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7062 * Otherwise, just fanout. Return value answers the boolean question: 7063 * "Did I consume the mblk you sent me?" 7064 * 7065 * Sometimes AH needs to be done before other IPv6 headers for security 7066 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 7067 * indicates if that is so, and fans out to the appropriate IPsec protocol 7068 * for the datagram passed in. 7069 */ 7070 static boolean_t 7071 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7072 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 7073 { 7074 mblk_t *mp; 7075 uint8_t nexthdr; 7076 ipsec_in_t *ii = NULL; 7077 ah_t *ah; 7078 ipsec_status_t ipsec_rc; 7079 7080 ASSERT((hada_mp == NULL) || (!mctl_present)); 7081 7082 switch (ipsec_needs_processing_v6( 7083 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 7084 case IPSEC_MEMORY_ERROR: 7085 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7086 freemsg(hada_mp); 7087 freemsg(first_mp); 7088 return (B_TRUE); 7089 case IPSEC_HDR_DONT_PROCESS: 7090 return (B_FALSE); 7091 } 7092 7093 /* Default means send it to AH! */ 7094 ASSERT(nexthdr == IPPROTO_AH); 7095 if (!mctl_present) { 7096 mp = first_mp; 7097 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 7098 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7099 "allocation failure.\n")); 7100 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7101 freemsg(hada_mp); 7102 freemsg(mp); 7103 return (B_TRUE); 7104 } 7105 /* 7106 * Store the ill_index so that when we come back 7107 * from IPSEC we ride on the same queue. 7108 */ 7109 ii = (ipsec_in_t *)first_mp->b_rptr; 7110 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7111 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7112 first_mp->b_cont = mp; 7113 } 7114 /* 7115 * Cache hardware acceleration info. 
7116 */ 7117 if (hada_mp != NULL) { 7118 ASSERT(ii != NULL); 7119 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7120 "caching data attr.\n")); 7121 ii->ipsec_in_accelerated = B_TRUE; 7122 ii->ipsec_in_da = hada_mp; 7123 } 7124 7125 if (!ipsec_loaded()) { 7126 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 7127 return (B_TRUE); 7128 } 7129 7130 ah = ipsec_inbound_ah_sa(first_mp); 7131 if (ah == NULL) 7132 return (B_TRUE); 7133 ASSERT(ii->ipsec_in_ah_sa != NULL); 7134 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7135 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7136 7137 switch (ipsec_rc) { 7138 case IPSEC_STATUS_SUCCESS: 7139 /* we're done with IPsec processing, send it up */ 7140 ip_fanout_proto_again(first_mp, ill, ill, ire); 7141 break; 7142 case IPSEC_STATUS_FAILED: 7143 BUMP_MIB(&ip6_mib, ipv6InDiscards); 7144 break; 7145 case IPSEC_STATUS_PENDING: 7146 /* no action needed */ 7147 break; 7148 } 7149 return (B_TRUE); 7150 } 7151 7152 /* 7153 * Validate the IPv6 mblk for alignment. 7154 */ 7155 int 7156 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7157 { 7158 int pkt_len, ip6_len; 7159 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7160 7161 /* check for alignment and full IPv6 header */ 7162 if (!OK_32PTR((uchar_t *)ip6h) || 7163 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7164 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7165 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7166 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7167 freemsg(mp); 7168 return (-1); 7169 } 7170 ip6h = (ip6_t *)mp->b_rptr; 7171 } 7172 7173 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7174 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7175 7176 if (mp->b_cont == NULL) 7177 pkt_len = mp->b_wptr - mp->b_rptr; 7178 else 7179 pkt_len = msgdsize(mp); 7180 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7181 7182 /* 7183 * Check for bogus (too short packet) and packet which 7184 * was padded by the link layer. 
7185 */ 7186 if (ip6_len != pkt_len) { 7187 ssize_t diff; 7188 7189 if (ip6_len > pkt_len) { 7190 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7191 ip6_len, pkt_len)); 7192 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 7193 freemsg(mp); 7194 return (-1); 7195 } 7196 diff = (ssize_t)(pkt_len - ip6_len); 7197 7198 if (!adjmsg(mp, -diff)) { 7199 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7200 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7201 freemsg(mp); 7202 return (-1); 7203 } 7204 } 7205 return (0); 7206 } 7207 7208 /* 7209 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7210 * ip_rput_v6 has already verified alignment, the min length, the version, 7211 * and db_ref = 1. 7212 * 7213 * The ill passed in (the arg named inill) is the ill that the packet 7214 * actually arrived on. We need to remember this when saving the 7215 * input interface index into potential IPV6_PKTINFO data in 7216 * ip_add_info_v6(). 7217 * 7218 * This routine doesn't free dl_mp; that's the caller's responsibility on 7219 * return. (Note that the callers are complex enough that there's no tail 7220 * recursion here anyway.) 
7221 */ 7222 void 7223 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7224 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7225 { 7226 ire_t *ire = NULL; 7227 queue_t *rq; 7228 ill_t *ill = inill; 7229 ill_t *outill; 7230 ipif_t *ipif; 7231 uint8_t *whereptr; 7232 uint8_t nexthdr; 7233 uint16_t remlen; 7234 uint_t prev_nexthdr_offset; 7235 uint_t used; 7236 size_t pkt_len; 7237 uint16_t ip6_len; 7238 uint_t hdr_len; 7239 boolean_t mctl_present; 7240 mblk_t *first_mp; 7241 mblk_t *first_mp1; 7242 boolean_t no_forward; 7243 ip6_hbh_t *hbhhdr; 7244 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7245 conn_t *connp; 7246 ilm_t *ilm; 7247 uint32_t ports; 7248 uint_t ipif_id = 0; 7249 zoneid_t zoneid = GLOBAL_ZONEID; 7250 uint16_t hck_flags, reass_hck_flags; 7251 uint32_t reass_sum; 7252 boolean_t cksum_err; 7253 mblk_t *mp1; 7254 7255 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7256 7257 if (hada_mp != NULL) { 7258 /* 7259 * It's an IPsec accelerated packet. 7260 * Keep a pointer to the data attributes around until 7261 * we allocate the ipsecinfo structure. 7262 */ 7263 IPSECHW_DEBUG(IPSECHW_PKT, 7264 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7265 hada_mp->b_cont = NULL; 7266 /* 7267 * Since it is accelerated, it came directly from 7268 * the ill. 
7269 */ 7270 ASSERT(mctl_present == B_FALSE); 7271 ASSERT(mp->b_datap->db_type != M_CTL); 7272 } 7273 7274 ip6h = (ip6_t *)mp->b_rptr; 7275 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7276 pkt_len = ip6_len; 7277 7278 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7279 hck_flags = DB_CKSUMFLAGS(mp); 7280 else 7281 hck_flags = 0; 7282 7283 /* Clear checksum flags in case we need to forward */ 7284 DB_CKSUMFLAGS(mp) = 0; 7285 reass_sum = reass_hck_flags = 0; 7286 7287 nexthdr = ip6h->ip6_nxt; 7288 7289 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7290 (uchar_t *)ip6h); 7291 whereptr = (uint8_t *)&ip6h[1]; 7292 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7293 7294 /* Process hop by hop header options */ 7295 if (nexthdr == IPPROTO_HOPOPTS) { 7296 uint_t ehdrlen; 7297 uint8_t *optptr; 7298 7299 if (remlen < MIN_EHDR_LEN) 7300 goto pkt_too_short; 7301 if (mp->b_cont != NULL && 7302 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7303 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7304 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7305 freemsg(hada_mp); 7306 freemsg(first_mp); 7307 return; 7308 } 7309 ip6h = (ip6_t *)mp->b_rptr; 7310 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7311 } 7312 hbhhdr = (ip6_hbh_t *)whereptr; 7313 nexthdr = hbhhdr->ip6h_nxt; 7314 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7315 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7316 7317 if (remlen < ehdrlen) 7318 goto pkt_too_short; 7319 if (mp->b_cont != NULL && 7320 whereptr + ehdrlen > mp->b_wptr) { 7321 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7322 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7323 freemsg(hada_mp); 7324 freemsg(first_mp); 7325 return; 7326 } 7327 ip6h = (ip6_t *)mp->b_rptr; 7328 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7329 hbhhdr = (ip6_hbh_t *)whereptr; 7330 } 7331 7332 optptr = whereptr + 2; 7333 whereptr += ehdrlen; 7334 remlen -= ehdrlen; 7335 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7336 
ehdrlen - 2, IPPROTO_HOPOPTS)) { 7337 case -1: 7338 /* 7339 * Packet has been consumed and any 7340 * needed ICMP messages sent. 7341 */ 7342 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7343 freemsg(hada_mp); 7344 return; 7345 case 0: 7346 /* no action needed */ 7347 break; 7348 case 1: 7349 /* Known router alert */ 7350 goto ipv6forus; 7351 } 7352 } 7353 7354 /* 7355 * Attach any necessary label information to this packet. 7356 */ 7357 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7358 if (ip6opt_ls != 0) 7359 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7360 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7361 freemsg(hada_mp); 7362 freemsg(first_mp); 7363 return; 7364 } 7365 7366 /* 7367 * On incoming v6 multicast packets we will bypass the ire table, 7368 * and assume that the read queue corresponds to the targetted 7369 * interface. 7370 * 7371 * The effect of this is the same as the IPv4 original code, but is 7372 * much cleaner I think. See ip_rput for how that was done. 7373 */ 7374 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7375 BUMP_MIB(ill->ill_ip6_mib, ipv6InMcastPkts); 7376 /* 7377 * XXX TODO Give to mrouted to for multicast forwarding. 
7378 */ 7379 ILM_WALKER_HOLD(ill); 7380 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7381 ILM_WALKER_RELE(ill); 7382 if (ilm == NULL) { 7383 if (ip_debug > 3) { 7384 /* ip2dbg */ 7385 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7386 " which is not for us: %s\n", AF_INET6, 7387 &ip6h->ip6_dst); 7388 } 7389 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7390 freemsg(hada_mp); 7391 freemsg(first_mp); 7392 return; 7393 } 7394 if (ip_debug > 3) { 7395 /* ip2dbg */ 7396 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7397 AF_INET6, &ip6h->ip6_dst); 7398 } 7399 rq = ill->ill_rq; 7400 zoneid = GLOBAL_ZONEID; 7401 goto ipv6forus; 7402 } 7403 7404 ipif = ill->ill_ipif; 7405 7406 /* 7407 * If a packet was received on an interface that is a 6to4 tunnel, 7408 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7409 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7410 * the 6to4 prefix of the address configured on the receiving interface. 7411 * Otherwise, the packet was delivered to this interface in error and 7412 * the packet must be dropped. 7413 */ 7414 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7415 7416 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7417 &ip6h->ip6_dst)) { 7418 if (ip_debug > 2) { 7419 /* ip1dbg */ 7420 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7421 "addressed packet which is not for us: " 7422 "%s\n", AF_INET6, &ip6h->ip6_dst); 7423 } 7424 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7425 freemsg(first_mp); 7426 return; 7427 } 7428 } 7429 7430 /* 7431 * Find an ire that matches destination. For link-local addresses 7432 * we have to match the ill. 7433 * TBD for site local addresses. 
7434 */ 7435 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7436 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7437 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7438 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7439 } else { 7440 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7441 MBLK_GETLABEL(mp)); 7442 } 7443 if (ire == NULL) { 7444 /* 7445 * No matching IRE found. Mark this packet as having 7446 * originated externally. 7447 */ 7448 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7449 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7450 if (!(ill->ill_flags & ILLF_ROUTER)) 7451 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7452 freemsg(hada_mp); 7453 freemsg(first_mp); 7454 return; 7455 } 7456 if (ip6h->ip6_hops <= 1) { 7457 if (hada_mp != NULL) 7458 goto hada_drop; 7459 /* Sent by forwarding path, and router is global zone */ 7460 icmp_time_exceeded_v6(WR(q), first_mp, 7461 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7462 GLOBAL_ZONEID); 7463 return; 7464 } 7465 /* 7466 * Per RFC 3513 section 2.5.2, we must not forward packets with 7467 * an unspecified source address. 7468 */ 7469 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7470 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7471 freemsg(hada_mp); 7472 freemsg(first_mp); 7473 return; 7474 } 7475 mp->b_prev = (mblk_t *)(uintptr_t) 7476 ill->ill_phyint->phyint_ifindex; 7477 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7478 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7479 ALL_ZONES); 7480 return; 7481 } 7482 ipif_id = ire->ire_ipif->ipif_seqid; 7483 /* we have a matching IRE */ 7484 if (ire->ire_stq != NULL) { 7485 ill_group_t *ill_group; 7486 ill_group_t *ire_group; 7487 7488 /* 7489 * To be quicker, we may wish not to chase pointers 7490 * (ire->ire_ipif->ipif_ill...) and instead store the 7491 * forwarding policy in the ire. An unfortunate side- 7492 * effect of this would be requiring an ire flush whenever 7493 * the ILLF_ROUTER flag changes. 
For now, chase pointers 7494 * once and store in the boolean no_forward. 7495 * 7496 * This appears twice to keep it out of the non-forwarding, 7497 * yes-it's-for-us-on-the-right-interface case. 7498 */ 7499 no_forward = ((ill->ill_flags & 7500 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7501 7502 7503 ASSERT(first_mp == mp); 7504 /* 7505 * This ire has a send-to queue - forward the packet. 7506 */ 7507 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7508 freemsg(hada_mp); 7509 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7510 if (no_forward) 7511 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7512 freemsg(mp); 7513 ire_refrele(ire); 7514 return; 7515 } 7516 if (ip6h->ip6_hops <= 1) { 7517 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7518 /* Sent by forwarding path, and router is global zone */ 7519 icmp_time_exceeded_v6(WR(q), mp, 7520 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7521 GLOBAL_ZONEID); 7522 ire_refrele(ire); 7523 return; 7524 } 7525 /* 7526 * Per RFC 3513 section 2.5.2, we must not forward packets with 7527 * an unspecified source address. 
7528 */ 7529 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7530 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7531 freemsg(mp); 7532 ire_refrele(ire); 7533 return; 7534 } 7535 7536 if (is_system_labeled()) { 7537 mblk_t *mp1; 7538 7539 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7540 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7541 freemsg(mp); 7542 ire_refrele(ire); 7543 return; 7544 } 7545 /* Size may have changed */ 7546 mp = mp1; 7547 ip6h = (ip6_t *)mp->b_rptr; 7548 pkt_len = msgdsize(mp); 7549 } 7550 7551 if (pkt_len > ire->ire_max_frag) { 7552 BUMP_MIB(ill->ill_ip6_mib, ipv6InTooBigErrors); 7553 /* Sent by forwarding path, and router is global zone */ 7554 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7555 ll_multicast, B_TRUE, GLOBAL_ZONEID); 7556 ire_refrele(ire); 7557 return; 7558 } 7559 7560 /* 7561 * Check to see if we're forwarding the packet to a 7562 * different link from which it came. If so, check the 7563 * source and destination addresses since routers must not 7564 * forward any packets with link-local source or 7565 * destination addresses to other links. Otherwise (if 7566 * we're forwarding onto the same link), conditionally send 7567 * a redirect message. 7568 */ 7569 ill_group = ill->ill_group; 7570 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7571 if (ire->ire_rfq != q && (ill_group == NULL || 7572 ill_group != ire_group)) { 7573 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7574 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7575 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7576 freemsg(mp); 7577 ire_refrele(ire); 7578 return; 7579 } 7580 /* TBD add site-local check at site boundary? */ 7581 } else if (ipv6_send_redirects) { 7582 in6_addr_t *v6targ; 7583 in6_addr_t gw_addr_v6; 7584 ire_t *src_ire_v6 = NULL; 7585 7586 /* 7587 * Don't send a redirect when forwarding a source 7588 * routed packet. 
7589 */ 7590 if (ip_source_routed_v6(ip6h, mp)) 7591 goto forward; 7592 7593 mutex_enter(&ire->ire_lock); 7594 gw_addr_v6 = ire->ire_gateway_addr_v6; 7595 mutex_exit(&ire->ire_lock); 7596 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7597 v6targ = &gw_addr_v6; 7598 /* 7599 * We won't send redirects to a router 7600 * that doesn't have a link local 7601 * address, but will forward. 7602 */ 7603 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7604 BUMP_MIB(ill->ill_ip6_mib, 7605 ipv6InAddrErrors); 7606 goto forward; 7607 } 7608 } else { 7609 v6targ = &ip6h->ip6_dst; 7610 } 7611 7612 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7613 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7614 ALL_ZONES, 0, NULL, 7615 MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7616 7617 if (src_ire_v6 != NULL) { 7618 /* 7619 * The source is directly connected. 7620 */ 7621 mp1 = copymsg(mp); 7622 if (mp1 != NULL) { 7623 icmp_send_redirect_v6(WR(q), 7624 mp1, v6targ, &ip6h->ip6_dst, 7625 ill, B_FALSE); 7626 } 7627 ire_refrele(src_ire_v6); 7628 } 7629 } 7630 7631 forward: 7632 /* Hoplimit verified above */ 7633 ip6h->ip6_hops--; 7634 7635 outill = ire->ire_ipif->ipif_ill; 7636 7637 DTRACE_PROBE4(ip6__forwarding__start, 7638 ill_t *, inill, ill_t *, outill, 7639 ip6_t *, ip6h, mblk_t *, mp); 7640 7641 FW_HOOKS6(ip6_forwarding_event, ipv6firewall_forwarding, 7642 inill, outill, ip6h, mp, mp); 7643 7644 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7645 7646 if (mp != NULL) { 7647 UPDATE_IB_PKT_COUNT(ire); 7648 ire->ire_last_used_time = lbolt; 7649 BUMP_MIB(ill->ill_ip6_mib, ipv6OutForwDatagrams); 7650 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7651 } 7652 IRE_REFRELE(ire); 7653 return; 7654 } 7655 rq = ire->ire_rfq; 7656 7657 /* 7658 * Need to put on correct queue for reassembly to find it. 7659 * No need to use put() since reassembly has its own locks. 
7660 * Note: multicast packets and packets destined to addresses 7661 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7662 * the arriving ill. 7663 */ 7664 if (rq != q) { 7665 boolean_t check_multi = B_TRUE; 7666 ill_group_t *ill_group = NULL; 7667 ill_group_t *ire_group = NULL; 7668 ill_t *ire_ill = NULL; 7669 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7670 7671 /* 7672 * To be quicker, we may wish not to chase pointers 7673 * (ire->ire_ipif->ipif_ill...) and instead store the 7674 * forwarding policy in the ire. An unfortunate side- 7675 * effect of this would be requiring an ire flush whenever 7676 * the ILLF_ROUTER flag changes. For now, chase pointers 7677 * once and store in the boolean no_forward. 7678 */ 7679 no_forward = ((ill->ill_flags & 7680 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7681 7682 ill_group = ill->ill_group; 7683 if (rq != NULL) { 7684 ire_ill = (ill_t *)(rq->q_ptr); 7685 ire_group = ire_ill->ill_group; 7686 } 7687 7688 /* 7689 * If it's part of the same IPMP group, or if it's a legal 7690 * address on the 'usesrc' interface, then bypass strict 7691 * checks. 7692 */ 7693 if (ill_group != NULL && ill_group == ire_group) { 7694 check_multi = B_FALSE; 7695 } else if (ill_ifindex != 0 && ire_ill != NULL && 7696 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7697 check_multi = B_FALSE; 7698 } 7699 7700 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7701 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7702 /* 7703 * This packet came in on an interface other than the 7704 * one associated with the destination address 7705 * and we are strict about matches. 7706 * 7707 * As long as the ills belong to the same group, 7708 * we don't consider them to arriving on the wrong 7709 * interface. Thus, when the switch is doing inbound 7710 * load spreading, we won't drop packets when we 7711 * are doing strict multihoming checks. 
7712 */ 7713 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7714 freemsg(hada_mp); 7715 freemsg(first_mp); 7716 ire_refrele(ire); 7717 return; 7718 } 7719 7720 if (rq != NULL) 7721 q = rq; 7722 7723 ill = (ill_t *)q->q_ptr; 7724 ASSERT(ill); 7725 } 7726 7727 zoneid = ire->ire_zoneid; 7728 UPDATE_IB_PKT_COUNT(ire); 7729 ire->ire_last_used_time = lbolt; 7730 /* Don't use the ire after this point. */ 7731 ire_refrele(ire); 7732 ipv6forus: 7733 /* 7734 * Looks like this packet is for us one way or another. 7735 * This is where we'll process destination headers etc. 7736 */ 7737 for (; ; ) { 7738 switch (nexthdr) { 7739 case IPPROTO_TCP: { 7740 uint16_t *up; 7741 uint32_t sum; 7742 int offset; 7743 7744 hdr_len = pkt_len - remlen; 7745 7746 if (hada_mp != NULL) { 7747 ip0dbg(("tcp hada drop\n")); 7748 goto hada_drop; 7749 } 7750 7751 7752 /* TCP needs all of the TCP header */ 7753 if (remlen < TCP_MIN_HEADER_LENGTH) 7754 goto pkt_too_short; 7755 if (mp->b_cont != NULL && 7756 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7757 if (!pullupmsg(mp, 7758 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7759 BUMP_MIB(ill->ill_ip6_mib, 7760 ipv6InDiscards); 7761 freemsg(first_mp); 7762 return; 7763 } 7764 hck_flags = 0; 7765 ip6h = (ip6_t *)mp->b_rptr; 7766 whereptr = (uint8_t *)ip6h + hdr_len; 7767 } 7768 /* 7769 * Extract the offset field from the TCP header. 7770 */ 7771 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7772 if (offset != 5) { 7773 if (offset < 5) { 7774 ip1dbg(("ip_rput_data_v6: short " 7775 "TCP data offset")); 7776 BUMP_MIB(ill->ill_ip6_mib, 7777 ipv6InDiscards); 7778 freemsg(first_mp); 7779 return; 7780 } 7781 /* 7782 * There must be TCP options. 7783 * Make sure we can grab them. 
7784 */ 7785 offset <<= 2; 7786 if (remlen < offset) 7787 goto pkt_too_short; 7788 if (mp->b_cont != NULL && 7789 whereptr + offset > mp->b_wptr) { 7790 if (!pullupmsg(mp, 7791 hdr_len + offset)) { 7792 BUMP_MIB(ill->ill_ip6_mib, 7793 ipv6InDiscards); 7794 freemsg(first_mp); 7795 return; 7796 } 7797 hck_flags = 0; 7798 ip6h = (ip6_t *)mp->b_rptr; 7799 whereptr = (uint8_t *)ip6h + hdr_len; 7800 } 7801 } 7802 7803 up = (uint16_t *)&ip6h->ip6_src; 7804 /* 7805 * TCP checksum calculation. First sum up the 7806 * pseudo-header fields: 7807 * - Source IPv6 address 7808 * - Destination IPv6 address 7809 * - TCP payload length 7810 * - TCP protocol ID 7811 */ 7812 sum = htons(IPPROTO_TCP + remlen) + 7813 up[0] + up[1] + up[2] + up[3] + 7814 up[4] + up[5] + up[6] + up[7] + 7815 up[8] + up[9] + up[10] + up[11] + 7816 up[12] + up[13] + up[14] + up[15]; 7817 7818 /* Fold initial sum */ 7819 sum = (sum & 0xffff) + (sum >> 16); 7820 7821 mp1 = mp->b_cont; 7822 7823 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7824 IP6_STAT(ip6_in_sw_cksum); 7825 7826 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7827 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7828 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7829 mp, mp1, cksum_err); 7830 7831 if (cksum_err) { 7832 BUMP_MIB(&ip_mib, tcpInErrs); 7833 7834 if (hck_flags & HCK_FULLCKSUM) 7835 IP6_STAT(ip6_tcp_in_full_hw_cksum_err); 7836 else if (hck_flags & HCK_PARTIALCKSUM) 7837 IP6_STAT(ip6_tcp_in_part_hw_cksum_err); 7838 else 7839 IP6_STAT(ip6_tcp_in_sw_cksum_err); 7840 7841 freemsg(first_mp); 7842 return; 7843 } 7844 tcp_fanout: 7845 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7846 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7847 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7848 return; 7849 } 7850 case IPPROTO_SCTP: 7851 { 7852 sctp_hdr_t *sctph; 7853 uint32_t calcsum, pktsum; 7854 uint_t hdr_len = pkt_len - remlen; 7855 7856 /* SCTP needs all of the SCTP header */ 7857 if (remlen < sizeof (*sctph)) { 7858 goto pkt_too_short; 
7859 } 7860 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7861 ASSERT(mp->b_cont != NULL); 7862 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7863 BUMP_MIB(ill->ill_ip6_mib, 7864 ipv6InDiscards); 7865 freemsg(mp); 7866 return; 7867 } 7868 ip6h = (ip6_t *)mp->b_rptr; 7869 whereptr = (uint8_t *)ip6h + hdr_len; 7870 } 7871 7872 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7873 /* checksum */ 7874 pktsum = sctph->sh_chksum; 7875 sctph->sh_chksum = 0; 7876 calcsum = sctp_cksum(mp, hdr_len); 7877 if (calcsum != pktsum) { 7878 BUMP_MIB(&sctp_mib, sctpChecksumError); 7879 freemsg(mp); 7880 return; 7881 } 7882 sctph->sh_chksum = pktsum; 7883 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7884 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7885 ports, ipif_id, zoneid, mp)) == NULL) { 7886 ip_fanout_sctp_raw(first_mp, ill, 7887 (ipha_t *)ip6h, B_FALSE, ports, 7888 mctl_present, 7889 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7890 B_TRUE, ipif_id, zoneid); 7891 return; 7892 } 7893 BUMP_MIB(&ip_mib, ipInDelivers); 7894 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7895 B_FALSE, mctl_present); 7896 return; 7897 } 7898 case IPPROTO_UDP: { 7899 uint16_t *up; 7900 uint32_t sum; 7901 7902 hdr_len = pkt_len - remlen; 7903 7904 if (hada_mp != NULL) { 7905 ip0dbg(("udp hada drop\n")); 7906 goto hada_drop; 7907 } 7908 7909 /* Verify that at least the ports are present */ 7910 if (remlen < UDPH_SIZE) 7911 goto pkt_too_short; 7912 if (mp->b_cont != NULL && 7913 whereptr + UDPH_SIZE > mp->b_wptr) { 7914 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7915 BUMP_MIB(ill->ill_ip6_mib, 7916 ipv6InDiscards); 7917 freemsg(first_mp); 7918 return; 7919 } 7920 hck_flags = 0; 7921 ip6h = (ip6_t *)mp->b_rptr; 7922 whereptr = (uint8_t *)ip6h + hdr_len; 7923 } 7924 7925 /* 7926 * Before going through the regular checksum 7927 * calculation, make sure the received checksum 7928 * is non-zero. 
RFC 2460 says, a 0x0000 checksum 7929 * in a UDP packet (within IPv6 packet) is invalid 7930 * and should be replaced by 0xffff. This makes 7931 * sense as regular checksum calculation will 7932 * pass for both the cases i.e. 0x0000 and 0xffff. 7933 * Removing one of the case makes error detection 7934 * stronger. 7935 */ 7936 7937 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7938 /* 0x0000 checksum is invalid */ 7939 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7940 "checksum value 0x0000\n")); 7941 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7942 freemsg(first_mp); 7943 return; 7944 } 7945 7946 up = (uint16_t *)&ip6h->ip6_src; 7947 7948 /* 7949 * UDP checksum calculation. First sum up the 7950 * pseudo-header fields: 7951 * - Source IPv6 address 7952 * - Destination IPv6 address 7953 * - UDP payload length 7954 * - UDP protocol ID 7955 */ 7956 7957 sum = htons(IPPROTO_UDP + remlen) + 7958 up[0] + up[1] + up[2] + up[3] + 7959 up[4] + up[5] + up[6] + up[7] + 7960 up[8] + up[9] + up[10] + up[11] + 7961 up[12] + up[13] + up[14] + up[15]; 7962 7963 /* Fold initial sum */ 7964 sum = (sum & 0xffff) + (sum >> 16); 7965 7966 if (reass_hck_flags != 0) { 7967 hck_flags = reass_hck_flags; 7968 7969 IP_CKSUM_RECV_REASS(hck_flags, 7970 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7971 sum, reass_sum, cksum_err); 7972 } else { 7973 mp1 = mp->b_cont; 7974 7975 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7976 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7977 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7978 mp, mp1, cksum_err); 7979 } 7980 7981 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7982 IP6_STAT(ip6_in_sw_cksum); 7983 7984 if (cksum_err) { 7985 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7986 7987 if (hck_flags & HCK_FULLCKSUM) 7988 IP6_STAT(ip6_udp_in_full_hw_cksum_err); 7989 else if (hck_flags & HCK_PARTIALCKSUM) 7990 IP6_STAT(ip6_udp_in_part_hw_cksum_err); 7991 else 7992 IP6_STAT(ip6_udp_in_sw_cksum_err); 7993 7994 freemsg(first_mp); 7995 return; 7996 } 7997 
goto udp_fanout; 7998 } 7999 case IPPROTO_ICMPV6: { 8000 uint16_t *up; 8001 uint32_t sum; 8002 uint_t hdr_len = pkt_len - remlen; 8003 8004 if (hada_mp != NULL) { 8005 ip0dbg(("icmp hada drop\n")); 8006 goto hada_drop; 8007 } 8008 8009 up = (uint16_t *)&ip6h->ip6_src; 8010 sum = htons(IPPROTO_ICMPV6 + remlen) + 8011 up[0] + up[1] + up[2] + up[3] + 8012 up[4] + up[5] + up[6] + up[7] + 8013 up[8] + up[9] + up[10] + up[11] + 8014 up[12] + up[13] + up[14] + up[15]; 8015 sum = (sum & 0xffff) + (sum >> 16); 8016 sum = IP_CSUM(mp, hdr_len, sum); 8017 if (sum != 0) { 8018 /* IPv6 ICMP checksum failed */ 8019 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 8020 "failed %x\n", 8021 sum)); 8022 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 8023 BUMP_MIB(ill->ill_icmp6_mib, 8024 ipv6IfIcmpInErrors); 8025 freemsg(first_mp); 8026 return; 8027 } 8028 8029 icmp_fanout: 8030 /* Check variable for testing applications */ 8031 if (ipv6_drop_inbound_icmpv6) { 8032 freemsg(first_mp); 8033 return; 8034 } 8035 /* 8036 * Assume that there is always at least one conn for 8037 * ICMPv6 (in.ndpd) i.e. don't optimize the case 8038 * where there is no conn. 8039 */ 8040 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8041 ASSERT(!(ill->ill_phyint->phyint_flags & 8042 PHYI_LOOPBACK)); 8043 /* 8044 * In the multicast case, applications may have 8045 * joined the group from different zones, so we 8046 * need to deliver the packet to each of them. 8047 * Loop through the multicast memberships 8048 * structures (ilm) on the receive ill and send 8049 * a copy of the packet up each matching one. 
8050 */ 8051 ILM_WALKER_HOLD(ill); 8052 for (ilm = ill->ill_ilm; ilm != NULL; 8053 ilm = ilm->ilm_next) { 8054 if (ilm->ilm_flags & ILM_DELETED) 8055 continue; 8056 if (!IN6_ARE_ADDR_EQUAL( 8057 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 8058 continue; 8059 if (!ipif_lookup_zoneid(ill, 8060 ilm->ilm_zoneid, IPIF_UP, NULL)) 8061 continue; 8062 8063 first_mp1 = ip_copymsg(first_mp); 8064 if (first_mp1 == NULL) 8065 continue; 8066 icmp_inbound_v6(q, first_mp1, ill, 8067 hdr_len, mctl_present, 0, 8068 ilm->ilm_zoneid, dl_mp); 8069 } 8070 ILM_WALKER_RELE(ill); 8071 } else { 8072 first_mp1 = ip_copymsg(first_mp); 8073 if (first_mp1 != NULL) 8074 icmp_inbound_v6(q, first_mp1, ill, 8075 hdr_len, mctl_present, 0, zoneid, 8076 dl_mp); 8077 } 8078 } 8079 /* FALLTHRU */ 8080 default: { 8081 /* 8082 * Handle protocols with which IPv6 is less intimate. 8083 */ 8084 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 8085 8086 if (hada_mp != NULL) { 8087 ip0dbg(("default hada drop\n")); 8088 goto hada_drop; 8089 } 8090 8091 /* 8092 * Enable sending ICMP for "Unknown" nexthdr 8093 * case. i.e. where we did not FALLTHRU from 8094 * IPPROTO_ICMPV6 processing case above. 8095 * If we did FALLTHRU, then the packet has already been 8096 * processed for IPPF, don't process it again in 8097 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8098 * flags 8099 */ 8100 if (nexthdr != IPPROTO_ICMPV6) 8101 proto_flags |= IP_FF_SEND_ICMP; 8102 else 8103 proto_flags |= IP6_NO_IPPOLICY; 8104 8105 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8106 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8107 mctl_present, zoneid); 8108 return; 8109 } 8110 8111 case IPPROTO_DSTOPTS: { 8112 uint_t ehdrlen; 8113 uint8_t *optptr; 8114 ip6_dest_t *desthdr; 8115 8116 /* Check if AH is present. 
*/ 8117 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8118 ire, hada_mp, zoneid)) { 8119 ip0dbg(("dst early hada drop\n")); 8120 return; 8121 } 8122 8123 /* 8124 * Reinitialize pointers, as ipsec_early_ah_v6() does 8125 * complete pullups. We don't have to do more pullups 8126 * as a result. 8127 */ 8128 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8129 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8130 ip6h = (ip6_t *)mp->b_rptr; 8131 8132 if (remlen < MIN_EHDR_LEN) 8133 goto pkt_too_short; 8134 8135 desthdr = (ip6_dest_t *)whereptr; 8136 nexthdr = desthdr->ip6d_nxt; 8137 prev_nexthdr_offset = (uint_t)(whereptr - 8138 (uint8_t *)ip6h); 8139 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8140 if (remlen < ehdrlen) 8141 goto pkt_too_short; 8142 optptr = whereptr + 2; 8143 /* 8144 * Note: XXX This code does not seem to make 8145 * distinction between Destination Options Header 8146 * being before/after Routing Header which can 8147 * happen if we are at the end of source route. 8148 * This may become significant in future. 8149 * (No real significant Destination Options are 8150 * defined/implemented yet ). 8151 */ 8152 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8153 ehdrlen - 2, IPPROTO_DSTOPTS)) { 8154 case -1: 8155 /* 8156 * Packet has been consumed and any needed 8157 * ICMP errors sent. 
8158 */ 8159 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8160 freemsg(hada_mp); 8161 return; 8162 case 0: 8163 /* No action needed continue */ 8164 break; 8165 case 1: 8166 /* 8167 * Unnexpected return value 8168 * (Router alert is a Hop-by-Hop option) 8169 */ 8170 #ifdef DEBUG 8171 panic("ip_rput_data_v6: router " 8172 "alert hbh opt indication in dest opt"); 8173 /*NOTREACHED*/ 8174 #else 8175 freemsg(hada_mp); 8176 freemsg(first_mp); 8177 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8178 return; 8179 #endif 8180 } 8181 used = ehdrlen; 8182 break; 8183 } 8184 case IPPROTO_FRAGMENT: { 8185 ip6_frag_t *fraghdr; 8186 size_t no_frag_hdr_len; 8187 8188 if (hada_mp != NULL) { 8189 ip0dbg(("frag hada drop\n")); 8190 goto hada_drop; 8191 } 8192 8193 ASSERT(first_mp == mp); 8194 if (remlen < sizeof (ip6_frag_t)) 8195 goto pkt_too_short; 8196 8197 if (mp->b_cont != NULL && 8198 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8199 if (!pullupmsg(mp, 8200 pkt_len - remlen + sizeof (ip6_frag_t))) { 8201 BUMP_MIB(ill->ill_ip6_mib, 8202 ipv6InDiscards); 8203 freemsg(mp); 8204 return; 8205 } 8206 hck_flags = 0; 8207 ip6h = (ip6_t *)mp->b_rptr; 8208 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8209 } 8210 8211 fraghdr = (ip6_frag_t *)whereptr; 8212 used = (uint_t)sizeof (ip6_frag_t); 8213 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmReqds); 8214 8215 /* 8216 * Invoke the CGTP (multirouting) filtering module to 8217 * process the incoming packet. Packets identified as 8218 * duplicates must be discarded. Filtering is active 8219 * only if the the ip_cgtp_filter ndd variable is 8220 * non-zero. 
8221 */ 8222 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 8223 int cgtp_flt_pkt = 8224 ip_cgtp_filter_ops->cfo_filter_v6( 8225 inill->ill_rq, ip6h, fraghdr); 8226 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8227 freemsg(mp); 8228 return; 8229 } 8230 } 8231 8232 /* Restore the flags */ 8233 DB_CKSUMFLAGS(mp) = hck_flags; 8234 8235 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8236 remlen - used, &prev_nexthdr_offset, 8237 &reass_sum, &reass_hck_flags); 8238 if (mp == NULL) { 8239 /* Reassembly is still pending */ 8240 return; 8241 } 8242 /* The first mblk are the headers before the frag hdr */ 8243 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmOKs); 8244 8245 first_mp = mp; /* mp has most likely changed! */ 8246 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8247 ip6h = (ip6_t *)mp->b_rptr; 8248 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8249 whereptr = mp->b_rptr + no_frag_hdr_len; 8250 remlen = ntohs(ip6h->ip6_plen) + 8251 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8252 pkt_len = msgdsize(mp); 8253 used = 0; 8254 break; 8255 } 8256 case IPPROTO_HOPOPTS: 8257 if (hada_mp != NULL) { 8258 ip0dbg(("hop hada drop\n")); 8259 goto hada_drop; 8260 } 8261 /* 8262 * Illegal header sequence. 8263 * (Hop-by-hop headers are processed above 8264 * and required to immediately follow IPv6 header) 8265 */ 8266 icmp_param_problem_v6(WR(q), first_mp, 8267 ICMP6_PARAMPROB_NEXTHEADER, 8268 prev_nexthdr_offset, 8269 B_FALSE, B_FALSE, zoneid); 8270 return; 8271 8272 case IPPROTO_ROUTING: { 8273 uint_t ehdrlen; 8274 ip6_rthdr_t *rthdr; 8275 8276 /* Check if AH is present. */ 8277 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8278 ire, hada_mp, zoneid)) { 8279 ip0dbg(("routing hada drop\n")); 8280 return; 8281 } 8282 8283 /* 8284 * Reinitialize pointers, as ipsec_early_ah_v6() does 8285 * complete pullups. We don't have to do more pullups 8286 * as a result. 
8287 */ 8288 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8289 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8290 ip6h = (ip6_t *)mp->b_rptr; 8291 8292 if (remlen < MIN_EHDR_LEN) 8293 goto pkt_too_short; 8294 rthdr = (ip6_rthdr_t *)whereptr; 8295 nexthdr = rthdr->ip6r_nxt; 8296 prev_nexthdr_offset = (uint_t)(whereptr - 8297 (uint8_t *)ip6h); 8298 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8299 if (remlen < ehdrlen) 8300 goto pkt_too_short; 8301 if (rthdr->ip6r_segleft != 0) { 8302 /* Not end of source route */ 8303 if (ll_multicast) { 8304 BUMP_MIB(ill->ill_ip6_mib, 8305 ipv6ForwProhibits); 8306 freemsg(hada_mp); 8307 freemsg(mp); 8308 return; 8309 } 8310 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8311 flags, hada_mp, dl_mp); 8312 return; 8313 } 8314 used = ehdrlen; 8315 break; 8316 } 8317 case IPPROTO_AH: 8318 case IPPROTO_ESP: { 8319 /* 8320 * Fast path for AH/ESP. If this is the first time 8321 * we are sending a datagram to AH/ESP, allocate 8322 * a IPSEC_IN message and prepend it. Otherwise, 8323 * just fanout. 8324 */ 8325 8326 ipsec_in_t *ii; 8327 int ipsec_rc; 8328 8329 if (!mctl_present) { 8330 ASSERT(first_mp == mp); 8331 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 8332 NULL) { 8333 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8334 "allocation failure.\n")); 8335 BUMP_MIB(ill->ill_ip6_mib, 8336 ipv6InDiscards); 8337 freemsg(mp); 8338 return; 8339 } 8340 /* 8341 * Store the ill_index so that when we come back 8342 * from IPSEC we ride on the same queue. 8343 */ 8344 ii = (ipsec_in_t *)first_mp->b_rptr; 8345 ii->ipsec_in_ill_index = 8346 ill->ill_phyint->phyint_ifindex; 8347 ii->ipsec_in_rill_index = 8348 ii->ipsec_in_ill_index; 8349 first_mp->b_cont = mp; 8350 /* 8351 * Cache hardware acceleration info. 
8352 */ 8353 if (hada_mp != NULL) { 8354 IPSECHW_DEBUG(IPSECHW_PKT, 8355 ("ip_rput_data_v6: " 8356 "caching data attr.\n")); 8357 ii->ipsec_in_accelerated = B_TRUE; 8358 ii->ipsec_in_da = hada_mp; 8359 hada_mp = NULL; 8360 } 8361 } else { 8362 ii = (ipsec_in_t *)first_mp->b_rptr; 8363 } 8364 8365 if (!ipsec_loaded()) { 8366 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8367 ire->ire_zoneid); 8368 return; 8369 } 8370 8371 /* select inbound SA and have IPsec process the pkt */ 8372 if (nexthdr == IPPROTO_ESP) { 8373 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 8374 if (esph == NULL) 8375 return; 8376 ASSERT(ii->ipsec_in_esp_sa != NULL); 8377 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8378 NULL); 8379 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8380 first_mp, esph); 8381 } else { 8382 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 8383 if (ah == NULL) 8384 return; 8385 ASSERT(ii->ipsec_in_ah_sa != NULL); 8386 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8387 NULL); 8388 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8389 first_mp, ah); 8390 } 8391 8392 switch (ipsec_rc) { 8393 case IPSEC_STATUS_SUCCESS: 8394 break; 8395 case IPSEC_STATUS_FAILED: 8396 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8397 /* FALLTHRU */ 8398 case IPSEC_STATUS_PENDING: 8399 return; 8400 } 8401 /* we're done with IPsec processing, send it up */ 8402 ip_fanout_proto_again(first_mp, ill, inill, ire); 8403 return; 8404 } 8405 case IPPROTO_NONE: 8406 /* All processing is done. Count as "delivered". 
*/ 8407 freemsg(hada_mp); 8408 freemsg(first_mp); 8409 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8410 return; 8411 } 8412 whereptr += used; 8413 ASSERT(remlen >= used); 8414 remlen -= used; 8415 } 8416 /* NOTREACHED */ 8417 8418 pkt_too_short: 8419 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8420 ip6_len, pkt_len, remlen)); 8421 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 8422 freemsg(hada_mp); 8423 freemsg(first_mp); 8424 return; 8425 udp_fanout: 8426 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8427 connp = NULL; 8428 } else { 8429 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8430 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8431 CONN_DEC_REF(connp); 8432 connp = NULL; 8433 } 8434 } 8435 8436 if (connp == NULL) { 8437 uint32_t ports; 8438 8439 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8440 UDP_PORTS_OFFSET); 8441 IP6_STAT(ip6_udp_slow_path); 8442 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8443 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8444 zoneid); 8445 return; 8446 } 8447 8448 if (CONN_UDP_FLOWCTLD(connp)) { 8449 freemsg(first_mp); 8450 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 8451 CONN_DEC_REF(connp); 8452 return; 8453 } 8454 8455 /* Initiate IPPF processing */ 8456 if (IP6_IN_IPP(flags)) { 8457 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8458 if (mp == NULL) { 8459 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8460 CONN_DEC_REF(connp); 8461 return; 8462 } 8463 } 8464 8465 if (connp->conn_ipv6_recvpktinfo || 8466 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8467 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8468 if (mp == NULL) { 8469 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8470 CONN_DEC_REF(connp); 8471 return; 8472 } 8473 } 8474 8475 IP6_STAT(ip6_udp_fast_path); 8476 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 8477 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8478 8479 /* Send it upstream */ 8480 CONN_UDP_RECV(connp, mp); 8481 8482 CONN_DEC_REF(connp); 
8483 freemsg(hada_mp); 8484 return; 8485 8486 hada_drop: 8487 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8488 /* IPsec kstats: bump counter here */ 8489 freemsg(hada_mp); 8490 freemsg(first_mp); 8491 } 8492 8493 /* 8494 * Reassemble fragment. 8495 * When it returns a completed message the first mblk will only contain 8496 * the headers prior to the fragment header. 8497 * 8498 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8499 * of the preceding header. This is needed to patch the previous header's 8500 * nexthdr field when reassembly completes. 8501 */ 8502 static mblk_t * 8503 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8504 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8505 uint32_t *cksum_val, uint16_t *cksum_flags) 8506 { 8507 ill_t *ill = (ill_t *)q->q_ptr; 8508 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8509 uint16_t offset; 8510 boolean_t more_frags; 8511 uint8_t nexthdr = fraghdr->ip6f_nxt; 8512 in6_addr_t *v6dst_ptr; 8513 in6_addr_t *v6src_ptr; 8514 uint_t end; 8515 uint_t hdr_length; 8516 size_t count; 8517 ipf_t *ipf; 8518 ipf_t **ipfp; 8519 ipfb_t *ipfb; 8520 mblk_t *mp1; 8521 uint8_t ecn_info = 0; 8522 size_t msg_len; 8523 mblk_t *tail_mp; 8524 mblk_t *t_mp; 8525 boolean_t pruned = B_FALSE; 8526 uint32_t sum_val; 8527 uint16_t sum_flags; 8528 8529 8530 if (cksum_val != NULL) 8531 *cksum_val = 0; 8532 if (cksum_flags != NULL) 8533 *cksum_flags = 0; 8534 8535 /* 8536 * We utilize hardware computed checksum info only for UDP since 8537 * IP fragmentation is a normal occurence for the protocol. In 8538 * addition, checksum offload support for IP fragments carrying 8539 * UDP payload is commonly implemented across network adapters. 
8540 */ 8541 ASSERT(ill != NULL); 8542 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8543 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8544 mblk_t *mp1 = mp->b_cont; 8545 int32_t len; 8546 8547 /* Record checksum information from the packet */ 8548 sum_val = (uint32_t)DB_CKSUM16(mp); 8549 sum_flags = DB_CKSUMFLAGS(mp); 8550 8551 /* fragmented payload offset from beginning of mblk */ 8552 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8553 8554 if ((sum_flags & HCK_PARTIALCKSUM) && 8555 (mp1 == NULL || mp1->b_cont == NULL) && 8556 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8557 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8558 uint32_t adj; 8559 /* 8560 * Partial checksum has been calculated by hardware 8561 * and attached to the packet; in addition, any 8562 * prepended extraneous data is even byte aligned. 8563 * If any such data exists, we adjust the checksum; 8564 * this would also handle any postpended data. 8565 */ 8566 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8567 mp, mp1, len, adj); 8568 8569 /* One's complement subtract extraneous checksum */ 8570 if (adj >= sum_val) 8571 sum_val = ~(adj - sum_val) & 0xFFFF; 8572 else 8573 sum_val -= adj; 8574 } 8575 } else { 8576 sum_val = 0; 8577 sum_flags = 0; 8578 } 8579 8580 /* Clear hardware checksumming flag */ 8581 DB_CKSUMFLAGS(mp) = 0; 8582 8583 /* 8584 * Note: Fragment offset in header is in 8-octet units. 8585 * Clearing least significant 3 bits not only extracts 8586 * it but also gets it in units of octets. 8587 */ 8588 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8589 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8590 8591 /* 8592 * Is the more frags flag on and the payload length not a multiple 8593 * of eight? 
8594 */ 8595 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8596 zoneid_t zoneid; 8597 8598 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8599 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 8600 if (zoneid == ALL_ZONES) { 8601 freemsg(mp); 8602 return (NULL); 8603 } 8604 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8605 (uint32_t)((char *)&ip6h->ip6_plen - 8606 (char *)ip6h), B_FALSE, B_FALSE, zoneid); 8607 return (NULL); 8608 } 8609 8610 v6src_ptr = &ip6h->ip6_src; 8611 v6dst_ptr = &ip6h->ip6_dst; 8612 end = remlen; 8613 8614 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8615 end += offset; 8616 8617 /* 8618 * Would fragment cause reassembled packet to have a payload length 8619 * greater than IP_MAXPACKET - the max payload size? 8620 */ 8621 if (end > IP_MAXPACKET) { 8622 zoneid_t zoneid; 8623 8624 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8625 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 8626 if (zoneid == ALL_ZONES) { 8627 freemsg(mp); 8628 return (NULL); 8629 } 8630 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8631 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8632 (char *)ip6h), B_FALSE, B_FALSE, zoneid); 8633 return (NULL); 8634 } 8635 8636 /* 8637 * This packet just has one fragment. Reassembly not 8638 * needed. 8639 */ 8640 if (!more_frags && offset == 0) { 8641 goto reass_done; 8642 } 8643 8644 /* 8645 * Drop the fragmented as early as possible, if 8646 * we don't have resource(s) to re-assemble. 8647 */ 8648 if (ip_reass_queue_bytes == 0) { 8649 freemsg(mp); 8650 return (NULL); 8651 } 8652 8653 /* Record the ECN field info. */ 8654 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8655 /* 8656 * If this is not the first fragment, dump the unfragmentable 8657 * portion of the packet. 8658 */ 8659 if (offset) 8660 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8661 8662 /* 8663 * Fragmentation reassembly. 
Each ILL has a hash table for 8664 * queueing packets undergoing reassembly for all IPIFs 8665 * associated with the ILL. The hash is based on the packet 8666 * IP ident field. The ILL frag hash table was allocated 8667 * as a timer block at the time the ILL was created. Whenever 8668 * there is anything on the reassembly queue, the timer will 8669 * be running. 8670 */ 8671 msg_len = MBLKSIZE(mp); 8672 tail_mp = mp; 8673 while (tail_mp->b_cont != NULL) { 8674 tail_mp = tail_mp->b_cont; 8675 msg_len += MBLKSIZE(tail_mp); 8676 } 8677 /* 8678 * If the reassembly list for this ILL will get too big 8679 * prune it. 8680 */ 8681 8682 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8683 ip_reass_queue_bytes) { 8684 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8685 : (ip_reass_queue_bytes - msg_len)); 8686 pruned = B_TRUE; 8687 } 8688 8689 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8690 mutex_enter(&ipfb->ipfb_lock); 8691 8692 ipfp = &ipfb->ipfb_ipf; 8693 /* Try to find an existing fragment queue for this packet. */ 8694 for (;;) { 8695 ipf = ipfp[0]; 8696 if (ipf) { 8697 /* 8698 * It has to match on ident, source address, and 8699 * dest address. 8700 */ 8701 if (ipf->ipf_ident == ident && 8702 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8703 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8704 8705 /* 8706 * If we have received too many 8707 * duplicate fragments for this packet 8708 * free it. 8709 */ 8710 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8711 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8712 freemsg(mp); 8713 mutex_exit(&ipfb->ipfb_lock); 8714 return (NULL); 8715 } 8716 8717 break; 8718 } 8719 ipfp = &ipf->ipf_hash_next; 8720 continue; 8721 } 8722 8723 8724 /* 8725 * If we pruned the list, do we want to store this new 8726 * fragment?. We apply an optimization here based on the 8727 * fact that most fragments will be received in order. 
8728 * So if the offset of this incoming fragment is zero, 8729 * it is the first fragment of a new packet. We will 8730 * keep it. Otherwise drop the fragment, as we have 8731 * probably pruned the packet already (since the 8732 * packet cannot be found). 8733 */ 8734 8735 if (pruned && offset != 0) { 8736 mutex_exit(&ipfb->ipfb_lock); 8737 freemsg(mp); 8738 return (NULL); 8739 } 8740 8741 /* New guy. Allocate a frag message. */ 8742 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8743 if (!mp1) { 8744 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8745 freemsg(mp); 8746 partial_reass_done: 8747 mutex_exit(&ipfb->ipfb_lock); 8748 return (NULL); 8749 } 8750 8751 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8752 /* 8753 * Too many fragmented packets in this hash bucket. 8754 * Free the oldest. 8755 */ 8756 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8757 } 8758 8759 mp1->b_cont = mp; 8760 8761 /* Initialize the fragment header. */ 8762 ipf = (ipf_t *)mp1->b_rptr; 8763 ipf->ipf_mp = mp1; 8764 ipf->ipf_ptphn = ipfp; 8765 ipfp[0] = ipf; 8766 ipf->ipf_hash_next = NULL; 8767 ipf->ipf_ident = ident; 8768 ipf->ipf_v6src = *v6src_ptr; 8769 ipf->ipf_v6dst = *v6dst_ptr; 8770 /* Record reassembly start time. 
*/ 8771 ipf->ipf_timestamp = gethrestime_sec(); 8772 /* Record ipf generation and account for frag header */ 8773 ipf->ipf_gen = ill->ill_ipf_gen++; 8774 ipf->ipf_count = MBLKSIZE(mp1); 8775 ipf->ipf_protocol = nexthdr; 8776 ipf->ipf_nf_hdr_len = 0; 8777 ipf->ipf_prev_nexthdr_offset = 0; 8778 ipf->ipf_last_frag_seen = B_FALSE; 8779 ipf->ipf_ecn = ecn_info; 8780 ipf->ipf_num_dups = 0; 8781 ipfb->ipfb_frag_pkts++; 8782 ipf->ipf_checksum = 0; 8783 ipf->ipf_checksum_flags = 0; 8784 8785 /* Store checksum value in fragment header */ 8786 if (sum_flags != 0) { 8787 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8788 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8789 ipf->ipf_checksum = sum_val; 8790 ipf->ipf_checksum_flags = sum_flags; 8791 } 8792 8793 /* 8794 * We handle reassembly two ways. In the easy case, 8795 * where all the fragments show up in order, we do 8796 * minimal bookkeeping, and just clip new pieces on 8797 * the end. If we ever see a hole, then we go off 8798 * to ip_reassemble which has to mark the pieces and 8799 * keep track of the number of holes, etc. Obviously, 8800 * the point of having both mechanisms is so we can 8801 * handle the easy case as efficiently as possible. 8802 */ 8803 if (offset == 0) { 8804 /* Easy case, in-order reassembly so far. */ 8805 /* Update the byte count */ 8806 ipf->ipf_count += msg_len; 8807 ipf->ipf_tail_mp = tail_mp; 8808 /* 8809 * Keep track of next expected offset in 8810 * ipf_end. 8811 */ 8812 ipf->ipf_end = end; 8813 ipf->ipf_nf_hdr_len = hdr_length; 8814 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8815 } else { 8816 /* Hard case, hole at the beginning. */ 8817 ipf->ipf_tail_mp = NULL; 8818 /* 8819 * ipf_end == 0 means that we have given up 8820 * on easy reassembly. 8821 */ 8822 ipf->ipf_end = 0; 8823 8824 /* Forget checksum offload from now on */ 8825 ipf->ipf_checksum_flags = 0; 8826 8827 /* 8828 * ipf_hole_cnt is set by ip_reassemble. 8829 * ipf_count is updated by ip_reassemble. 
8830 * No need to check for return value here 8831 * as we don't expect reassembly to complete or 8832 * fail for the first fragment itself. 8833 */ 8834 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8835 msg_len); 8836 } 8837 /* Update per ipfb and ill byte counts */ 8838 ipfb->ipfb_count += ipf->ipf_count; 8839 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8840 ill->ill_frag_count += ipf->ipf_count; 8841 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8842 /* If the frag timer wasn't already going, start it. */ 8843 mutex_enter(&ill->ill_lock); 8844 ill_frag_timer_start(ill); 8845 mutex_exit(&ill->ill_lock); 8846 goto partial_reass_done; 8847 } 8848 8849 /* 8850 * If the packet's flag has changed (it could be coming up 8851 * from an interface different than the previous, therefore 8852 * possibly different checksum capability), then forget about 8853 * any stored checksum states. Otherwise add the value to 8854 * the existing one stored in the fragment header. 8855 */ 8856 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8857 sum_val += ipf->ipf_checksum; 8858 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8859 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8860 ipf->ipf_checksum = sum_val; 8861 } else if (ipf->ipf_checksum_flags != 0) { 8862 /* Forget checksum offload from now on */ 8863 ipf->ipf_checksum_flags = 0; 8864 } 8865 8866 /* 8867 * We have a new piece of a datagram which is already being 8868 * reassembled. Update the ECN info if all IP fragments 8869 * are ECN capable. If there is one which is not, clear 8870 * all the info. If there is at least one which has CE 8871 * code point, IP needs to report that up to transport. 
8872 */ 8873 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8874 if (ecn_info == IPH_ECN_CE) 8875 ipf->ipf_ecn = IPH_ECN_CE; 8876 } else { 8877 ipf->ipf_ecn = IPH_ECN_NECT; 8878 } 8879 8880 if (offset && ipf->ipf_end == offset) { 8881 /* The new fragment fits at the end */ 8882 ipf->ipf_tail_mp->b_cont = mp; 8883 /* Update the byte count */ 8884 ipf->ipf_count += msg_len; 8885 /* Update per ipfb and ill byte counts */ 8886 ipfb->ipfb_count += msg_len; 8887 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8888 ill->ill_frag_count += msg_len; 8889 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8890 if (more_frags) { 8891 /* More to come. */ 8892 ipf->ipf_end = end; 8893 ipf->ipf_tail_mp = tail_mp; 8894 goto partial_reass_done; 8895 } 8896 } else { 8897 /* 8898 * Go do the hard cases. 8899 * Call ip_reassemble(). 8900 */ 8901 int ret; 8902 8903 if (offset == 0) { 8904 if (ipf->ipf_prev_nexthdr_offset == 0) { 8905 ipf->ipf_nf_hdr_len = hdr_length; 8906 ipf->ipf_prev_nexthdr_offset = 8907 *prev_nexthdr_offset; 8908 } 8909 } 8910 /* Save current byte count */ 8911 count = ipf->ipf_count; 8912 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8913 8914 /* Count of bytes added and subtracted (freeb()ed) */ 8915 count = ipf->ipf_count - count; 8916 if (count) { 8917 /* Update per ipfb and ill byte counts */ 8918 ipfb->ipfb_count += count; 8919 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8920 ill->ill_frag_count += count; 8921 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8922 } 8923 if (ret == IP_REASS_PARTIAL) { 8924 goto partial_reass_done; 8925 } else if (ret == IP_REASS_FAILED) { 8926 /* Reassembly failed. 
Free up all resources */ 8927 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8928 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8929 IP_REASS_SET_START(t_mp, 0); 8930 IP_REASS_SET_END(t_mp, 0); 8931 } 8932 freemsg(mp); 8933 goto partial_reass_done; 8934 } 8935 8936 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8937 } 8938 /* 8939 * We have completed reassembly. Unhook the frag header from 8940 * the reassembly list. 8941 * 8942 * Grab the unfragmentable header length next header value out 8943 * of the first fragment 8944 */ 8945 ASSERT(ipf->ipf_nf_hdr_len != 0); 8946 hdr_length = ipf->ipf_nf_hdr_len; 8947 8948 /* 8949 * Before we free the frag header, record the ECN info 8950 * to report back to the transport. 8951 */ 8952 ecn_info = ipf->ipf_ecn; 8953 8954 /* 8955 * Store the nextheader field in the header preceding the fragment 8956 * header 8957 */ 8958 nexthdr = ipf->ipf_protocol; 8959 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8960 ipfp = ipf->ipf_ptphn; 8961 8962 /* We need to supply these to caller */ 8963 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8964 sum_val = ipf->ipf_checksum; 8965 else 8966 sum_val = 0; 8967 8968 mp1 = ipf->ipf_mp; 8969 count = ipf->ipf_count; 8970 ipf = ipf->ipf_hash_next; 8971 if (ipf) 8972 ipf->ipf_ptphn = ipfp; 8973 ipfp[0] = ipf; 8974 ill->ill_frag_count -= count; 8975 ASSERT(ipfb->ipfb_count >= count); 8976 ipfb->ipfb_count -= count; 8977 ipfb->ipfb_frag_pkts--; 8978 mutex_exit(&ipfb->ipfb_lock); 8979 /* Ditch the frag header. */ 8980 mp = mp1->b_cont; 8981 freeb(mp1); 8982 8983 /* 8984 * Make sure the packet is good by doing some sanity 8985 * check. If bad we can silentely drop the packet. 
8986 */ 8987 reass_done: 8988 if (hdr_length < sizeof (ip6_frag_t)) { 8989 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8990 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8991 freemsg(mp); 8992 return (NULL); 8993 } 8994 8995 /* 8996 * Remove the fragment header from the initial header by 8997 * splitting the mblk into the non-fragmentable header and 8998 * everthing after the fragment extension header. This has the 8999 * side effect of putting all the headers that need destination 9000 * processing into the b_cont block-- on return this fact is 9001 * used in order to avoid having to look at the extensions 9002 * already processed. 9003 * 9004 * Note that this code assumes that the unfragmentable portion 9005 * of the header is in the first mblk and increments 9006 * the read pointer past it. If this assumption is broken 9007 * this code fails badly. 9008 */ 9009 if (mp->b_rptr + hdr_length != mp->b_wptr) { 9010 mblk_t *nmp; 9011 9012 if (!(nmp = dupb(mp))) { 9013 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 9014 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 9015 freemsg(mp); 9016 return (NULL); 9017 } 9018 nmp->b_cont = mp->b_cont; 9019 mp->b_cont = nmp; 9020 nmp->b_rptr += hdr_length; 9021 } 9022 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 9023 9024 ip6h = (ip6_t *)mp->b_rptr; 9025 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 9026 9027 /* Restore original IP length in header. */ 9028 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 9029 /* Record the ECN info. */ 9030 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 9031 ip6h->ip6_vcf |= htonl(ecn_info << 20); 9032 9033 /* Reassembly is successful; return checksum information if needed */ 9034 if (cksum_val != NULL) 9035 *cksum_val = sum_val; 9036 if (cksum_flags != NULL) 9037 *cksum_flags = sum_flags; 9038 9039 return (mp); 9040 } 9041 9042 /* 9043 * Walk through the options to see if there is a routing header. 
9044 * If present get the destination which is the last address of 9045 * the option. 9046 */ 9047 in6_addr_t 9048 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 9049 { 9050 uint8_t nexthdr; 9051 uint8_t *whereptr; 9052 ip6_hbh_t *hbhhdr; 9053 ip6_dest_t *dsthdr; 9054 ip6_rthdr0_t *rthdr; 9055 ip6_frag_t *fraghdr; 9056 int ehdrlen; 9057 int left; 9058 in6_addr_t *ap, rv; 9059 9060 if (is_fragment != NULL) 9061 *is_fragment = B_FALSE; 9062 9063 rv = ip6h->ip6_dst; 9064 9065 nexthdr = ip6h->ip6_nxt; 9066 whereptr = (uint8_t *)&ip6h[1]; 9067 for (;;) { 9068 9069 ASSERT(nexthdr != IPPROTO_RAW); 9070 switch (nexthdr) { 9071 case IPPROTO_HOPOPTS: 9072 hbhhdr = (ip6_hbh_t *)whereptr; 9073 nexthdr = hbhhdr->ip6h_nxt; 9074 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9075 break; 9076 case IPPROTO_DSTOPTS: 9077 dsthdr = (ip6_dest_t *)whereptr; 9078 nexthdr = dsthdr->ip6d_nxt; 9079 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9080 break; 9081 case IPPROTO_ROUTING: 9082 rthdr = (ip6_rthdr0_t *)whereptr; 9083 nexthdr = rthdr->ip6r0_nxt; 9084 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9085 9086 left = rthdr->ip6r0_segleft; 9087 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9088 rv = *(ap + left - 1); 9089 /* 9090 * If the caller doesn't care whether the packet 9091 * is a fragment or not, we can stop here since 9092 * we have our destination. 9093 */ 9094 if (is_fragment == NULL) 9095 goto done; 9096 break; 9097 case IPPROTO_FRAGMENT: 9098 fraghdr = (ip6_frag_t *)whereptr; 9099 nexthdr = fraghdr->ip6f_nxt; 9100 ehdrlen = sizeof (ip6_frag_t); 9101 if (is_fragment != NULL) 9102 *is_fragment = B_TRUE; 9103 goto done; 9104 default : 9105 goto done; 9106 } 9107 whereptr += ehdrlen; 9108 } 9109 9110 done: 9111 return (rv); 9112 } 9113 9114 /* 9115 * ip_source_routed_v6: 9116 * This function is called by redirect code in ip_rput_data_v6 to 9117 * know whether this packet is source routed through this node i.e 9118 * whether this node (router) is part of the journey. 
This 9119 * function is called under two cases : 9120 * 9121 * case 1 : Routing header was processed by this node and 9122 * ip_process_rthdr replaced ip6_dst with the next hop 9123 * and we are forwarding the packet to the next hop. 9124 * 9125 * case 2 : Routing header was not processed by this node and we 9126 * are just forwarding the packet. 9127 * 9128 * For case (1) we don't want to send redirects. For case(2) we 9129 * want to send redirects. 9130 */ 9131 static boolean_t 9132 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 9133 { 9134 uint8_t nexthdr; 9135 in6_addr_t *addrptr; 9136 ip6_rthdr0_t *rthdr; 9137 uint8_t numaddr; 9138 ip6_hbh_t *hbhhdr; 9139 uint_t ehdrlen; 9140 uint8_t *byteptr; 9141 9142 ip2dbg(("ip_source_routed_v6\n")); 9143 nexthdr = ip6h->ip6_nxt; 9144 ehdrlen = IPV6_HDR_LEN; 9145 9146 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9147 while (nexthdr == IPPROTO_HOPOPTS || 9148 nexthdr == IPPROTO_DSTOPTS) { 9149 byteptr = (uint8_t *)ip6h + ehdrlen; 9150 /* 9151 * Check if we have already processed 9152 * packets or we are just a forwarding 9153 * router which only pulled up msgs up 9154 * to IPV6HDR and one HBH ext header 9155 */ 9156 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9157 ip2dbg(("ip_source_routed_v6: Extension" 9158 " headers not processed\n")); 9159 return (B_FALSE); 9160 } 9161 hbhhdr = (ip6_hbh_t *)byteptr; 9162 nexthdr = hbhhdr->ip6h_nxt; 9163 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9164 } 9165 switch (nexthdr) { 9166 case IPPROTO_ROUTING: 9167 byteptr = (uint8_t *)ip6h + ehdrlen; 9168 /* 9169 * If for some reason, we haven't pulled up 9170 * the routing hdr data mblk, then we must 9171 * not have processed it at all. So for sure 9172 * we are not part of the source routed journey. 
9173 */ 9174 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9175 ip2dbg(("ip_source_routed_v6: Routing" 9176 " header not processed\n")); 9177 return (B_FALSE); 9178 } 9179 rthdr = (ip6_rthdr0_t *)byteptr; 9180 /* 9181 * Either we are an intermediate router or the 9182 * last hop before destination and we have 9183 * already processed the routing header. 9184 * If segment_left is greater than or equal to zero, 9185 * then we must be the (numaddr - segleft) entry 9186 * of the routing header. Although ip6r0_segleft 9187 * is a unit8_t variable, we still check for zero 9188 * or greater value, if in case the data type 9189 * is changed someday in future. 9190 */ 9191 if (rthdr->ip6r0_segleft > 0 || 9192 rthdr->ip6r0_segleft == 0) { 9193 ire_t *ire = NULL; 9194 9195 numaddr = rthdr->ip6r0_len / 2; 9196 addrptr = (in6_addr_t *)((char *)rthdr + 9197 sizeof (*rthdr)); 9198 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9199 if (addrptr != NULL) { 9200 ire = ire_ctable_lookup_v6(addrptr, NULL, 9201 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9202 MATCH_IRE_TYPE); 9203 if (ire != NULL) { 9204 ire_refrele(ire); 9205 return (B_TRUE); 9206 } 9207 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9208 } 9209 } 9210 /* FALLTHRU */ 9211 default: 9212 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9213 return (B_FALSE); 9214 } 9215 } 9216 9217 /* 9218 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9219 * Assumes that the following set of headers appear in the first 9220 * mblk: 9221 * ip6i_t (if present) CAN also appear as a separate mblk. 9222 * ip6_t 9223 * Any extension headers 9224 * TCP/UDP/SCTP header (if present) 9225 * The routine can handle an ICMPv6 header that is not in the first mblk. 9226 * 9227 * The order to determine the outgoing interface is as follows: 9228 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9229 * 2. If conn_nofailover_ill is set then use that ill. 9230 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9231 * 4. If q is an ill queue and (link local or multicast destination) then 9232 * use that ill. 9233 * 5. If IPV6_BOUND_IF has been set use that ill. 9234 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9235 * look for the best IRE match for the unspecified group to determine 9236 * the ill. 9237 * 7. For unicast: Just do an IRE lookup for the best match. 9238 * 9239 * arg2 is always a queue_t *. 9240 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9241 * the zoneid. 9242 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9243 */ 9244 void 9245 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9246 { 9247 conn_t *connp = NULL; 9248 queue_t *q = (queue_t *)arg2; 9249 ire_t *ire = NULL; 9250 ire_t *sctp_ire = NULL; 9251 ip6_t *ip6h; 9252 in6_addr_t *v6dstp; 9253 ill_t *ill = NULL; 9254 ipif_t *ipif; 9255 ip6i_t *ip6i; 9256 int cksum_request; /* -1 => normal. */ 9257 /* 1 => Skip TCP/UDP/SCTP checksum */ 9258 /* Otherwise contains insert offset for checksum */ 9259 int unspec_src; 9260 boolean_t do_outrequests; /* Increment OutRequests? */ 9261 mib2_ipv6IfStatsEntry_t *mibptr; 9262 int match_flags = MATCH_IRE_ILL_GROUP; 9263 boolean_t attach_if = B_FALSE; 9264 mblk_t *first_mp; 9265 boolean_t mctl_present; 9266 ipsec_out_t *io; 9267 boolean_t drop_if_delayed = B_FALSE; 9268 boolean_t multirt_need_resolve = B_FALSE; 9269 mblk_t *copy_mp = NULL; 9270 int err; 9271 int ip6i_flags = 0; 9272 zoneid_t zoneid; 9273 ill_t *saved_ill = NULL; 9274 boolean_t conn_lock_held; 9275 boolean_t need_decref = B_FALSE; 9276 9277 /* 9278 * Highest bit in version field is Reachability Confirmation bit 9279 * used by NUD in ip_xmit_v6(). 
9280 */ 9281 #ifdef _BIG_ENDIAN 9282 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9283 #else 9284 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9285 #endif 9286 9287 /* 9288 * M_CTL comes from 6 places 9289 * 9290 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9291 * both V4 and V6 datagrams. 9292 * 9293 * 2) AH/ESP sends down M_CTL after doing their job with both 9294 * V4 and V6 datagrams. 9295 * 9296 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9297 * attached. 9298 * 9299 * 4) Notifications from an external resolver (for XRESOLV ifs) 9300 * 9301 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9302 * IPsec hardware acceleration support. 9303 * 9304 * 6) TUN_HELLO. 9305 * 9306 * We need to handle (1)'s IPv6 case and (3) here. For the 9307 * IPv4 case in (1), and (2), IPSEC processing has already 9308 * started. The code in ip_wput() already knows how to handle 9309 * continuing IPSEC processing (for IPv4 and IPv6). All other 9310 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9311 * for handling. 9312 */ 9313 first_mp = mp; 9314 mctl_present = B_FALSE; 9315 io = NULL; 9316 9317 /* Multidata transmit? */ 9318 if (DB_TYPE(mp) == M_MULTIDATA) { 9319 /* 9320 * We should never get here, since all Multidata messages 9321 * originating from tcp should have been directed over to 9322 * tcp_multisend() in the first place. 9323 */ 9324 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 9325 freemsg(mp); 9326 return; 9327 } else if (DB_TYPE(mp) == M_CTL) { 9328 uint32_t mctltype = 0; 9329 uint32_t mlen = MBLKL(first_mp); 9330 9331 mp = mp->b_cont; 9332 mctl_present = B_TRUE; 9333 io = (ipsec_out_t *)first_mp->b_rptr; 9334 9335 /* 9336 * Validate this M_CTL message. The only three types of 9337 * M_CTL messages we expect to see in this code path are 9338 * ipsec_out_t or ipsec_in_t structures (allocated as 9339 * ipsec_info_t unions), or ipsec_ctl_t structures. 
9340 * The ipsec_out_type and ipsec_in_type overlap in the two 9341 * data structures, and they are either set to IPSEC_OUT 9342 * or IPSEC_IN depending on which data structure it is. 9343 * ipsec_ctl_t is an IPSEC_CTL. 9344 * 9345 * All other M_CTL messages are sent to ip_wput_nondata() 9346 * for handling. 9347 */ 9348 if (mlen >= sizeof (io->ipsec_out_type)) 9349 mctltype = io->ipsec_out_type; 9350 9351 if ((mlen == sizeof (ipsec_ctl_t)) && 9352 (mctltype == IPSEC_CTL)) { 9353 ip_output(arg, first_mp, arg2, caller); 9354 return; 9355 } 9356 9357 if ((mlen < sizeof (ipsec_info_t)) || 9358 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9359 mp == NULL) { 9360 ip_wput_nondata(NULL, q, first_mp, NULL); 9361 return; 9362 } 9363 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9364 if (q->q_next == NULL) { 9365 ip6h = (ip6_t *)mp->b_rptr; 9366 /* 9367 * For a freshly-generated TCP dgram that needs IPV6 9368 * processing, don't call ip_wput immediately. We can 9369 * tell this by the ipsec_out_proc_begin. In-progress 9370 * IPSEC_OUT messages have proc_begin set to TRUE, 9371 * and we want to send all IPSEC_IN messages to 9372 * ip_wput() for IPsec processing or finishing. 9373 */ 9374 if (mctltype == IPSEC_IN || 9375 IPVER(ip6h) != IPV6_VERSION || 9376 io->ipsec_out_proc_begin) { 9377 mibptr = &ip6_mib; 9378 goto notv6; 9379 } 9380 } 9381 } else if (DB_TYPE(mp) != M_DATA) { 9382 ip_wput_nondata(NULL, q, mp, NULL); 9383 return; 9384 } 9385 9386 ip6h = (ip6_t *)mp->b_rptr; 9387 9388 if (IPVER(ip6h) != IPV6_VERSION) { 9389 mibptr = &ip6_mib; 9390 goto notv6; 9391 } 9392 9393 if (q->q_next != NULL) { 9394 ill = (ill_t *)q->q_ptr; 9395 /* 9396 * We don't know if this ill will be used for IPv6 9397 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9398 * ipif_set_values() sets the ill_isv6 flag to true if 9399 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9400 * just drop the packet. 
9401 */ 9402 if (!ill->ill_isv6) { 9403 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9404 "ILLF_IPV6 was set\n")); 9405 freemsg(first_mp); 9406 return; 9407 } 9408 /* For uniformity do a refhold */ 9409 mutex_enter(&ill->ill_lock); 9410 if (!ILL_CAN_LOOKUP(ill)) { 9411 mutex_exit(&ill->ill_lock); 9412 freemsg(first_mp); 9413 return; 9414 } 9415 ill_refhold_locked(ill); 9416 mutex_exit(&ill->ill_lock); 9417 mibptr = ill->ill_ip6_mib; 9418 /* 9419 * ill_ip6_mib is allocated by ipif_set_values() when 9420 * ill_isv6 is set. Thus if ill_isv6 is true, 9421 * ill_ip6_mib had better not be NULL. 9422 */ 9423 ASSERT(mibptr != NULL); 9424 unspec_src = 0; 9425 BUMP_MIB(mibptr, ipv6OutRequests); 9426 do_outrequests = B_FALSE; 9427 zoneid = (zoneid_t)(uintptr_t)arg; 9428 } else { 9429 connp = (conn_t *)arg; 9430 ASSERT(connp != NULL); 9431 zoneid = connp->conn_zoneid; 9432 9433 /* is queue flow controlled? */ 9434 if ((q->q_first || connp->conn_draining) && 9435 (caller == IP_WPUT)) { 9436 /* 9437 * 1) TCP sends down M_CTL for detached connections. 9438 * 2) AH/ESP sends down M_CTL. 9439 * 9440 * We don't flow control either of the above. Only 9441 * UDP and others are flow controlled for which we 9442 * can't have a M_CTL. 9443 */ 9444 ASSERT(first_mp == mp); 9445 (void) putq(q, mp); 9446 return; 9447 } 9448 mibptr = &ip6_mib; 9449 unspec_src = connp->conn_unspec_src; 9450 do_outrequests = B_TRUE; 9451 if (mp->b_flag & MSGHASREF) { 9452 mp->b_flag &= ~MSGHASREF; 9453 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9454 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9455 need_decref = B_TRUE; 9456 } 9457 9458 /* 9459 * If there is a policy, try to attach an ipsec_out in 9460 * the front. At the end, first_mp either points to a 9461 * M_DATA message or IPSEC_OUT message linked to a 9462 * M_DATA message. We have to do it now as we might 9463 * lose the "conn" if we go through ip_newroute. 
9464 */ 9465 if (!mctl_present && 9466 (connp->conn_out_enforce_policy || 9467 connp->conn_latch != NULL)) { 9468 ASSERT(first_mp == mp); 9469 /* XXX Any better way to get the protocol fast ? */ 9470 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9471 connp->conn_ulp)) == NULL)) { 9472 if (need_decref) 9473 CONN_DEC_REF(connp); 9474 return; 9475 } else { 9476 ASSERT(mp->b_datap->db_type == M_CTL); 9477 first_mp = mp; 9478 mp = mp->b_cont; 9479 mctl_present = B_TRUE; 9480 io = (ipsec_out_t *)first_mp->b_rptr; 9481 } 9482 } 9483 } 9484 9485 /* check for alignment and full IPv6 header */ 9486 if (!OK_32PTR((uchar_t *)ip6h) || 9487 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9488 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9489 if (do_outrequests) 9490 BUMP_MIB(mibptr, ipv6OutRequests); 9491 BUMP_MIB(mibptr, ipv6OutDiscards); 9492 freemsg(first_mp); 9493 if (ill != NULL) 9494 ill_refrele(ill); 9495 if (need_decref) 9496 CONN_DEC_REF(connp); 9497 return; 9498 } 9499 v6dstp = &ip6h->ip6_dst; 9500 cksum_request = -1; 9501 ip6i = NULL; 9502 9503 /* 9504 * Once neighbor discovery has completed, ndp_process() will provide 9505 * locally generated packets for which processing can be reattempted. 9506 * In these cases, connp is NULL and the original zone is part of a 9507 * prepended ipsec_out_t. 9508 */ 9509 if (io != NULL) { 9510 /* 9511 * When coming from icmp_input_v6, the zoneid might not match 9512 * for the loopback case, because inside icmp_input_v6 the 9513 * queue_t is a conn queue from the sending side. 9514 */ 9515 zoneid = io->ipsec_out_zoneid; 9516 ASSERT(zoneid != ALL_ZONES); 9517 } 9518 9519 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9520 /* 9521 * This is an ip6i_t header followed by an ip6_hdr. 9522 * Check which fields are set. 9523 * 9524 * When the packet comes from a transport we should have 9525 * all needed headers in the first mblk. 
However, when 9526 * going through ip_newroute*_v6 the ip6i might be in 9527 * a separate mblk when we return here. In that case 9528 * we pullup everything to ensure that extension and transport 9529 * headers "stay" in the first mblk. 9530 */ 9531 ip6i = (ip6i_t *)ip6h; 9532 ip6i_flags = ip6i->ip6i_flags; 9533 9534 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9535 ((mp->b_wptr - (uchar_t *)ip6i) >= 9536 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9537 9538 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9539 if (!pullupmsg(mp, -1)) { 9540 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9541 if (do_outrequests) 9542 BUMP_MIB(mibptr, ipv6OutRequests); 9543 BUMP_MIB(mibptr, ipv6OutDiscards); 9544 freemsg(first_mp); 9545 if (ill != NULL) 9546 ill_refrele(ill); 9547 if (need_decref) 9548 CONN_DEC_REF(connp); 9549 return; 9550 } 9551 ip6h = (ip6_t *)mp->b_rptr; 9552 v6dstp = &ip6h->ip6_dst; 9553 ip6i = (ip6i_t *)ip6h; 9554 } 9555 ip6h = (ip6_t *)&ip6i[1]; 9556 9557 /* 9558 * Advance rptr past the ip6i_t to get ready for 9559 * transmitting the packet. However, if the packet gets 9560 * passed to ip_newroute*_v6 then rptr is moved back so 9561 * that the ip6i_t header can be inspected when the 9562 * packet comes back here after passing through 9563 * ire_add_then_send. 9564 */ 9565 mp->b_rptr = (uchar_t *)ip6h; 9566 9567 /* 9568 * IP6I_ATTACH_IF is set in this function when we had a 9569 * conn and it was either bound to the IPFF_NOFAILOVER address 9570 * or IPV6_BOUND_PIF was set. These options override other 9571 * options that set the ifindex. We come here with 9572 * IP6I_ATTACH_IF set when we can't find the ire and 9573 * ip_newroute_v6 is feeding the packet for second time. 
9574 */ 9575 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9576 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9577 ASSERT(ip6i->ip6i_ifindex != 0); 9578 if (ill != NULL) 9579 ill_refrele(ill); 9580 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9581 NULL, NULL, NULL, NULL); 9582 if (ill == NULL) { 9583 if (do_outrequests) 9584 BUMP_MIB(mibptr, ipv6OutRequests); 9585 BUMP_MIB(mibptr, ipv6OutDiscards); 9586 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9587 ip6i->ip6i_ifindex)); 9588 if (need_decref) 9589 CONN_DEC_REF(connp); 9590 freemsg(first_mp); 9591 return; 9592 } 9593 mibptr = ill->ill_ip6_mib; 9594 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9595 /* 9596 * Preserve the index so that when we return 9597 * from IPSEC processing, we know where to 9598 * send the packet. 9599 */ 9600 if (mctl_present) { 9601 ASSERT(io != NULL); 9602 io->ipsec_out_ill_index = 9603 ip6i->ip6i_ifindex; 9604 } 9605 } 9606 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9607 /* 9608 * This is a multipathing probe packet that has 9609 * been delayed in ND resolution. 
Drop the 9610 * packet for the reasons mentioned in 9611 * nce_queue_mp() 9612 */ 9613 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9614 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9615 freemsg(first_mp); 9616 ill_refrele(ill); 9617 if (need_decref) 9618 CONN_DEC_REF(connp); 9619 return; 9620 } 9621 } 9622 } 9623 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9624 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9625 9626 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9627 if (secpolicy_net_rawaccess(cr) != 0) { 9628 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9629 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9630 NULL, zoneid, NULL, 9631 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9632 if (ire == NULL) { 9633 if (do_outrequests) 9634 BUMP_MIB(mibptr, 9635 ipv6OutRequests); 9636 BUMP_MIB(mibptr, ipv6OutDiscards); 9637 ip1dbg(("ip_wput_v6: bad source " 9638 "addr\n")); 9639 freemsg(first_mp); 9640 if (ill != NULL) 9641 ill_refrele(ill); 9642 if (need_decref) 9643 CONN_DEC_REF(connp); 9644 return; 9645 } 9646 ire_refrele(ire); 9647 } 9648 /* No need to verify again when using ip_newroute */ 9649 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9650 } 9651 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9652 /* 9653 * Make sure they match since ip_newroute*_v6 etc might 9654 * (unknown to them) inspect ip6i_nexthop when 9655 * they think they access ip6_dst. 9656 */ 9657 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9658 } 9659 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9660 cksum_request = 1; 9661 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9662 cksum_request = ip6i->ip6i_checksum_off; 9663 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9664 unspec_src = 1; 9665 9666 if (do_outrequests && ill != NULL) { 9667 BUMP_MIB(mibptr, ipv6OutRequests); 9668 do_outrequests = B_FALSE; 9669 } 9670 /* 9671 * Store ip6i_t info that we need after we come back 9672 * from IPSEC processing. 
9673 */ 9674 if (mctl_present) { 9675 ASSERT(io != NULL); 9676 io->ipsec_out_unspec_src = unspec_src; 9677 } 9678 } 9679 if (connp != NULL && connp->conn_dontroute) 9680 ip6h->ip6_hops = 1; 9681 9682 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9683 goto ipv6multicast; 9684 9685 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9686 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9687 ill_t *conn_outgoing_pill; 9688 9689 conn_outgoing_pill = conn_get_held_ill(connp, 9690 &connp->conn_outgoing_pill, &err); 9691 if (err == ILL_LOOKUP_FAILED) { 9692 if (ill != NULL) 9693 ill_refrele(ill); 9694 if (need_decref) 9695 CONN_DEC_REF(connp); 9696 freemsg(first_mp); 9697 return; 9698 } 9699 if (conn_outgoing_pill != NULL) { 9700 if (ill != NULL) 9701 ill_refrele(ill); 9702 ill = conn_outgoing_pill; 9703 attach_if = B_TRUE; 9704 match_flags = MATCH_IRE_ILL; 9705 mibptr = ill->ill_ip6_mib; 9706 9707 /* 9708 * Check if we need an ire that will not be 9709 * looked up by anybody else i.e. HIDDEN. 9710 */ 9711 if (ill_is_probeonly(ill)) 9712 match_flags |= MATCH_IRE_MARK_HIDDEN; 9713 goto send_from_ill; 9714 } 9715 } 9716 9717 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9718 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9719 ill_t *conn_nofailover_ill; 9720 9721 conn_nofailover_ill = conn_get_held_ill(connp, 9722 &connp->conn_nofailover_ill, &err); 9723 if (err == ILL_LOOKUP_FAILED) { 9724 if (ill != NULL) 9725 ill_refrele(ill); 9726 if (need_decref) 9727 CONN_DEC_REF(connp); 9728 freemsg(first_mp); 9729 return; 9730 } 9731 if (conn_nofailover_ill != NULL) { 9732 if (ill != NULL) 9733 ill_refrele(ill); 9734 ill = conn_nofailover_ill; 9735 attach_if = B_TRUE; 9736 /* 9737 * Assumes that ipc_nofailover_ill is used only for 9738 * multipathing probe packets. These packets are better 9739 * dropped, if they are delayed in ND resolution, for 9740 * the reasons described in nce_queue_mp(). 
9741 * IP6I_DROP_IFDELAYED will be set later on in this 9742 * function for this packet. 9743 */ 9744 drop_if_delayed = B_TRUE; 9745 match_flags = MATCH_IRE_ILL; 9746 mibptr = ill->ill_ip6_mib; 9747 9748 /* 9749 * Check if we need an ire that will not be 9750 * looked up by anybody else i.e. HIDDEN. 9751 */ 9752 if (ill_is_probeonly(ill)) 9753 match_flags |= MATCH_IRE_MARK_HIDDEN; 9754 goto send_from_ill; 9755 } 9756 } 9757 9758 /* 9759 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9760 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9761 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9762 */ 9763 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9764 ASSERT(ip6i->ip6i_ifindex != 0); 9765 attach_if = B_TRUE; 9766 ASSERT(ill != NULL); 9767 match_flags = MATCH_IRE_ILL; 9768 9769 /* 9770 * Check if we need an ire that will not be 9771 * looked up by anybody else i.e. HIDDEN. 9772 */ 9773 if (ill_is_probeonly(ill)) 9774 match_flags |= MATCH_IRE_MARK_HIDDEN; 9775 goto send_from_ill; 9776 } 9777 9778 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9779 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9780 ASSERT(ill != NULL); 9781 goto send_from_ill; 9782 } 9783 9784 /* 9785 * 4. If q is an ill queue and (link local or multicast destination) 9786 * then use that ill. 9787 */ 9788 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9789 goto send_from_ill; 9790 } 9791 9792 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9793 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9794 ill_t *conn_outgoing_ill; 9795 9796 conn_outgoing_ill = conn_get_held_ill(connp, 9797 &connp->conn_outgoing_ill, &err); 9798 if (err == ILL_LOOKUP_FAILED) { 9799 if (ill != NULL) 9800 ill_refrele(ill); 9801 if (need_decref) 9802 CONN_DEC_REF(connp); 9803 freemsg(first_mp); 9804 return; 9805 } 9806 if (ill != NULL) 9807 ill_refrele(ill); 9808 ill = conn_outgoing_ill; 9809 mibptr = ill->ill_ip6_mib; 9810 goto send_from_ill; 9811 } 9812 9813 /* 9814 * 6. For unicast: Just do an IRE lookup for the best match. 9815 * If we get here for a link-local address it is rather random 9816 * what interface we pick on a multihomed host. 9817 * *If* there is an IRE_CACHE (and the link-local address 9818 * isn't duplicated on multi links) this will find the IRE_CACHE. 9819 * Otherwise it will use one of the matching IRE_INTERFACE routes 9820 * for the link-local prefix. Hence, applications 9821 * *should* be encouraged to specify an outgoing interface when sending 9822 * to a link local address. 9823 */ 9824 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9825 !connp->conn_fully_bound)) { 9826 /* 9827 * We cache IRE_CACHEs to avoid lookups. We don't do 9828 * this for the tcp global queue and listen end point 9829 * as it does not really have a real destination to 9830 * talk to. 9831 */ 9832 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp)); 9833 } else { 9834 /* 9835 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9836 * grab a lock here to check for CONDEMNED as it is okay 9837 * to send a packet or two with the IRE_CACHE that is going 9838 * away. 9839 */ 9840 mutex_enter(&connp->conn_lock); 9841 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9842 if (ire != NULL && 9843 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9844 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9845 9846 IRE_REFHOLD(ire); 9847 mutex_exit(&connp->conn_lock); 9848 9849 } else { 9850 boolean_t cached = B_FALSE; 9851 9852 connp->conn_ire_cache = NULL; 9853 mutex_exit(&connp->conn_lock); 9854 /* Release the old ire */ 9855 if (ire != NULL && sctp_ire == NULL) 9856 IRE_REFRELE_NOTR(ire); 9857 9858 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9859 MBLK_GETLABEL(mp)); 9860 if (ire != NULL) { 9861 IRE_REFHOLD_NOTR(ire); 9862 9863 mutex_enter(&connp->conn_lock); 9864 if (!(connp->conn_state_flags & CONN_CLOSING) && 9865 (connp->conn_ire_cache == NULL)) { 9866 rw_enter(&ire->ire_bucket->irb_lock, 9867 RW_READER); 9868 if (!(ire->ire_marks & 9869 IRE_MARK_CONDEMNED)) { 9870 connp->conn_ire_cache = ire; 9871 cached = B_TRUE; 9872 } 9873 rw_exit(&ire->ire_bucket->irb_lock); 9874 } 9875 mutex_exit(&connp->conn_lock); 9876 9877 /* 9878 * We can continue to use the ire but since it 9879 * was not cached, we should drop the extra 9880 * reference. 9881 */ 9882 if (!cached) 9883 IRE_REFRELE_NOTR(ire); 9884 } 9885 } 9886 } 9887 9888 if (ire != NULL) { 9889 if (do_outrequests) { 9890 /* Handle IRE_LOCAL's that might appear here */ 9891 if (ire->ire_type == IRE_CACHE) { 9892 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9893 ill_ip6_mib; 9894 } else { 9895 mibptr = ire->ire_ipif->ipif_ill->ill_ip6_mib; 9896 } 9897 BUMP_MIB(mibptr, ipv6OutRequests); 9898 } 9899 ASSERT(!attach_if); 9900 9901 /* 9902 * Check if the ire has the RTF_MULTIRT flag, inherited 9903 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9904 */ 9905 if (ire->ire_flags & RTF_MULTIRT) { 9906 /* 9907 * Force hop limit of multirouted packets if required. 9908 * The hop limit of such packets is bounded by the 9909 * ip_multirt_ttl ndd variable. 9910 * NDP packets must have a hop limit of 255; don't 9911 * change the hop limit in that case. 
9912 */ 9913 if ((ip_multirt_ttl > 0) && 9914 (ip6h->ip6_hops > ip_multirt_ttl) && 9915 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9916 if (ip_debug > 3) { 9917 ip2dbg(("ip_wput_v6: forcing multirt " 9918 "hop limit to %d (was %d) ", 9919 ip_multirt_ttl, ip6h->ip6_hops)); 9920 pr_addr_dbg("v6dst %s\n", AF_INET6, 9921 &ire->ire_addr_v6); 9922 } 9923 ip6h->ip6_hops = ip_multirt_ttl; 9924 } 9925 9926 /* 9927 * We look at this point if there are pending 9928 * unresolved routes. ire_multirt_need_resolve_v6() 9929 * checks in O(n) that all IRE_OFFSUBNET ire 9930 * entries for the packet's destination and 9931 * flagged RTF_MULTIRT are currently resolved. 9932 * If some remain unresolved, we do a copy 9933 * of the current message. It will be used 9934 * to initiate additional route resolutions. 9935 */ 9936 multirt_need_resolve = 9937 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9938 MBLK_GETLABEL(first_mp)); 9939 ip2dbg(("ip_wput_v6: ire %p, " 9940 "multirt_need_resolve %d, first_mp %p\n", 9941 (void *)ire, multirt_need_resolve, 9942 (void *)first_mp)); 9943 if (multirt_need_resolve) { 9944 copy_mp = copymsg(first_mp); 9945 if (copy_mp != NULL) { 9946 MULTIRT_DEBUG_TAG(copy_mp); 9947 } 9948 } 9949 } 9950 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9951 connp, caller, 0, ip6i_flags, zoneid); 9952 if (need_decref) { 9953 CONN_DEC_REF(connp); 9954 connp = NULL; 9955 } 9956 IRE_REFRELE(ire); 9957 9958 /* 9959 * Try to resolve another multiroute if 9960 * ire_multirt_need_resolve_v6() deemed it necessary. 9961 * copy_mp will be consumed (sent or freed) by 9962 * ip_newroute_v6(). 9963 */ 9964 if (copy_mp != NULL) { 9965 if (mctl_present) { 9966 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9967 } else { 9968 ip6h = (ip6_t *)copy_mp->b_rptr; 9969 } 9970 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9971 &ip6h->ip6_src, NULL, zoneid); 9972 } 9973 if (ill != NULL) 9974 ill_refrele(ill); 9975 return; 9976 } 9977 9978 /* 9979 * No full IRE for this destination. 
Send it to 9980 * ip_newroute_v6 to see if anything else matches. 9981 * Mark this packet as having originated on this 9982 * machine. 9983 * Update rptr if there was an ip6i_t header. 9984 */ 9985 mp->b_prev = NULL; 9986 mp->b_next = NULL; 9987 if (ip6i != NULL) 9988 mp->b_rptr -= sizeof (ip6i_t); 9989 9990 if (unspec_src) { 9991 if (ip6i == NULL) { 9992 /* 9993 * Add ip6i_t header to carry unspec_src 9994 * until the packet comes back in ip_wput_v6. 9995 */ 9996 mp = ip_add_info_v6(mp, NULL, v6dstp); 9997 if (mp == NULL) { 9998 if (do_outrequests) 9999 BUMP_MIB(mibptr, ipv6OutRequests); 10000 BUMP_MIB(mibptr, ipv6OutDiscards); 10001 if (mctl_present) 10002 freeb(first_mp); 10003 if (ill != NULL) 10004 ill_refrele(ill); 10005 if (need_decref) 10006 CONN_DEC_REF(connp); 10007 return; 10008 } 10009 ip6i = (ip6i_t *)mp->b_rptr; 10010 10011 if (mctl_present) { 10012 ASSERT(first_mp != mp); 10013 first_mp->b_cont = mp; 10014 } else { 10015 first_mp = mp; 10016 } 10017 10018 if ((mp->b_wptr - (uchar_t *)ip6i) == 10019 sizeof (ip6i_t)) { 10020 /* 10021 * ndp_resolver called from ip_newroute_v6 10022 * expects pulled up message. 
10023 */ 10024 if (!pullupmsg(mp, -1)) { 10025 ip1dbg(("ip_wput_v6: pullupmsg" 10026 " failed\n")); 10027 if (do_outrequests) { 10028 BUMP_MIB(mibptr, 10029 ipv6OutRequests); 10030 } 10031 BUMP_MIB(mibptr, ipv6OutDiscards); 10032 freemsg(first_mp); 10033 if (ill != NULL) 10034 ill_refrele(ill); 10035 if (need_decref) 10036 CONN_DEC_REF(connp); 10037 return; 10038 } 10039 ip6i = (ip6i_t *)mp->b_rptr; 10040 } 10041 ip6h = (ip6_t *)&ip6i[1]; 10042 v6dstp = &ip6h->ip6_dst; 10043 } 10044 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10045 if (mctl_present) { 10046 ASSERT(io != NULL); 10047 io->ipsec_out_unspec_src = unspec_src; 10048 } 10049 } 10050 if (do_outrequests) 10051 BUMP_MIB(mibptr, ipv6OutRequests); 10052 if (need_decref) 10053 CONN_DEC_REF(connp); 10054 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 10055 if (ill != NULL) 10056 ill_refrele(ill); 10057 return; 10058 10059 10060 /* 10061 * Handle multicast packets with or without an conn. 10062 * Assumes that the transports set ip6_hops taking 10063 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10064 * into account. 10065 */ 10066 ipv6multicast: 10067 ip2dbg(("ip_wput_v6: multicast\n")); 10068 10069 /* 10070 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10071 * 2. If conn_nofailover_ill is set then use that ill. 10072 * 10073 * Hold the conn_lock till we refhold the ill of interest that is 10074 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10075 * while holding any locks, postpone the refrele until after the 10076 * conn_lock is dropped. 
10077 */ 10078 if (connp != NULL) { 10079 mutex_enter(&connp->conn_lock); 10080 conn_lock_held = B_TRUE; 10081 } else { 10082 conn_lock_held = B_FALSE; 10083 } 10084 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10085 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10086 if (err == ILL_LOOKUP_FAILED) { 10087 ip1dbg(("ip_output_v6: multicast" 10088 " conn_outgoing_pill no ipif\n")); 10089 multicast_discard: 10090 ASSERT(saved_ill == NULL); 10091 if (conn_lock_held) 10092 mutex_exit(&connp->conn_lock); 10093 if (ill != NULL) 10094 ill_refrele(ill); 10095 freemsg(first_mp); 10096 if (do_outrequests) 10097 BUMP_MIB(mibptr, ipv6OutDiscards); 10098 if (need_decref) 10099 CONN_DEC_REF(connp); 10100 return; 10101 } 10102 saved_ill = ill; 10103 ill = connp->conn_outgoing_pill; 10104 attach_if = B_TRUE; 10105 match_flags = MATCH_IRE_ILL; 10106 mibptr = ill->ill_ip6_mib; 10107 10108 /* 10109 * Check if we need an ire that will not be 10110 * looked up by anybody else i.e. HIDDEN. 10111 */ 10112 if (ill_is_probeonly(ill)) 10113 match_flags |= MATCH_IRE_MARK_HIDDEN; 10114 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10115 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10116 if (err == ILL_LOOKUP_FAILED) { 10117 ip1dbg(("ip_output_v6: multicast" 10118 " conn_nofailover_ill no ipif\n")); 10119 goto multicast_discard; 10120 } 10121 saved_ill = ill; 10122 ill = connp->conn_nofailover_ill; 10123 attach_if = B_TRUE; 10124 match_flags = MATCH_IRE_ILL; 10125 10126 /* 10127 * Check if we need an ire that will not be 10128 * looked up by anybody else i.e. HIDDEN. 10129 */ 10130 if (ill_is_probeonly(ill)) 10131 match_flags |= MATCH_IRE_MARK_HIDDEN; 10132 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10133 /* 10134 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10135 * we should have an ip6i_t with IP6I_ATTACH_IF if 10136 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10137 * used on this endpoint. 10138 */ 10139 ASSERT(ip6i->ip6i_ifindex != 0); 10140 attach_if = B_TRUE; 10141 ASSERT(ill != NULL); 10142 match_flags = MATCH_IRE_ILL; 10143 10144 /* 10145 * Check if we need an ire that will not be 10146 * looked up by anybody else i.e. HIDDEN. 10147 */ 10148 if (ill_is_probeonly(ill)) 10149 match_flags |= MATCH_IRE_MARK_HIDDEN; 10150 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10151 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10152 10153 ASSERT(ill != NULL); 10154 } else if (ill != NULL) { 10155 /* 10156 * 4. If q is an ill queue and (link local or multicast 10157 * destination) then use that ill. 10158 * We don't need the ipif initialization here. 10159 * This useless assert below is just to prevent lint from 10160 * reporting a null body if statement. 10161 */ 10162 ASSERT(ill != NULL); 10163 } else if (connp != NULL) { 10164 /* 10165 * 5. If IPV6_BOUND_IF has been set use that ill. 10166 * 10167 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10168 * Otherwise look for the best IRE match for the unspecified 10169 * group to determine the ill. 10170 * 10171 * conn_multicast_ill is used for only IPv6 packets. 10172 * conn_multicast_ipif is used for only IPv4 packets. 10173 * Thus a PF_INET6 socket send both IPv4 and IPv6 10174 * multicast packets using different IP*_MULTICAST_IF 10175 * interfaces. 
10176 */ 10177 if (connp->conn_outgoing_ill != NULL) { 10178 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10179 if (err == ILL_LOOKUP_FAILED) { 10180 ip1dbg(("ip_output_v6: multicast" 10181 " conn_outgoing_ill no ipif\n")); 10182 goto multicast_discard; 10183 } 10184 ill = connp->conn_outgoing_ill; 10185 } else if (connp->conn_multicast_ill != NULL) { 10186 err = ill_check_and_refhold(connp->conn_multicast_ill); 10187 if (err == ILL_LOOKUP_FAILED) { 10188 ip1dbg(("ip_output_v6: multicast" 10189 " conn_multicast_ill no ipif\n")); 10190 goto multicast_discard; 10191 } 10192 ill = connp->conn_multicast_ill; 10193 } else { 10194 mutex_exit(&connp->conn_lock); 10195 conn_lock_held = B_FALSE; 10196 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 10197 if (ipif == NULL) { 10198 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10199 goto multicast_discard; 10200 } 10201 /* 10202 * We have a ref to this ipif, so we can safely 10203 * access ipif_ill. 10204 */ 10205 ill = ipif->ipif_ill; 10206 mutex_enter(&ill->ill_lock); 10207 if (!ILL_CAN_LOOKUP(ill)) { 10208 mutex_exit(&ill->ill_lock); 10209 ipif_refrele(ipif); 10210 ill = NULL; 10211 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10212 goto multicast_discard; 10213 } 10214 ill_refhold_locked(ill); 10215 mutex_exit(&ill->ill_lock); 10216 ipif_refrele(ipif); 10217 /* 10218 * Save binding until IPV6_MULTICAST_IF 10219 * changes it 10220 */ 10221 mutex_enter(&connp->conn_lock); 10222 connp->conn_multicast_ill = ill; 10223 connp->conn_orig_multicast_ifindex = 10224 ill->ill_phyint->phyint_ifindex; 10225 mutex_exit(&connp->conn_lock); 10226 } 10227 } 10228 if (conn_lock_held) 10229 mutex_exit(&connp->conn_lock); 10230 10231 if (saved_ill != NULL) 10232 ill_refrele(saved_ill); 10233 10234 ASSERT(ill != NULL); 10235 /* 10236 * For multicast loopback interfaces replace the multicast address 10237 * with a unicast address for the ire lookup. 
10238 */ 10239 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 10240 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10241 10242 mibptr = ill->ill_ip6_mib; 10243 if (do_outrequests) { 10244 BUMP_MIB(mibptr, ipv6OutRequests); 10245 do_outrequests = B_FALSE; 10246 } 10247 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10248 10249 /* 10250 * As we may lose the conn by the time we reach ip_wput_ire_v6 10251 * we copy conn_multicast_loop and conn_dontroute on to an 10252 * ipsec_out. In case if this datagram goes out secure, 10253 * we need the ill_index also. Copy that also into the 10254 * ipsec_out. 10255 */ 10256 if (mctl_present) { 10257 io = (ipsec_out_t *)first_mp->b_rptr; 10258 ASSERT(first_mp->b_datap->db_type == M_CTL); 10259 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10260 } else { 10261 ASSERT(mp == first_mp); 10262 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 10263 BUMP_MIB(mibptr, ipv6OutDiscards); 10264 freemsg(mp); 10265 if (ill != NULL) 10266 ill_refrele(ill); 10267 if (need_decref) 10268 CONN_DEC_REF(connp); 10269 return; 10270 } 10271 io = (ipsec_out_t *)first_mp->b_rptr; 10272 /* This is not a secure packet */ 10273 io->ipsec_out_secure = B_FALSE; 10274 io->ipsec_out_use_global_policy = B_TRUE; 10275 io->ipsec_out_zoneid = 10276 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10277 first_mp->b_cont = mp; 10278 mctl_present = B_TRUE; 10279 } 10280 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10281 io->ipsec_out_unspec_src = unspec_src; 10282 if (connp != NULL) 10283 io->ipsec_out_dontroute = connp->conn_dontroute; 10284 10285 send_from_ill: 10286 ASSERT(ill != NULL); 10287 ASSERT(mibptr == ill->ill_ip6_mib); 10288 if (do_outrequests) { 10289 BUMP_MIB(mibptr, ipv6OutRequests); 10290 do_outrequests = B_FALSE; 10291 } 10292 10293 if (io != NULL) 10294 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10295 10296 /* 10297 * When a specific ill is specified (using IPV6_PKTINFO, 10298 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10299 * on routing entries (ftable and ctable) that have a matching 10300 * ire->ire_ipif->ipif_ill. Thus this can only be used 10301 * for destinations that are on-link for the specific ill 10302 * and that can appear on multiple links. Thus it is useful 10303 * for multicast destinations, link-local destinations, and 10304 * at some point perhaps for site-local destinations (if the 10305 * node sits at a site boundary). 10306 * We create the cache entries in the regular ctable since 10307 * it can not "confuse" things for other destinations. 10308 * table. 10309 * 10310 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10311 * It is used only when ire_cache_lookup is used above. 10312 */ 10313 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10314 zoneid, MBLK_GETLABEL(mp), match_flags); 10315 if (ire != NULL) { 10316 /* 10317 * Check if the ire has the RTF_MULTIRT flag, inherited 10318 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10319 */ 10320 if (ire->ire_flags & RTF_MULTIRT) { 10321 /* 10322 * Force hop limit of multirouted packets if required. 10323 * The hop limit of such packets is bounded by the 10324 * ip_multirt_ttl ndd variable. 
10325 * NDP packets must have a hop limit of 255; don't 10326 * change the hop limit in that case. 10327 */ 10328 if ((ip_multirt_ttl > 0) && 10329 (ip6h->ip6_hops > ip_multirt_ttl) && 10330 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10331 if (ip_debug > 3) { 10332 ip2dbg(("ip_wput_v6: forcing multirt " 10333 "hop limit to %d (was %d) ", 10334 ip_multirt_ttl, ip6h->ip6_hops)); 10335 pr_addr_dbg("v6dst %s\n", AF_INET6, 10336 &ire->ire_addr_v6); 10337 } 10338 ip6h->ip6_hops = ip_multirt_ttl; 10339 } 10340 10341 /* 10342 * We look at this point if there are pending 10343 * unresolved routes. ire_multirt_need_resolve_v6() 10344 * checks in O(n) that all IRE_OFFSUBNET ire 10345 * entries for the packet's destination and 10346 * flagged RTF_MULTIRT are currently resolved. 10347 * If some remain unresolved, we make a copy 10348 * of the current message. It will be used 10349 * to initiate additional route resolutions. 10350 */ 10351 multirt_need_resolve = 10352 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10353 MBLK_GETLABEL(first_mp)); 10354 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10355 "multirt_need_resolve %d, first_mp %p\n", 10356 (void *)ire, multirt_need_resolve, 10357 (void *)first_mp)); 10358 if (multirt_need_resolve) { 10359 copy_mp = copymsg(first_mp); 10360 if (copy_mp != NULL) { 10361 MULTIRT_DEBUG_TAG(copy_mp); 10362 } 10363 } 10364 } 10365 10366 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10367 ill->ill_name, (void *)ire, 10368 ill->ill_phyint->phyint_ifindex)); 10369 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10370 connp, caller, 10371 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10372 ip6i_flags, zoneid); 10373 ire_refrele(ire); 10374 if (need_decref) { 10375 CONN_DEC_REF(connp); 10376 connp = NULL; 10377 } 10378 10379 /* 10380 * Try to resolve another multiroute if 10381 * ire_multirt_need_resolve_v6() deemed it necessary. 10382 * copy_mp will be consumed (sent or freed) by 10383 * ip_newroute_[ipif_]v6(). 
10384 */ 10385 if (copy_mp != NULL) { 10386 if (mctl_present) { 10387 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10388 } else { 10389 ip6h = (ip6_t *)copy_mp->b_rptr; 10390 } 10391 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10392 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10393 zoneid); 10394 if (ipif == NULL) { 10395 ip1dbg(("ip_wput_v6: No ipif for " 10396 "multicast\n")); 10397 MULTIRT_DEBUG_UNTAG(copy_mp); 10398 freemsg(copy_mp); 10399 return; 10400 } 10401 ip_newroute_ipif_v6(q, copy_mp, ipif, 10402 ip6h->ip6_dst, unspec_src, zoneid); 10403 ipif_refrele(ipif); 10404 } else { 10405 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10406 &ip6h->ip6_src, ill, zoneid); 10407 } 10408 } 10409 ill_refrele(ill); 10410 return; 10411 } 10412 if (need_decref) { 10413 CONN_DEC_REF(connp); 10414 connp = NULL; 10415 } 10416 10417 /* Update rptr if there was an ip6i_t header. */ 10418 if (ip6i != NULL) 10419 mp->b_rptr -= sizeof (ip6i_t); 10420 if (unspec_src || attach_if) { 10421 if (ip6i == NULL) { 10422 /* 10423 * Add ip6i_t header to carry unspec_src 10424 * or attach_if until the packet comes back in 10425 * ip_wput_v6. 10426 */ 10427 if (mctl_present) { 10428 first_mp->b_cont = 10429 ip_add_info_v6(mp, NULL, v6dstp); 10430 mp = first_mp->b_cont; 10431 if (mp == NULL) 10432 freeb(first_mp); 10433 } else { 10434 first_mp = mp = ip_add_info_v6(mp, NULL, 10435 v6dstp); 10436 } 10437 if (mp == NULL) { 10438 BUMP_MIB(mibptr, ipv6OutDiscards); 10439 ill_refrele(ill); 10440 return; 10441 } 10442 ip6i = (ip6i_t *)mp->b_rptr; 10443 if ((mp->b_wptr - (uchar_t *)ip6i) == 10444 sizeof (ip6i_t)) { 10445 /* 10446 * ndp_resolver called from ip_newroute_v6 10447 * expects a pulled up message. 
10448 */ 10449 if (!pullupmsg(mp, -1)) { 10450 ip1dbg(("ip_wput_v6: pullupmsg" 10451 " failed\n")); 10452 BUMP_MIB(mibptr, ipv6OutDiscards); 10453 freemsg(first_mp); 10454 return; 10455 } 10456 ip6i = (ip6i_t *)mp->b_rptr; 10457 } 10458 ip6h = (ip6_t *)&ip6i[1]; 10459 v6dstp = &ip6h->ip6_dst; 10460 } 10461 if (unspec_src) 10462 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10463 if (attach_if) { 10464 /* 10465 * Bind to nofailover/BOUND_PIF overrides ifindex. 10466 */ 10467 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10468 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10469 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10470 if (drop_if_delayed) { 10471 /* This is a multipathing probe packet */ 10472 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10473 } 10474 } 10475 if (mctl_present) { 10476 ASSERT(io != NULL); 10477 io->ipsec_out_unspec_src = unspec_src; 10478 } 10479 } 10480 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10481 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10482 unspec_src, zoneid); 10483 } else { 10484 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10485 zoneid); 10486 } 10487 ill_refrele(ill); 10488 return; 10489 10490 notv6: 10491 /* 10492 * XXX implement a IPv4 and IPv6 packet counter per conn and 10493 * switch when ratio exceeds e.g. 10:1 10494 */ 10495 if (q->q_next == NULL) { 10496 connp = Q_TO_CONN(q); 10497 10498 if (IPCL_IS_TCP(connp)) { 10499 /* change conn_send for the tcp_v4_connections */ 10500 connp->conn_send = ip_output; 10501 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10502 /* The 'q' is the default SCTP queue */ 10503 connp = (conn_t *)arg; 10504 } else { 10505 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 10506 } 10507 } 10508 BUMP_MIB(mibptr, ipv6OutIPv4); 10509 (void) ip_output(arg, first_mp, arg2, caller); 10510 if (ill != NULL) 10511 ill_refrele(ill); 10512 } 10513 10514 /* 10515 * If this is a conn_t queue, then we pass in the conn. This includes the 10516 * zoneid. 
10517 * Otherwise, this is a message for an ill_t queue, 10518 * in which case we use the global zoneid since those are all part of 10519 * the global zone. 10520 */ 10521 static void 10522 ip_wput_v6(queue_t *q, mblk_t *mp) 10523 { 10524 if (CONN_Q(q)) 10525 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10526 else 10527 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10528 } 10529 10530 static void 10531 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10532 { 10533 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10534 io->ipsec_out_attach_if = B_TRUE; 10535 io->ipsec_out_ill_index = attach_index; 10536 } 10537 10538 /* 10539 * NULL send-to queue - packet is to be delivered locally. 10540 */ 10541 void 10542 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10543 ire_t *ire, int fanout_flags) 10544 { 10545 uint32_t ports; 10546 mblk_t *mp = first_mp, *first_mp1; 10547 boolean_t mctl_present; 10548 uint8_t nexthdr; 10549 uint16_t hdr_length; 10550 ipsec_out_t *io; 10551 mib2_ipv6IfStatsEntry_t *mibptr; 10552 ilm_t *ilm; 10553 uint_t nexthdr_offset; 10554 10555 if (DB_TYPE(mp) == M_CTL) { 10556 io = (ipsec_out_t *)mp->b_rptr; 10557 if (!io->ipsec_out_secure) { 10558 mp = mp->b_cont; 10559 freeb(first_mp); 10560 first_mp = mp; 10561 mctl_present = B_FALSE; 10562 } else { 10563 mctl_present = B_TRUE; 10564 mp = first_mp->b_cont; 10565 ipsec_out_to_in(first_mp); 10566 } 10567 } else { 10568 mctl_present = B_FALSE; 10569 } 10570 10571 nexthdr = ip6h->ip6_nxt; 10572 mibptr = ill->ill_ip6_mib; 10573 10574 /* Fastpath */ 10575 switch (nexthdr) { 10576 case IPPROTO_TCP: 10577 case IPPROTO_UDP: 10578 case IPPROTO_ICMPV6: 10579 case IPPROTO_SCTP: 10580 hdr_length = IPV6_HDR_LEN; 10581 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10582 (uchar_t *)ip6h); 10583 break; 10584 default: { 10585 uint8_t *nexthdrp; 10586 10587 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10588 &hdr_length, &nexthdrp)) { 10589 /* Malformed packet */ 10590 BUMP_MIB(mibptr, 
ipv6OutDiscards); 10591 freemsg(first_mp); 10592 return; 10593 } 10594 nexthdr = *nexthdrp; 10595 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10596 break; 10597 } 10598 } 10599 10600 10601 DTRACE_PROBE4(ip6__loopback__in__start, 10602 ill_t *, ill, ill_t *, NULL, 10603 ip6_t *, ip6h, mblk_t *, first_mp); 10604 10605 FW_HOOKS6(ip6_loopback_in_event, ipv6firewall_loopback_in, 10606 ill, NULL, ip6h, first_mp, mp); 10607 10608 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10609 10610 if (first_mp == NULL) 10611 return; 10612 10613 nexthdr = ip6h->ip6_nxt; 10614 10615 UPDATE_OB_PKT_COUNT(ire); 10616 ire->ire_last_used_time = lbolt; 10617 10618 /* 10619 * Remove reacability confirmation bit from version field 10620 * before looping back the packet. 10621 */ 10622 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10623 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10624 } 10625 10626 switch (nexthdr) { 10627 case IPPROTO_TCP: 10628 if (DB_TYPE(mp) == M_DATA) { 10629 /* 10630 * M_DATA mblk, so init mblk (chain) for 10631 * no struio(). 
10632 */ 10633 mblk_t *mp1 = mp; 10634 10635 do { 10636 mp1->b_datap->db_struioflag = 0; 10637 } while ((mp1 = mp1->b_cont) != NULL); 10638 } 10639 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10640 TCP_PORTS_OFFSET); 10641 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10642 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10643 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10644 hdr_length, mctl_present, ire->ire_zoneid); 10645 return; 10646 10647 case IPPROTO_UDP: 10648 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10649 UDP_PORTS_OFFSET); 10650 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10651 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10652 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10653 return; 10654 10655 case IPPROTO_SCTP: 10656 { 10657 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10658 10659 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10660 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10661 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10662 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10663 ire->ire_zoneid); 10664 return; 10665 } 10666 case IPPROTO_ICMPV6: { 10667 icmp6_t *icmp6; 10668 10669 /* check for full IPv6+ICMPv6 header */ 10670 if ((mp->b_wptr - mp->b_rptr) < 10671 (hdr_length + ICMP6_MINLEN)) { 10672 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10673 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10674 " failed\n")); 10675 BUMP_MIB(mibptr, ipv6OutDiscards); 10676 freemsg(first_mp); 10677 return; 10678 } 10679 ip6h = (ip6_t *)mp->b_rptr; 10680 } 10681 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10682 10683 /* Update output mib stats */ 10684 icmp_update_out_mib_v6(ill, icmp6); 10685 10686 /* Check variable for testing applications */ 10687 if (ipv6_drop_inbound_icmpv6) { 10688 freemsg(first_mp); 10689 return; 10690 } 10691 /* 10692 * Assume that there is always at least one conn for 10693 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10694 * where there is no conn. 
10695 */ 10696 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10697 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10698 /* 10699 * In the multicast case, applications may have 10700 * joined the group from different zones, so we 10701 * need to deliver the packet to each of them. 10702 * Loop through the multicast memberships 10703 * structures (ilm) on the receive ill and send 10704 * a copy of the packet up each matching one. 10705 * However, we don't do this for multicasts sent 10706 * on the loopback interface (PHYI_LOOPBACK flag 10707 * set) as they must stay in the sender's zone. 10708 */ 10709 ILM_WALKER_HOLD(ill); 10710 for (ilm = ill->ill_ilm; ilm != NULL; 10711 ilm = ilm->ilm_next) { 10712 if (ilm->ilm_flags & ILM_DELETED) 10713 continue; 10714 if (!IN6_ARE_ADDR_EQUAL( 10715 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10716 continue; 10717 if ((fanout_flags & 10718 IP_FF_NO_MCAST_LOOP) && 10719 ilm->ilm_zoneid == ire->ire_zoneid) 10720 continue; 10721 if (!ipif_lookup_zoneid(ill, 10722 ilm->ilm_zoneid, IPIF_UP, NULL)) 10723 continue; 10724 10725 first_mp1 = ip_copymsg(first_mp); 10726 if (first_mp1 == NULL) 10727 continue; 10728 icmp_inbound_v6(q, first_mp1, ill, 10729 hdr_length, mctl_present, 10730 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10731 NULL); 10732 } 10733 ILM_WALKER_RELE(ill); 10734 } else { 10735 first_mp1 = ip_copymsg(first_mp); 10736 if (first_mp1 != NULL) 10737 icmp_inbound_v6(q, first_mp1, ill, 10738 hdr_length, mctl_present, 10739 IP6_NO_IPPOLICY, ire->ire_zoneid, 10740 NULL); 10741 } 10742 } 10743 /* FALLTHRU */ 10744 default: { 10745 /* 10746 * Handle protocols with which IPv6 is less intimate. 10747 */ 10748 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10749 10750 /* 10751 * Enable sending ICMP for "Unknown" nexthdr 10752 * case. i.e. where we did not FALLTHRU from 10753 * IPPROTO_ICMPV6 processing case above. 
10754 */ 10755 if (nexthdr != IPPROTO_ICMPV6) 10756 fanout_flags |= IP_FF_SEND_ICMP; 10757 /* 10758 * Note: There can be more than one stream bound 10759 * to a particular protocol. When this is the case, 10760 * each one gets a copy of any incoming packets. 10761 */ 10762 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10763 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10764 mctl_present, ire->ire_zoneid); 10765 return; 10766 } 10767 } 10768 } 10769 10770 /* 10771 * Send packet using IRE. 10772 * Checksumming is controlled by cksum_request: 10773 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10774 * 1 => Skip TCP/UDP/SCTP checksum 10775 * Otherwise => checksum_request contains insert offset for checksum 10776 * 10777 * Assumes that the following set of headers appear in the first 10778 * mblk: 10779 * ip6_t 10780 * Any extension headers 10781 * TCP/UDP/SCTP header (if present) 10782 * The routine can handle an ICMPv6 header that is not in the first mblk. 10783 * 10784 * NOTE : This function does not ire_refrele the ire passed in as the 10785 * argument unlike ip_wput_ire where the REFRELE is done. 10786 * Refer to ip_wput_ire for more on this. 10787 */ 10788 static void 10789 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10790 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10791 zoneid_t zoneid) 10792 { 10793 ip6_t *ip6h; 10794 uint8_t nexthdr; 10795 uint16_t hdr_length; 10796 uint_t reachable = 0x0; 10797 ill_t *ill; 10798 mib2_ipv6IfStatsEntry_t *mibptr; 10799 mblk_t *first_mp; 10800 boolean_t mctl_present; 10801 ipsec_out_t *io; 10802 boolean_t conn_dontroute; /* conn value for multicast */ 10803 boolean_t conn_multicast_loop; /* conn value for multicast */ 10804 boolean_t multicast_forward; /* Should we forward ? 
*/ 10805 int max_frag; 10806 10807 ill = ire_to_ill(ire); 10808 first_mp = mp; 10809 multicast_forward = B_FALSE; 10810 10811 if (mp->b_datap->db_type != M_CTL) { 10812 ip6h = (ip6_t *)first_mp->b_rptr; 10813 } else { 10814 io = (ipsec_out_t *)first_mp->b_rptr; 10815 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10816 /* 10817 * Grab the zone id now because the M_CTL can be discarded by 10818 * ip_wput_ire_parse_ipsec_out() below. 10819 */ 10820 ASSERT(zoneid == io->ipsec_out_zoneid); 10821 ASSERT(zoneid != ALL_ZONES); 10822 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10823 /* 10824 * For the multicast case, ipsec_out carries conn_dontroute and 10825 * conn_multicast_loop as conn may not be available here. We 10826 * need this for multicast loopback and forwarding which is done 10827 * later in the code. 10828 */ 10829 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10830 conn_dontroute = io->ipsec_out_dontroute; 10831 conn_multicast_loop = io->ipsec_out_multicast_loop; 10832 /* 10833 * If conn_dontroute is not set or conn_multicast_loop 10834 * is set, we need to do forwarding/loopback. For 10835 * datagrams from ip_wput_multicast, conn_dontroute is 10836 * set to B_TRUE and conn_multicast_loop is set to 10837 * B_FALSE so that we neither do forwarding nor 10838 * loopback. 10839 */ 10840 if (!conn_dontroute || conn_multicast_loop) 10841 multicast_forward = B_TRUE; 10842 } 10843 } 10844 10845 /* 10846 * If the sender didn't supply the hop limit and there is a default 10847 * unicast hop limit associated with the output interface, we use 10848 * that if the packet is unicast. Interface specific unicast hop 10849 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10850 */ 10851 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10852 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10853 ip6h->ip6_hops = ill->ill_max_hops; 10854 } 10855 10856 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10857 ire->ire_zoneid != ALL_ZONES) { 10858 /* 10859 * When a zone sends a packet to another zone, we try to deliver 10860 * the packet under the same conditions as if the destination 10861 * was a real node on the network. To do so, we look for a 10862 * matching route in the forwarding table. 10863 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10864 * ip_newroute_v6() does. 10865 * Note that IRE_LOCAL are special, since they are used 10866 * when the zoneid doesn't match in some cases. This means that 10867 * we need to handle ipha_src differently since ire_src_addr 10868 * belongs to the receiving zone instead of the sending zone. 10869 * When ip_restrict_interzone_loopback is set, then 10870 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10871 * for loopback between zones when the logical "Ethernet" would 10872 * have looped them back. 
10873 */ 10874 ire_t *src_ire; 10875 10876 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10877 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10878 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10879 if (src_ire != NULL && 10880 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10881 (!ip_restrict_interzone_loopback || 10882 ire_local_same_ill_group(ire, src_ire))) { 10883 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10884 !unspec_src) { 10885 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10886 } 10887 ire_refrele(src_ire); 10888 } else { 10889 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 10890 if (src_ire != NULL) { 10891 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10892 ire_refrele(src_ire); 10893 freemsg(first_mp); 10894 return; 10895 } 10896 ire_refrele(src_ire); 10897 } 10898 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10899 /* Failed */ 10900 freemsg(first_mp); 10901 return; 10902 } 10903 icmp_unreachable_v6(q, first_mp, 10904 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10905 zoneid); 10906 return; 10907 } 10908 } 10909 10910 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10911 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10912 connp, unspec_src, zoneid); 10913 if (mp == NULL) { 10914 return; 10915 } 10916 } 10917 10918 first_mp = mp; 10919 if (mp->b_datap->db_type == M_CTL) { 10920 io = (ipsec_out_t *)mp->b_rptr; 10921 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10922 mp = mp->b_cont; 10923 mctl_present = B_TRUE; 10924 } else { 10925 mctl_present = B_FALSE; 10926 } 10927 10928 ip6h = (ip6_t *)mp->b_rptr; 10929 nexthdr = ip6h->ip6_nxt; 10930 mibptr = ill->ill_ip6_mib; 10931 10932 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10933 ipif_t *ipif; 10934 10935 /* 10936 * Select the source address using ipif_select_source_v6. 
10937 */ 10938 if (attach_index != 0) { 10939 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10940 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10941 } else { 10942 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10943 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10944 } 10945 if (ipif == NULL) { 10946 if (ip_debug > 2) { 10947 /* ip1dbg */ 10948 pr_addr_dbg("ip_wput_ire_v6: no src for " 10949 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10950 printf("ip_wput_ire_v6: interface name %s\n", 10951 ill->ill_name); 10952 } 10953 freemsg(first_mp); 10954 return; 10955 } 10956 ip6h->ip6_src = ipif->ipif_v6src_addr; 10957 ipif_refrele(ipif); 10958 } 10959 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10960 if ((connp != NULL && connp->conn_multicast_loop) || 10961 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10962 ilm_t *ilm; 10963 10964 ILM_WALKER_HOLD(ill); 10965 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10966 ILM_WALKER_RELE(ill); 10967 if (ilm != NULL) { 10968 mblk_t *nmp; 10969 int fanout_flags = 0; 10970 10971 if (connp != NULL && 10972 !connp->conn_multicast_loop) { 10973 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10974 } 10975 ip1dbg(("ip_wput_ire_v6: " 10976 "Loopback multicast\n")); 10977 nmp = ip_copymsg(first_mp); 10978 if (nmp != NULL) { 10979 ip6_t *nip6h; 10980 mblk_t *mp_ip6h; 10981 10982 if (mctl_present) { 10983 nip6h = (ip6_t *) 10984 nmp->b_cont->b_rptr; 10985 mp_ip6h = nmp->b_cont; 10986 } else { 10987 nip6h = (ip6_t *)nmp->b_rptr; 10988 mp_ip6h = nmp; 10989 } 10990 10991 DTRACE_PROBE4( 10992 ip6__loopback__out__start, 10993 ill_t *, NULL, 10994 ill_t *, ill, 10995 ip6_t *, nip6h, 10996 mblk_t *, nmp); 10997 10998 FW_HOOKS6(ip6_loopback_out_event, 10999 ipv6firewall_loopback_out, 11000 NULL, ill, nip6h, nmp, mp_ip6h); 11001 11002 DTRACE_PROBE1( 11003 ip6__loopback__out__end, 11004 mblk_t *, nmp); 11005 11006 if (nmp != NULL) { 11007 /* 11008 * Deliver locally and to 11009 * every local zone, except 11010 * the sending zone 
when 11011 * IPV6_MULTICAST_LOOP is 11012 * disabled. 11013 */ 11014 ip_wput_local_v6(RD(q), ill, 11015 nip6h, nmp, 11016 ire, fanout_flags); 11017 } 11018 } else { 11019 BUMP_MIB(mibptr, ipv6OutDiscards); 11020 ip1dbg(("ip_wput_ire_v6: " 11021 "copymsg failed\n")); 11022 } 11023 } 11024 } 11025 if (ip6h->ip6_hops == 0 || 11026 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 11027 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 11028 /* 11029 * Local multicast or just loopback on loopback 11030 * interface. 11031 */ 11032 BUMP_MIB(mibptr, ipv6OutMcastPkts); 11033 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11034 freemsg(first_mp); 11035 return; 11036 } 11037 } 11038 11039 if (ire->ire_stq != NULL) { 11040 uint32_t sum; 11041 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11042 ill_phyint->phyint_ifindex; 11043 queue_t *dev_q = ire->ire_stq->q_next; 11044 11045 /* 11046 * non-NULL send-to queue - packet is to be sent 11047 * out an interface. 11048 */ 11049 11050 /* Driver is flow-controlling? */ 11051 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11052 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 11053 /* 11054 * Queue packet if we have an conn to give back 11055 * pressure. We can't queue packets intended for 11056 * hardware acceleration since we've tossed that 11057 * state already. If the packet is being fed back 11058 * from ire_send_v6, we don't know the position in 11059 * the queue to enqueue the packet and we discard 11060 * the packet. 11061 */ 11062 if (ip_output_queue && connp != NULL && 11063 !mctl_present && caller != IRE_SEND) { 11064 if (caller == IP_WSRV) { 11065 connp->conn_did_putbq = 1; 11066 (void) putbq(connp->conn_wq, mp); 11067 conn_drain_insert(connp); 11068 /* 11069 * caller == IP_WSRV implies we are 11070 * the service thread, and the 11071 * queue is already noenabled. 11072 * The check for canput and 11073 * the putbq is not atomic. 11074 * So we need to check again. 
11075 */ 11076 if (canput(dev_q)) 11077 connp->conn_did_putbq = 0; 11078 } else { 11079 (void) putq(connp->conn_wq, mp); 11080 } 11081 return; 11082 } 11083 BUMP_MIB(mibptr, ipv6OutDiscards); 11084 freemsg(first_mp); 11085 return; 11086 } 11087 11088 /* 11089 * Look for reachability confirmations from the transport. 11090 */ 11091 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11092 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11093 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11094 if (mctl_present) 11095 io->ipsec_out_reachable = B_TRUE; 11096 } 11097 /* Fastpath */ 11098 switch (nexthdr) { 11099 case IPPROTO_TCP: 11100 case IPPROTO_UDP: 11101 case IPPROTO_ICMPV6: 11102 case IPPROTO_SCTP: 11103 hdr_length = IPV6_HDR_LEN; 11104 break; 11105 default: { 11106 uint8_t *nexthdrp; 11107 11108 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11109 &hdr_length, &nexthdrp)) { 11110 /* Malformed packet */ 11111 BUMP_MIB(mibptr, ipv6OutDiscards); 11112 freemsg(first_mp); 11113 return; 11114 } 11115 nexthdr = *nexthdrp; 11116 break; 11117 } 11118 } 11119 11120 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11121 uint16_t *up; 11122 uint16_t *insp; 11123 11124 /* 11125 * The packet header is processed once for all, even 11126 * in the multirouting case. We disable hardware 11127 * checksum if the packet is multirouted, as it will be 11128 * replicated via several interfaces, and not all of 11129 * them may have this capability. 11130 */ 11131 if (cksum_request == 1 && 11132 !(ire->ire_flags & RTF_MULTIRT)) { 11133 /* Skip the transport checksum */ 11134 goto cksum_done; 11135 } 11136 /* 11137 * Do user-configured raw checksum. 
11138 * Compute checksum and insert at offset "cksum_request" 11139 */ 11140 11141 /* check for enough headers for checksum */ 11142 cksum_request += hdr_length; /* offset from rptr */ 11143 if ((mp->b_wptr - mp->b_rptr) < 11144 (cksum_request + sizeof (int16_t))) { 11145 if (!pullupmsg(mp, 11146 cksum_request + sizeof (int16_t))) { 11147 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11148 " failed\n")); 11149 BUMP_MIB(mibptr, ipv6OutDiscards); 11150 freemsg(first_mp); 11151 return; 11152 } 11153 ip6h = (ip6_t *)mp->b_rptr; 11154 } 11155 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11156 ASSERT(((uintptr_t)insp & 0x1) == 0); 11157 up = (uint16_t *)&ip6h->ip6_src; 11158 /* 11159 * icmp has placed length and routing 11160 * header adjustment in *insp. 11161 */ 11162 sum = htons(nexthdr) + 11163 up[0] + up[1] + up[2] + up[3] + 11164 up[4] + up[5] + up[6] + up[7] + 11165 up[8] + up[9] + up[10] + up[11] + 11166 up[12] + up[13] + up[14] + up[15]; 11167 sum = (sum & 0xffff) + (sum >> 16); 11168 *insp = IP_CSUM(mp, hdr_length, sum); 11169 if (*insp == 0) 11170 *insp = 0xFFFF; 11171 } else if (nexthdr == IPPROTO_TCP) { 11172 uint16_t *up; 11173 11174 /* 11175 * Check for full IPv6 header + enough TCP header 11176 * to get at the checksum field. 11177 */ 11178 if ((mp->b_wptr - mp->b_rptr) < 11179 (hdr_length + TCP_CHECKSUM_OFFSET + 11180 TCP_CHECKSUM_SIZE)) { 11181 if (!pullupmsg(mp, hdr_length + 11182 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11183 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11184 " failed\n")); 11185 BUMP_MIB(mibptr, ipv6OutDiscards); 11186 freemsg(first_mp); 11187 return; 11188 } 11189 ip6h = (ip6_t *)mp->b_rptr; 11190 } 11191 11192 up = (uint16_t *)&ip6h->ip6_src; 11193 /* 11194 * Note: The TCP module has stored the length value 11195 * into the tcp checksum field, so we don't 11196 * need to explicitly sum it in here. 
11197 */ 11198 sum = up[0] + up[1] + up[2] + up[3] + 11199 up[4] + up[5] + up[6] + up[7] + 11200 up[8] + up[9] + up[10] + up[11] + 11201 up[12] + up[13] + up[14] + up[15]; 11202 11203 /* Fold the initial sum */ 11204 sum = (sum & 0xffff) + (sum >> 16); 11205 11206 up = (uint16_t *)(((uchar_t *)ip6h) + 11207 hdr_length + TCP_CHECKSUM_OFFSET); 11208 11209 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11210 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11211 ire->ire_max_frag, mctl_present, sum); 11212 11213 /* Software checksum? */ 11214 if (DB_CKSUMFLAGS(mp) == 0) { 11215 IP6_STAT(ip6_out_sw_cksum); 11216 IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, 11217 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11218 hdr_length); 11219 } 11220 } else if (nexthdr == IPPROTO_UDP) { 11221 uint16_t *up; 11222 11223 /* 11224 * check for full IPv6 header + enough UDP header 11225 * to get at the UDP checksum field 11226 */ 11227 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11228 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11229 if (!pullupmsg(mp, hdr_length + 11230 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11231 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11232 " failed\n")); 11233 BUMP_MIB(mibptr, ipv6OutDiscards); 11234 freemsg(first_mp); 11235 return; 11236 } 11237 ip6h = (ip6_t *)mp->b_rptr; 11238 } 11239 up = (uint16_t *)&ip6h->ip6_src; 11240 /* 11241 * Note: The UDP module has stored the length value 11242 * into the udp checksum field, so we don't 11243 * need to explicitly sum it in here. 
11244 */ 11245 sum = up[0] + up[1] + up[2] + up[3] + 11246 up[4] + up[5] + up[6] + up[7] + 11247 up[8] + up[9] + up[10] + up[11] + 11248 up[12] + up[13] + up[14] + up[15]; 11249 11250 /* Fold the initial sum */ 11251 sum = (sum & 0xffff) + (sum >> 16); 11252 11253 up = (uint16_t *)(((uchar_t *)ip6h) + 11254 hdr_length + UDP_CHECKSUM_OFFSET); 11255 11256 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11257 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11258 ire->ire_max_frag, mctl_present, sum); 11259 11260 /* Software checksum? */ 11261 if (DB_CKSUMFLAGS(mp) == 0) { 11262 IP6_STAT(ip6_out_sw_cksum); 11263 IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, 11264 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11265 hdr_length); 11266 } 11267 } else if (nexthdr == IPPROTO_ICMPV6) { 11268 uint16_t *up; 11269 icmp6_t *icmp6; 11270 11271 /* check for full IPv6+ICMPv6 header */ 11272 if ((mp->b_wptr - mp->b_rptr) < 11273 (hdr_length + ICMP6_MINLEN)) { 11274 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11275 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11276 " failed\n")); 11277 BUMP_MIB(mibptr, ipv6OutDiscards); 11278 freemsg(first_mp); 11279 return; 11280 } 11281 ip6h = (ip6_t *)mp->b_rptr; 11282 } 11283 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11284 up = (uint16_t *)&ip6h->ip6_src; 11285 /* 11286 * icmp has placed length and routing 11287 * header adjustment in icmp6_cksum. 
11288 */ 11289 sum = htons(IPPROTO_ICMPV6) + 11290 up[0] + up[1] + up[2] + up[3] + 11291 up[4] + up[5] + up[6] + up[7] + 11292 up[8] + up[9] + up[10] + up[11] + 11293 up[12] + up[13] + up[14] + up[15]; 11294 sum = (sum & 0xffff) + (sum >> 16); 11295 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11296 if (icmp6->icmp6_cksum == 0) 11297 icmp6->icmp6_cksum = 0xFFFF; 11298 11299 /* Update output mib stats */ 11300 icmp_update_out_mib_v6(ill, icmp6); 11301 } else if (nexthdr == IPPROTO_SCTP) { 11302 sctp_hdr_t *sctph; 11303 11304 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11305 if (!pullupmsg(mp, hdr_length + 11306 sizeof (*sctph))) { 11307 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11308 " failed\n")); 11309 BUMP_MIB(ill->ill_ip6_mib, 11310 ipv6OutDiscards); 11311 freemsg(mp); 11312 return; 11313 } 11314 ip6h = (ip6_t *)mp->b_rptr; 11315 } 11316 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11317 sctph->sh_chksum = 0; 11318 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11319 } 11320 11321 cksum_done: 11322 /* 11323 * We force the insertion of a fragment header using the 11324 * IPH_FRAG_HDR flag in two cases: 11325 * - after reception of an ICMPv6 "packet too big" message 11326 * with a MTU < 1280 (cf. RFC 2460 section 5) 11327 * - for multirouted IPv6 packets, so that the receiver can 11328 * discard duplicates according to their fragment identifier 11329 * 11330 * Two flags modifed from the API can modify this behavior. 11331 * The first is IPV6_USE_MIN_MTU. With this API the user 11332 * can specify how to manage PMTUD for unicast and multicast. 11333 * 11334 * IPV6_DONTFRAG disallows fragmentation. 
11335 */ 11336 max_frag = ire->ire_max_frag; 11337 switch (IP6I_USE_MIN_MTU_API(flags)) { 11338 case IPV6_USE_MIN_MTU_DEFAULT: 11339 case IPV6_USE_MIN_MTU_UNICAST: 11340 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11341 max_frag = IPV6_MIN_MTU; 11342 } 11343 break; 11344 11345 case IPV6_USE_MIN_MTU_NEVER: 11346 max_frag = IPV6_MIN_MTU; 11347 break; 11348 } 11349 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11350 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11351 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11352 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11353 max_frag, B_FALSE, B_TRUE, zoneid); 11354 return; 11355 } 11356 11357 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11358 (mp->b_cont ? msgdsize(mp) : 11359 mp->b_wptr - (uchar_t *)ip6h)) { 11360 ip0dbg(("Packet length mismatch: %d, %ld\n", 11361 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11362 msgdsize(mp))); 11363 freemsg(first_mp); 11364 return; 11365 } 11366 /* Do IPSEC processing first */ 11367 if (mctl_present) { 11368 if (attach_index != 0) 11369 ipsec_out_attach_if(io, attach_index); 11370 ipsec_out_process(q, first_mp, ire, ill_index); 11371 return; 11372 } 11373 ASSERT(mp->b_prev == NULL); 11374 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11375 ntohs(ip6h->ip6_plen) + 11376 IPV6_HDR_LEN, max_frag)); 11377 ASSERT(mp == first_mp); 11378 /* Initiate IPPF processing */ 11379 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 11380 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11381 if (mp == NULL) { 11382 return; 11383 } 11384 } 11385 ip_wput_frag_v6(mp, ire, reachable, connp, 11386 caller, max_frag); 11387 return; 11388 } 11389 /* Do IPSEC processing first */ 11390 if (mctl_present) { 11391 int extra_len = ipsec_out_extra_length(first_mp); 11392 11393 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11394 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 11395 /* 11396 * IPsec headers will push the packet over the 11397 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11398 * message for this packet if the upper-layer 11399 * that issued this packet will be able to 11400 * react to the icmp_pkt2big_v6() that we'll 11401 * generate. 11402 */ 11403 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11404 max_frag, B_FALSE, B_TRUE, zoneid); 11405 return; 11406 } 11407 if (attach_index != 0) 11408 ipsec_out_attach_if(io, attach_index); 11409 ipsec_out_process(q, first_mp, ire, ill_index); 11410 return; 11411 } 11412 /* 11413 * XXX multicast: add ip_mforward_v6() here. 11414 * Check conn_dontroute 11415 */ 11416 #ifdef lint 11417 /* 11418 * XXX The only purpose of this statement is to avoid lint 11419 * errors. See the above "XXX multicast". When that gets 11420 * fixed, remove this whole #ifdef lint section. 11421 */ 11422 ip3dbg(("multicast forward is %s.\n", 11423 (multicast_forward ? "TRUE" : "FALSE"))); 11424 #endif 11425 11426 UPDATE_OB_PKT_COUNT(ire); 11427 ire->ire_last_used_time = lbolt; 11428 ASSERT(mp == first_mp); 11429 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11430 } else { 11431 DTRACE_PROBE4(ip6__loopback__out__start, 11432 ill_t *, NULL, ill_t *, ill, 11433 ip6_t *, ip6h, mblk_t *, first_mp); 11434 FW_HOOKS6(ip6_loopback_out_event, ipv6firewall_loopback_out, 11435 NULL, ill, ip6h, first_mp, mp); 11436 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11437 if (first_mp != NULL) 11438 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11439 } 11440 } 11441 11442 /* 11443 * Outbound IPv6 fragmentation routine using MDT. 
11444 */ 11445 static void 11446 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11447 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11448 { 11449 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11450 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11451 mblk_t *hdr_mp, *md_mp = NULL; 11452 int i1; 11453 multidata_t *mmd; 11454 unsigned char *hdr_ptr, *pld_ptr; 11455 ip_pdescinfo_t pdi; 11456 uint32_t ident; 11457 size_t len; 11458 uint16_t offset; 11459 queue_t *stq = ire->ire_stq; 11460 ill_t *ill = (ill_t *)stq->q_ptr; 11461 11462 ASSERT(DB_TYPE(mp) == M_DATA); 11463 ASSERT(MBLKL(mp) > unfragmentable_len); 11464 11465 /* 11466 * Move read ptr past unfragmentable portion, we don't want this part 11467 * of the data in our fragments. 11468 */ 11469 mp->b_rptr += unfragmentable_len; 11470 11471 /* Calculate how many packets we will send out */ 11472 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11473 pkts = (i1 + max_chunk - 1) / max_chunk; 11474 ASSERT(pkts > 1); 11475 11476 /* Allocate a message block which will hold all the IP Headers. */ 11477 wroff = ip_wroff_extra; 11478 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11479 11480 i1 = pkts * hdr_chunk_len; 11481 /* 11482 * Create the header buffer, Multidata and destination address 11483 * and SAP attribute that should be associated with it. 
11484 */ 11485 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11486 ((hdr_mp->b_wptr += i1), 11487 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11488 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11489 freemsg(mp); 11490 if (md_mp == NULL) { 11491 freemsg(hdr_mp); 11492 } else { 11493 free_mmd: IP6_STAT(ip6_frag_mdt_discarded); 11494 freemsg(md_mp); 11495 } 11496 IP6_STAT(ip6_frag_mdt_allocfail); 11497 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11498 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutDiscards, pkts); 11499 return; 11500 } 11501 IP6_STAT(ip6_frag_mdt_allocd); 11502 11503 /* 11504 * Add a payload buffer to the Multidata; this operation must not 11505 * fail, or otherwise our logic in this routine is broken. There 11506 * is no memory allocation done by the routine, so any returned 11507 * failure simply tells us that we've done something wrong. 11508 * 11509 * A failure tells us that either we're adding the same payload 11510 * buffer more than once, or we're trying to add more buffers than 11511 * allowed. None of the above cases should happen, and we panic 11512 * because either there's horrible heap corruption, and/or 11513 * programming mistake. 11514 */ 11515 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11516 goto pbuf_panic; 11517 } 11518 11519 hdr_ptr = hdr_mp->b_rptr; 11520 pld_ptr = mp->b_rptr; 11521 11522 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11523 11524 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11525 11526 /* 11527 * len is the total length of the fragmentable data in this 11528 * datagram. For each fragment sent, we will decrement len 11529 * by the amount of fragmentable data sent in that fragment 11530 * until len reaches zero. 
11531 */ 11532 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11533 11534 offset = 0; 11535 prev_nexthdr_offset += wroff; 11536 11537 while (len != 0) { 11538 size_t mlen; 11539 ip6_t *fip6h; 11540 ip6_frag_t *fraghdr; 11541 int error; 11542 11543 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11544 mlen = MIN(len, max_chunk); 11545 len -= mlen; 11546 11547 fip6h = (ip6_t *)(hdr_ptr + wroff); 11548 ASSERT(OK_32PTR(fip6h)); 11549 bcopy(ip6h, fip6h, unfragmentable_len); 11550 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11551 11552 fip6h->ip6_plen = htons((uint16_t)(mlen + 11553 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11554 11555 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11556 unfragmentable_len); 11557 fraghdr->ip6f_nxt = nexthdr; 11558 fraghdr->ip6f_reserved = 0; 11559 fraghdr->ip6f_offlg = htons(offset) | 11560 ((len != 0) ? IP6F_MORE_FRAG : 0); 11561 fraghdr->ip6f_ident = ident; 11562 11563 /* 11564 * Record offset and size of header and data of the next packet 11565 * in the multidata message. 11566 */ 11567 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11568 unfragmentable_len + sizeof (ip6_frag_t), 0); 11569 PDESC_PLD_INIT(&pdi); 11570 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11571 ASSERT(i1 > 0); 11572 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11573 if (i1 == mlen) { 11574 pld_ptr += mlen; 11575 } else { 11576 i1 = mlen - i1; 11577 mp = mp->b_cont; 11578 ASSERT(mp != NULL); 11579 ASSERT(MBLKL(mp) >= i1); 11580 /* 11581 * Attach the next payload message block to the 11582 * multidata message. 
11583 */ 11584 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11585 goto pbuf_panic; 11586 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11587 pld_ptr = mp->b_rptr + i1; 11588 } 11589 11590 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11591 KM_NOSLEEP)) == NULL) { 11592 /* 11593 * Any failure other than ENOMEM indicates that we 11594 * have passed in invalid pdesc info or parameters 11595 * to mmd_addpdesc, which must not happen. 11596 * 11597 * EINVAL is a result of failure on boundary checks 11598 * against the pdesc info contents. It should not 11599 * happen, and we panic because either there's 11600 * horrible heap corruption, and/or programming 11601 * mistake. 11602 */ 11603 if (error != ENOMEM) { 11604 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11605 "pdesc logic error detected for " 11606 "mmd %p pinfo %p (%d)\n", 11607 (void *)mmd, (void *)&pdi, error); 11608 /* NOTREACHED */ 11609 } 11610 IP6_STAT(ip6_frag_mdt_addpdescfail); 11611 /* Free unattached payload message blocks as well */ 11612 md_mp->b_cont = mp->b_cont; 11613 goto free_mmd; 11614 } 11615 11616 /* Advance fragment offset. */ 11617 offset += mlen; 11618 11619 /* Advance to location for next header in the buffer. */ 11620 hdr_ptr += hdr_chunk_len; 11621 11622 /* Did we reach the next payload message block? */ 11623 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11624 mp = mp->b_cont; 11625 /* 11626 * Attach the next message block with payload 11627 * data to the multidata message. 
11628 */ 11629 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11630 goto pbuf_panic; 11631 pld_ptr = mp->b_rptr; 11632 } 11633 } 11634 11635 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11636 ASSERT(mp->b_wptr == pld_ptr); 11637 11638 /* Update IP statistics */ 11639 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutFragCreates, pkts); 11640 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11641 IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); 11642 11643 ire->ire_ob_pkt_count += pkts; 11644 if (ire->ire_ipif != NULL) 11645 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11646 11647 ire->ire_last_used_time = lbolt; 11648 /* Send it down */ 11649 putnext(stq, md_mp); 11650 return; 11651 11652 pbuf_panic: 11653 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11654 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11655 pbuf_idx); 11656 /* NOTREACHED */ 11657 } 11658 11659 /* 11660 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11661 * We have not optimized this in terms of number of mblks 11662 * allocated. For instance, for each fragment sent we always allocate a 11663 * mblk to hold the IPv6 header and fragment header. 11664 * 11665 * Assumes that all the extension headers are contained in the first mblk. 11666 * 11667 * The fragment header is inserted after an hop-by-hop options header 11668 * and after [an optional destinations header followed by] a routing header. 11669 * 11670 * NOTE : This function does not ire_refrele the ire passed in as 11671 * the argument. 
11672 */ 11673 void 11674 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11675 int caller, int max_frag) 11676 { 11677 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11678 ip6_t *fip6h; 11679 mblk_t *hmp; 11680 mblk_t *hmp0; 11681 mblk_t *dmp; 11682 ip6_frag_t *fraghdr; 11683 size_t unfragmentable_len; 11684 size_t len; 11685 size_t mlen; 11686 size_t max_chunk; 11687 uint32_t ident; 11688 uint16_t off_flags; 11689 uint16_t offset = 0; 11690 ill_t *ill; 11691 uint8_t nexthdr; 11692 uint_t prev_nexthdr_offset; 11693 uint8_t *ptr; 11694 11695 ASSERT(ire->ire_type == IRE_CACHE); 11696 ill = (ill_t *)ire->ire_stq->q_ptr; 11697 11698 /* 11699 * Determine the length of the unfragmentable portion of this 11700 * datagram. This consists of the IPv6 header, a potential 11701 * hop-by-hop options header, a potential pre-routing-header 11702 * destination options header, and a potential routing header. 11703 */ 11704 nexthdr = ip6h->ip6_nxt; 11705 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11706 ptr = (uint8_t *)&ip6h[1]; 11707 11708 if (nexthdr == IPPROTO_HOPOPTS) { 11709 ip6_hbh_t *hbh_hdr; 11710 uint_t hdr_len; 11711 11712 hbh_hdr = (ip6_hbh_t *)ptr; 11713 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11714 nexthdr = hbh_hdr->ip6h_nxt; 11715 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11716 - (uint8_t *)ip6h; 11717 ptr += hdr_len; 11718 } 11719 if (nexthdr == IPPROTO_DSTOPTS) { 11720 ip6_dest_t *dest_hdr; 11721 uint_t hdr_len; 11722 11723 dest_hdr = (ip6_dest_t *)ptr; 11724 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11725 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11726 nexthdr = dest_hdr->ip6d_nxt; 11727 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11728 - (uint8_t *)ip6h; 11729 ptr += hdr_len; 11730 } 11731 } 11732 if (nexthdr == IPPROTO_ROUTING) { 11733 ip6_rthdr_t *rthdr; 11734 uint_t hdr_len; 11735 11736 rthdr = (ip6_rthdr_t *)ptr; 11737 nexthdr = rthdr->ip6r_nxt; 11738 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 
11739 - (uint8_t *)ip6h; 11740 hdr_len = 8 * (rthdr->ip6r_len + 1); 11741 ptr += hdr_len; 11742 } 11743 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11744 11745 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11746 sizeof (ip6_frag_t)) & ~7; 11747 11748 /* Check if we can use MDT to send out the frags. */ 11749 ASSERT(!IRE_IS_LOCAL(ire)); 11750 if (ip_multidata_outbound && reachable == 0 && 11751 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11752 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11753 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11754 nexthdr, prev_nexthdr_offset); 11755 return; 11756 } 11757 11758 /* 11759 * Allocate an mblk with enough room for the link-layer 11760 * header, the unfragmentable part of the datagram, and the 11761 * fragment header. This (or a copy) will be used as the 11762 * first mblk for each fragment we send. 11763 */ 11764 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 11765 BPRI_HI); 11766 if (hmp == NULL) { 11767 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11768 freemsg(mp); 11769 return; 11770 } 11771 hmp->b_rptr += ip_wroff_extra; 11772 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11773 11774 fip6h = (ip6_t *)hmp->b_rptr; 11775 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11776 11777 bcopy(ip6h, fip6h, unfragmentable_len); 11778 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11779 11780 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11781 11782 fraghdr->ip6f_nxt = nexthdr; 11783 fraghdr->ip6f_reserved = 0; 11784 fraghdr->ip6f_offlg = 0; 11785 fraghdr->ip6f_ident = htonl(ident); 11786 11787 /* 11788 * len is the total length of the fragmentable data in this 11789 * datagram. For each fragment sent, we will decrement len 11790 * by the amount of fragmentable data sent in that fragment 11791 * until len reaches zero. 
11792 */ 11793 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11794 11795 /* 11796 * Move read ptr past unfragmentable portion, we don't want this part 11797 * of the data in our fragments. 11798 */ 11799 mp->b_rptr += unfragmentable_len; 11800 11801 while (len != 0) { 11802 mlen = MIN(len, max_chunk); 11803 len -= mlen; 11804 if (len != 0) { 11805 /* Not last */ 11806 hmp0 = copyb(hmp); 11807 if (hmp0 == NULL) { 11808 freeb(hmp); 11809 freemsg(mp); 11810 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11811 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11812 return; 11813 } 11814 off_flags = IP6F_MORE_FRAG; 11815 } else { 11816 /* Last fragment */ 11817 hmp0 = hmp; 11818 hmp = NULL; 11819 off_flags = 0; 11820 } 11821 fip6h = (ip6_t *)(hmp0->b_rptr); 11822 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11823 11824 fip6h->ip6_plen = htons((uint16_t)(mlen + 11825 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11826 /* 11827 * Note: Optimization alert. 11828 * In IPv6 (and IPv4) protocol header, Fragment Offset 11829 * ("offset") is 13 bits wide and in 8-octet units. 11830 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11831 * it occupies the most significant 13 bits. 11832 * (least significant 13 bits in IPv4). 11833 * We do not do any shifts here. Not shifting is same effect 11834 * as taking offset value in octet units, dividing by 8 and 11835 * then shifting 3 bits left to line it up in place in proper 11836 * place protocol header. 
11837 */ 11838 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11839 11840 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11841 /* mp has already been freed by ip_carve_mp() */ 11842 if (hmp != NULL) 11843 freeb(hmp); 11844 freeb(hmp0); 11845 ip1dbg(("ip_carve_mp: failed\n")); 11846 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11847 return; 11848 } 11849 hmp0->b_cont = dmp; 11850 /* Get the priority marking, if any */ 11851 hmp0->b_band = dmp->b_band; 11852 UPDATE_OB_PKT_COUNT(ire); 11853 ire->ire_last_used_time = lbolt; 11854 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11855 caller, NULL); 11856 reachable = 0; /* No need to redo state machine in loop */ 11857 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates); 11858 offset += mlen; 11859 } 11860 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11861 } 11862 11863 /* 11864 * Determine if the ill and multicast aspects of that packets 11865 * "matches" the conn. 11866 */ 11867 boolean_t 11868 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11869 zoneid_t zoneid) 11870 { 11871 ill_t *in_ill; 11872 boolean_t wantpacket = B_TRUE; 11873 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11874 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11875 11876 /* 11877 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11878 * unicast and multicast reception to conn_incoming_ill. 11879 * conn_wantpacket_v6 is called both for unicast and 11880 * multicast. 11881 * 11882 * 1) The unicast copy of the packet can come anywhere in 11883 * the ill group if it is part of the group. Thus, we 11884 * need to check to see whether the ill group matches 11885 * if in_ill is part of a group. 11886 * 11887 * 2) ip_rput does not suppress duplicate multicast packets. 
11888 * If there are two interfaces in a ill group and we have 11889 * 2 applications (conns) joined a multicast group G on 11890 * both the interfaces, ilm_lookup_ill filter in ip_rput 11891 * will give us two packets because we join G on both the 11892 * interfaces rather than nominating just one interface 11893 * for receiving multicast like broadcast above. So, 11894 * we have to call ilg_lookup_ill to filter out duplicate 11895 * copies, if ill is part of a group, to supress duplicates. 11896 */ 11897 in_ill = connp->conn_incoming_ill; 11898 if (in_ill != NULL) { 11899 mutex_enter(&connp->conn_lock); 11900 in_ill = connp->conn_incoming_ill; 11901 mutex_enter(&ill->ill_lock); 11902 /* 11903 * No IPMP, and the packet did not arrive on conn_incoming_ill 11904 * OR, IPMP in use and the packet arrived on an IPMP group 11905 * different from the conn_incoming_ill's IPMP group. 11906 * Reject the packet. 11907 */ 11908 if ((in_ill->ill_group == NULL && in_ill != ill) || 11909 (in_ill->ill_group != NULL && 11910 in_ill->ill_group != ill->ill_group)) { 11911 wantpacket = B_FALSE; 11912 } 11913 mutex_exit(&ill->ill_lock); 11914 mutex_exit(&connp->conn_lock); 11915 if (!wantpacket) 11916 return (B_FALSE); 11917 } 11918 11919 if (connp->conn_multi_router) 11920 return (B_TRUE); 11921 11922 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11923 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11924 /* 11925 * Unicast case: we match the conn only if it's in the specified 11926 * zone. 11927 */ 11928 return (IPCL_ZONE_MATCH(connp, zoneid)); 11929 } 11930 11931 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11932 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11933 /* 11934 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11935 * disabled, therefore we don't dispatch the multicast packet to 11936 * the sending zone. 
11937 */ 11938 return (B_FALSE); 11939 } 11940 11941 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11942 connp->conn_zoneid != zoneid && zoneid != ALL_ZONES) { 11943 /* 11944 * Multicast packet on the loopback interface: we only match 11945 * conns who joined the group in the specified zone. 11946 */ 11947 return (B_FALSE); 11948 } 11949 11950 mutex_enter(&connp->conn_lock); 11951 wantpacket = 11952 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11953 mutex_exit(&connp->conn_lock); 11954 11955 return (wantpacket); 11956 } 11957 11958 11959 /* 11960 * Transmit a packet and update any NUD state based on the flags 11961 * XXX need to "recover" any ip6i_t when doing putq! 11962 * 11963 * NOTE : This function does not ire_refrele the ire passed in as the 11964 * argument. 11965 */ 11966 void 11967 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11968 int caller, ipsec_out_t *io) 11969 { 11970 mblk_t *mp1; 11971 nce_t *nce = ire->ire_nce; 11972 ill_t *ill; 11973 ill_t *out_ill; 11974 uint64_t delta; 11975 ip6_t *ip6h; 11976 queue_t *stq = ire->ire_stq; 11977 ire_t *ire1 = NULL; 11978 ire_t *save_ire = ire; 11979 boolean_t multirt_send = B_FALSE; 11980 mblk_t *next_mp = NULL; 11981 11982 ip6h = (ip6_t *)mp->b_rptr; 11983 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11984 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11985 ASSERT(nce != NULL); 11986 ASSERT(mp->b_datap->db_type == M_DATA); 11987 ASSERT(stq != NULL); 11988 11989 ill = ire_to_ill(ire); 11990 if (!ill) { 11991 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11992 freemsg(mp); 11993 return; 11994 } 11995 11996 /* 11997 * If a packet is to be sent out an interface that is a 6to4 11998 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11999 * destination, must be checked to have a 6to4 prefix 12000 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 12001 * address configured on the sending interface. 
Otherwise, 12002 * the packet was delivered to this interface in error and the 12003 * packet must be dropped. 12004 */ 12005 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12006 ipif_t *ipif = ill->ill_ipif; 12007 12008 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12009 &ip6h->ip6_dst)) { 12010 if (ip_debug > 2) { 12011 /* ip1dbg */ 12012 pr_addr_dbg("ip_xmit_v6: attempting to " 12013 "send 6to4 addressed IPv6 " 12014 "destination (%s) out the wrong " 12015 "interface.\n", AF_INET6, 12016 &ip6h->ip6_dst); 12017 } 12018 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 12019 freemsg(mp); 12020 return; 12021 } 12022 } 12023 12024 /* Flow-control check has been done in ip_wput_ire_v6 */ 12025 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12026 caller == IP_WSRV || canput(stq->q_next)) { 12027 uint32_t ill_index; 12028 12029 /* 12030 * In most cases, the emission loop below is entered only 12031 * once. Only in the case where the ire holds the 12032 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12033 * flagged ires in the bucket, and send the packet 12034 * through all crossed RTF_MULTIRT routes. 12035 */ 12036 if (ire->ire_flags & RTF_MULTIRT) { 12037 /* 12038 * Multirouting case. The bucket where ire is stored 12039 * probably holds other RTF_MULTIRT flagged ires 12040 * to the destination. In this call to ip_xmit_v6, 12041 * we attempt to send the packet through all 12042 * those ires. Thus, we first ensure that ire is the 12043 * first RTF_MULTIRT ire in the bucket, 12044 * before walking the ire list. 12045 */ 12046 ire_t *first_ire; 12047 irb_t *irb = ire->ire_bucket; 12048 ASSERT(irb != NULL); 12049 multirt_send = B_TRUE; 12050 12051 /* Make sure we do not omit any multiroute ire. 
*/ 12052 IRB_REFHOLD(irb); 12053 for (first_ire = irb->irb_ire; 12054 first_ire != NULL; 12055 first_ire = first_ire->ire_next) { 12056 if ((first_ire->ire_flags & RTF_MULTIRT) && 12057 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12058 &ire->ire_addr_v6)) && 12059 !(first_ire->ire_marks & 12060 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12061 break; 12062 } 12063 12064 if ((first_ire != NULL) && (first_ire != ire)) { 12065 IRE_REFHOLD(first_ire); 12066 /* ire will be released by the caller */ 12067 ire = first_ire; 12068 nce = ire->ire_nce; 12069 stq = ire->ire_stq; 12070 ill = ire_to_ill(ire); 12071 } 12072 IRB_REFRELE(irb); 12073 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12074 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12075 ILL_MDT_USABLE(ill)) { 12076 /* 12077 * This tcp connection was marked as MDT-capable, but 12078 * it has been turned off due changes in the interface. 12079 * Now that the interface support is back, turn it on 12080 * by notifying tcp. We don't directly modify tcp_mdt, 12081 * since we leave all the details to the tcp code that 12082 * knows better. 12083 */ 12084 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12085 12086 if (mdimp == NULL) { 12087 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12088 "connp %p (ENOMEM)\n", (void *)connp)); 12089 } else { 12090 CONN_INC_REF(connp); 12091 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12092 connp, SQTAG_TCP_INPUT_MCTL); 12093 } 12094 } 12095 12096 do { 12097 mblk_t *mp_ip6h; 12098 12099 if (multirt_send) { 12100 irb_t *irb; 12101 /* 12102 * We are in a multiple send case, need to get 12103 * the next ire and make a duplicate of the 12104 * packet. ire1 holds here the next ire to 12105 * process in the bucket. If multirouting is 12106 * expected, any non-RTF_MULTIRT ire that has 12107 * the right destination address is ignored. 
12108 */ 12109 irb = ire->ire_bucket; 12110 ASSERT(irb != NULL); 12111 12112 IRB_REFHOLD(irb); 12113 for (ire1 = ire->ire_next; 12114 ire1 != NULL; 12115 ire1 = ire1->ire_next) { 12116 if (!(ire1->ire_flags & RTF_MULTIRT)) 12117 continue; 12118 if (!IN6_ARE_ADDR_EQUAL( 12119 &ire1->ire_addr_v6, 12120 &ire->ire_addr_v6)) 12121 continue; 12122 if (ire1->ire_marks & 12123 (IRE_MARK_CONDEMNED| 12124 IRE_MARK_HIDDEN)) 12125 continue; 12126 12127 /* Got one */ 12128 if (ire1 != save_ire) { 12129 IRE_REFHOLD(ire1); 12130 } 12131 break; 12132 } 12133 IRB_REFRELE(irb); 12134 12135 if (ire1 != NULL) { 12136 next_mp = copyb(mp); 12137 if ((next_mp == NULL) || 12138 ((mp->b_cont != NULL) && 12139 ((next_mp->b_cont = 12140 dupmsg(mp->b_cont)) == 12141 NULL))) { 12142 freemsg(next_mp); 12143 next_mp = NULL; 12144 ire_refrele(ire1); 12145 ire1 = NULL; 12146 } 12147 } 12148 12149 /* Last multiroute ire; don't loop anymore. */ 12150 if (ire1 == NULL) { 12151 multirt_send = B_FALSE; 12152 } 12153 } 12154 12155 ill_index = 12156 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12157 12158 /* Initiate IPPF processing */ 12159 if (IP6_OUT_IPP(flags)) { 12160 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12161 if (mp == NULL) { 12162 BUMP_MIB(ill->ill_ip6_mib, 12163 ipv6OutDiscards); 12164 if (next_mp != NULL) 12165 freemsg(next_mp); 12166 if (ire != save_ire) { 12167 ire_refrele(ire); 12168 } 12169 return; 12170 } 12171 ip6h = (ip6_t *)mp->b_rptr; 12172 } 12173 mp_ip6h = mp; 12174 12175 /* 12176 * Check for fastpath, we need to hold nce_lock to 12177 * prevent fastpath update from chaining nce_fp_mp. 
12178 */ 12179 12180 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12181 mutex_enter(&nce->nce_lock); 12182 if ((mp1 = nce->nce_fp_mp) != NULL) { 12183 uint32_t hlen; 12184 uchar_t *rptr; 12185 12186 hlen = MBLKL(mp1); 12187 rptr = mp->b_rptr - hlen; 12188 /* 12189 * make sure there is room for the fastpath 12190 * datalink header 12191 */ 12192 if (rptr < mp->b_datap->db_base) { 12193 mp1 = copyb(mp1); 12194 mutex_exit(&nce->nce_lock); 12195 if (mp1 == NULL) { 12196 BUMP_MIB(ill->ill_ip6_mib, 12197 ipv6OutDiscards); 12198 freemsg(mp); 12199 if (next_mp != NULL) 12200 freemsg(next_mp); 12201 if (ire != save_ire) { 12202 ire_refrele(ire); 12203 } 12204 return; 12205 } 12206 mp1->b_cont = mp; 12207 12208 /* Get the priority marking, if any */ 12209 mp1->b_band = mp->b_band; 12210 mp = mp1; 12211 } else { 12212 mp->b_rptr = rptr; 12213 /* 12214 * fastpath - pre-pend datalink 12215 * header 12216 */ 12217 bcopy(mp1->b_rptr, rptr, hlen); 12218 mutex_exit(&nce->nce_lock); 12219 } 12220 } else { 12221 /* 12222 * Get the DL_UNITDATA_REQ. 12223 */ 12224 mp1 = nce->nce_res_mp; 12225 if (mp1 == NULL) { 12226 mutex_exit(&nce->nce_lock); 12227 ip1dbg(("ip_xmit_v6: No resolution " 12228 "block ire = %p\n", (void *)ire)); 12229 freemsg(mp); 12230 if (next_mp != NULL) 12231 freemsg(next_mp); 12232 if (ire != save_ire) { 12233 ire_refrele(ire); 12234 } 12235 return; 12236 } 12237 /* 12238 * Prepend the DL_UNITDATA_REQ. 
12239 */ 12240 mp1 = copyb(mp1); 12241 mutex_exit(&nce->nce_lock); 12242 if (mp1 == NULL) { 12243 BUMP_MIB(ill->ill_ip6_mib, 12244 ipv6OutDiscards); 12245 freemsg(mp); 12246 if (next_mp != NULL) 12247 freemsg(next_mp); 12248 if (ire != save_ire) { 12249 ire_refrele(ire); 12250 } 12251 return; 12252 } 12253 mp1->b_cont = mp; 12254 12255 /* Get the priority marking, if any */ 12256 mp1->b_band = mp->b_band; 12257 mp = mp1; 12258 } 12259 12260 out_ill = (ill_t *)stq->q_ptr; 12261 12262 DTRACE_PROBE4(ip6__physical__out__start, 12263 ill_t *, NULL, ill_t *, out_ill, 12264 ip6_t *, ip6h, mblk_t *, mp); 12265 12266 FW_HOOKS6(ip6_physical_out_event, 12267 ipv6firewall_physical_out, 12268 NULL, out_ill, ip6h, mp, mp_ip6h); 12269 12270 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12271 12272 if (mp == NULL) { 12273 if (multirt_send) { 12274 ASSERT(ire1 != NULL); 12275 if (ire != save_ire) { 12276 ire_refrele(ire); 12277 } 12278 /* 12279 * Proceed with the next RTF_MULTIRT 12280 * ire, also set up the send-to queue 12281 * accordingly. 12282 */ 12283 ire = ire1; 12284 ire1 = NULL; 12285 stq = ire->ire_stq; 12286 nce = ire->ire_nce; 12287 ill = ire_to_ill(ire); 12288 mp = next_mp; 12289 next_mp = NULL; 12290 continue; 12291 } else { 12292 ASSERT(next_mp == NULL); 12293 ASSERT(ire1 == NULL); 12294 break; 12295 } 12296 } 12297 12298 /* 12299 * Update ire counters; for save_ire, this has been 12300 * done by the caller. 12301 */ 12302 if (ire != save_ire) { 12303 UPDATE_OB_PKT_COUNT(ire); 12304 ire->ire_last_used_time = lbolt; 12305 } 12306 12307 /* 12308 * Send it down. XXX Do we want to flow control AH/ESP 12309 * packets that carry TCP payloads? We don't flow 12310 * control TCP packets, but we should also not 12311 * flow-control TCP packets that have been protected. 12312 * We don't have an easy way to find out if an AH/ESP 12313 * packet was originally TCP or not currently. 
12314 */ 12315 if (io == NULL) { 12316 putnext(stq, mp); 12317 } else { 12318 /* 12319 * Safety Pup says: make sure this is 12320 * going to the right interface! 12321 */ 12322 if (io->ipsec_out_capab_ill_index != 12323 ill_index) { 12324 /* IPsec kstats: bump lose counter */ 12325 freemsg(mp1); 12326 } else { 12327 ipsec_hw_putnext(stq, mp); 12328 } 12329 } 12330 12331 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12332 if (ire != save_ire) { 12333 ire_refrele(ire); 12334 } 12335 if (multirt_send) { 12336 ASSERT(ire1 != NULL); 12337 /* 12338 * Proceed with the next RTF_MULTIRT 12339 * ire, also set up the send-to queue 12340 * accordingly. 12341 */ 12342 ire = ire1; 12343 ire1 = NULL; 12344 stq = ire->ire_stq; 12345 nce = ire->ire_nce; 12346 ill = ire_to_ill(ire); 12347 mp = next_mp; 12348 next_mp = NULL; 12349 continue; 12350 } 12351 ASSERT(next_mp == NULL); 12352 ASSERT(ire1 == NULL); 12353 return; 12354 } 12355 12356 ASSERT(nce->nce_state != ND_INCOMPLETE); 12357 12358 /* 12359 * Check for upper layer advice 12360 */ 12361 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12362 /* 12363 * It should be o.k. to check the state without 12364 * a lock here, at most we lose an advice. 12365 */ 12366 nce->nce_last = TICK_TO_MSEC(lbolt64); 12367 if (nce->nce_state != ND_REACHABLE) { 12368 12369 mutex_enter(&nce->nce_lock); 12370 nce->nce_state = ND_REACHABLE; 12371 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12372 mutex_exit(&nce->nce_lock); 12373 (void) untimeout(nce->nce_timeout_id); 12374 if (ip_debug > 2) { 12375 /* ip1dbg */ 12376 pr_addr_dbg("ip_xmit_v6: state" 12377 " for %s changed to" 12378 " REACHABLE\n", AF_INET6, 12379 &ire->ire_addr_v6); 12380 } 12381 } 12382 if (ire != save_ire) { 12383 ire_refrele(ire); 12384 } 12385 if (multirt_send) { 12386 ASSERT(ire1 != NULL); 12387 /* 12388 * Proceed with the next RTF_MULTIRT 12389 * ire, also set up the send-to queue 12390 * accordingly. 
12391 */ 12392 ire = ire1; 12393 ire1 = NULL; 12394 stq = ire->ire_stq; 12395 nce = ire->ire_nce; 12396 ill = ire_to_ill(ire); 12397 mp = next_mp; 12398 next_mp = NULL; 12399 continue; 12400 } 12401 ASSERT(next_mp == NULL); 12402 ASSERT(ire1 == NULL); 12403 return; 12404 } 12405 12406 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12407 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12408 " ill_reachable_time = %d \n", delta, 12409 ill->ill_reachable_time)); 12410 if (delta > (uint64_t)ill->ill_reachable_time) { 12411 nce = ire->ire_nce; 12412 mutex_enter(&nce->nce_lock); 12413 switch (nce->nce_state) { 12414 case ND_REACHABLE: 12415 case ND_STALE: 12416 /* 12417 * ND_REACHABLE is identical to 12418 * ND_STALE in this specific case. If 12419 * reachable time has expired for this 12420 * neighbor (delta is greater than 12421 * reachable time), conceptually, the 12422 * neighbor cache is no longer in 12423 * REACHABLE state, but already in 12424 * STALE state. So the correct 12425 * transition here is to ND_DELAY. 12426 */ 12427 nce->nce_state = ND_DELAY; 12428 mutex_exit(&nce->nce_lock); 12429 NDP_RESTART_TIMER(nce, 12430 delay_first_probe_time); 12431 if (ip_debug > 3) { 12432 /* ip2dbg */ 12433 pr_addr_dbg("ip_xmit_v6: state" 12434 " for %s changed to" 12435 " DELAY\n", AF_INET6, 12436 &ire->ire_addr_v6); 12437 } 12438 break; 12439 case ND_DELAY: 12440 case ND_PROBE: 12441 mutex_exit(&nce->nce_lock); 12442 /* Timers have already started */ 12443 break; 12444 case ND_UNREACHABLE: 12445 /* 12446 * ndp timer has detected that this nce 12447 * is unreachable and initiated deleting 12448 * this nce and all its associated IREs. 12449 * This is a race where we found the 12450 * ire before it was deleted and have 12451 * just sent out a packet using this 12452 * unreachable nce. 
12453 */ 12454 mutex_exit(&nce->nce_lock); 12455 break; 12456 default: 12457 ASSERT(0); 12458 } 12459 } 12460 12461 if (multirt_send) { 12462 ASSERT(ire1 != NULL); 12463 /* 12464 * Proceed with the next RTF_MULTIRT ire, 12465 * Also set up the send-to queue accordingly. 12466 */ 12467 if (ire != save_ire) { 12468 ire_refrele(ire); 12469 } 12470 ire = ire1; 12471 ire1 = NULL; 12472 stq = ire->ire_stq; 12473 nce = ire->ire_nce; 12474 ill = ire_to_ill(ire); 12475 mp = next_mp; 12476 next_mp = NULL; 12477 } 12478 } while (multirt_send); 12479 /* 12480 * In the multirouting case, release the last ire used for 12481 * emission. save_ire will be released by the caller. 12482 */ 12483 if (ire != save_ire) { 12484 ire_refrele(ire); 12485 } 12486 } else { 12487 /* 12488 * Queue packet if we have an conn to give back pressure. 12489 * We can't queue packets intended for hardware acceleration 12490 * since we've tossed that state already. If the packet is 12491 * being fed back from ire_send_v6, we don't know the 12492 * position in the queue to enqueue the packet and we discard 12493 * the packet. 12494 */ 12495 if (ip_output_queue && (connp != NULL) && (io == NULL) && 12496 (caller != IRE_SEND)) { 12497 if (caller == IP_WSRV) { 12498 connp->conn_did_putbq = 1; 12499 (void) putbq(connp->conn_wq, mp); 12500 conn_drain_insert(connp); 12501 /* 12502 * caller == IP_WSRV implies we are 12503 * the service thread, and the 12504 * queue is already noenabled. 12505 * The check for canput and 12506 * the putbq is not atomic. 12507 * So we need to check again. 12508 */ 12509 if (canput(stq->q_next)) 12510 connp->conn_did_putbq = 0; 12511 } else { 12512 (void) putq(connp->conn_wq, mp); 12513 } 12514 return; 12515 } 12516 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 12517 freemsg(mp); 12518 return; 12519 } 12520 } 12521 12522 /* 12523 * pr_addr_dbg function provides the needed buffer space to call 12524 * inet_ntop() function's 3rd argument. 
This function should be 12525 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12526 * stack buffer space in it's own stack frame. This function uses 12527 * a buffer from it's own stack and prints the information. 12528 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12529 * 12530 * Note: This function can call inet_ntop() once. 12531 */ 12532 void 12533 pr_addr_dbg(char *fmt1, int af, const void *addr) 12534 { 12535 char buf[INET6_ADDRSTRLEN]; 12536 12537 if (fmt1 == NULL) { 12538 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12539 return; 12540 } 12541 12542 /* 12543 * This does not compare debug level and just prints 12544 * out. Thus it is the responsibility of the caller 12545 * to check the appropriate debug-level before calling 12546 * this function. 12547 */ 12548 if (ip_debug > 0) { 12549 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12550 } 12551 12552 12553 } 12554 12555 12556 /* 12557 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12558 * if needed and extension headers) that will be needed based on the 12559 * ip6_pkt_t structure passed by the caller. 12560 * 12561 * The returned length does not include the length of the upper level 12562 * protocol (ULP) header. 
12563 */ 12564 int 12565 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12566 { 12567 int len; 12568 12569 len = IPV6_HDR_LEN; 12570 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12571 len += sizeof (ip6i_t); 12572 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12573 ASSERT(ipp->ipp_hopoptslen != 0); 12574 len += ipp->ipp_hopoptslen; 12575 } 12576 if (ipp->ipp_fields & IPPF_RTHDR) { 12577 ASSERT(ipp->ipp_rthdrlen != 0); 12578 len += ipp->ipp_rthdrlen; 12579 } 12580 /* 12581 * En-route destination options 12582 * Only do them if there's a routing header as well 12583 */ 12584 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12585 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12586 ASSERT(ipp->ipp_rtdstoptslen != 0); 12587 len += ipp->ipp_rtdstoptslen; 12588 } 12589 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12590 ASSERT(ipp->ipp_dstoptslen != 0); 12591 len += ipp->ipp_dstoptslen; 12592 } 12593 return (len); 12594 } 12595 12596 /* 12597 * All-purpose routine to build a header chain of an IPv6 header 12598 * followed by any required extension headers and a proto header, 12599 * preceeded (where necessary) by an ip6i_t private header. 12600 * 12601 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12602 * will be filled in appropriately. 12603 * Thus the caller must fill in the rest of the IPv6 header, such as 12604 * traffic class/flowid, source address (if not set here), hoplimit (if not 12605 * set here) and destination address. 12606 * 12607 * The extension headers and ip6i_t header will all be fully filled in. 12608 */ 12609 void 12610 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12611 ip6_pkt_t *ipp, uint8_t protocol) 12612 { 12613 uint8_t *nxthdr_ptr; 12614 uint8_t *cp; 12615 ip6i_t *ip6i; 12616 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12617 12618 /* 12619 * If sending private ip6i_t header down (checksum info, nexthop, 12620 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12621 * then fill it in. (The checksum info will be filled in by icmp). 
12622 */ 12623 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12624 ip6i = (ip6i_t *)ip6h; 12625 ip6h = (ip6_t *)&ip6i[1]; 12626 12627 ip6i->ip6i_flags = 0; 12628 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12629 if (ipp->ipp_fields & IPPF_IFINDEX || 12630 ipp->ipp_fields & IPPF_SCOPE_ID) { 12631 ASSERT(ipp->ipp_ifindex != 0); 12632 ip6i->ip6i_flags |= IP6I_IFINDEX; 12633 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12634 } 12635 if (ipp->ipp_fields & IPPF_ADDR) { 12636 /* 12637 * Enable per-packet source address verification if 12638 * IPV6_PKTINFO specified the source address. 12639 * ip6_src is set in the transport's _wput function. 12640 */ 12641 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12642 &ipp->ipp_addr)); 12643 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12644 } 12645 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12646 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12647 /* 12648 * We need to set this flag so that IP doesn't 12649 * rewrite the IPv6 header's hoplimit with the 12650 * current default value. 12651 */ 12652 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12653 } 12654 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12655 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12656 &ipp->ipp_nexthop)); 12657 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12658 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12659 } 12660 /* 12661 * tell IP this is an ip6i_t private header 12662 */ 12663 ip6i->ip6i_nxt = IPPROTO_RAW; 12664 } 12665 /* Initialize IPv6 header */ 12666 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12667 if (ipp->ipp_fields & IPPF_TCLASS) { 12668 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12669 (ipp->ipp_tclass << 20); 12670 } 12671 if (ipp->ipp_fields & IPPF_ADDR) 12672 ip6h->ip6_src = ipp->ipp_addr; 12673 12674 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12675 cp = (uint8_t *)&ip6h[1]; 12676 /* 12677 * Here's where we have to start stringing together 12678 * any extension headers in the right order: 12679 * Hop-by-hop, destination, routing, and final destination opts. 
12680 */ 12681 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12682 /* Hop-by-hop options */ 12683 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12684 12685 *nxthdr_ptr = IPPROTO_HOPOPTS; 12686 nxthdr_ptr = &hbh->ip6h_nxt; 12687 12688 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12689 cp += ipp->ipp_hopoptslen; 12690 } 12691 /* 12692 * En-route destination options 12693 * Only do them if there's a routing header as well 12694 */ 12695 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12696 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12697 ip6_dest_t *dst = (ip6_dest_t *)cp; 12698 12699 *nxthdr_ptr = IPPROTO_DSTOPTS; 12700 nxthdr_ptr = &dst->ip6d_nxt; 12701 12702 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12703 cp += ipp->ipp_rtdstoptslen; 12704 } 12705 /* 12706 * Routing header next 12707 */ 12708 if (ipp->ipp_fields & IPPF_RTHDR) { 12709 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12710 12711 *nxthdr_ptr = IPPROTO_ROUTING; 12712 nxthdr_ptr = &rt->ip6r_nxt; 12713 12714 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12715 cp += ipp->ipp_rthdrlen; 12716 } 12717 /* 12718 * Do ultimate destination options 12719 */ 12720 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12721 ip6_dest_t *dest = (ip6_dest_t *)cp; 12722 12723 *nxthdr_ptr = IPPROTO_DSTOPTS; 12724 nxthdr_ptr = &dest->ip6d_nxt; 12725 12726 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12727 cp += ipp->ipp_dstoptslen; 12728 } 12729 /* 12730 * Now set the last header pointer to the proto passed in 12731 */ 12732 *nxthdr_ptr = protocol; 12733 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12734 } 12735 12736 /* 12737 * Return a pointer to the routing header extension header 12738 * in the IPv6 header(s) chain passed in. 
12739 * If none found, return NULL 12740 * Assumes that all extension headers are in same mblk as the v6 header 12741 */ 12742 ip6_rthdr_t * 12743 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12744 { 12745 ip6_dest_t *desthdr; 12746 ip6_frag_t *fraghdr; 12747 uint_t hdrlen; 12748 uint8_t nexthdr; 12749 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12750 12751 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12752 return ((ip6_rthdr_t *)ptr); 12753 12754 /* 12755 * The routing header will precede all extension headers 12756 * other than the hop-by-hop and destination options 12757 * extension headers, so if we see anything other than those, 12758 * we're done and didn't find it. 12759 * We could see a destination options header alone but no 12760 * routing header, in which case we'll return NULL as soon as 12761 * we see anything after that. 12762 * Hop-by-hop and destination option headers are identical, 12763 * so we can use either one we want as a template. 12764 */ 12765 nexthdr = ip6h->ip6_nxt; 12766 while (ptr < endptr) { 12767 /* Is there enough left for len + nexthdr? */ 12768 if (ptr + MIN_EHDR_LEN > endptr) 12769 return (NULL); 12770 12771 switch (nexthdr) { 12772 case IPPROTO_HOPOPTS: 12773 case IPPROTO_DSTOPTS: 12774 /* Assumes the headers are identical for hbh and dst */ 12775 desthdr = (ip6_dest_t *)ptr; 12776 hdrlen = 8 * (desthdr->ip6d_len + 1); 12777 nexthdr = desthdr->ip6d_nxt; 12778 break; 12779 12780 case IPPROTO_ROUTING: 12781 return ((ip6_rthdr_t *)ptr); 12782 12783 case IPPROTO_FRAGMENT: 12784 fraghdr = (ip6_frag_t *)ptr; 12785 hdrlen = sizeof (ip6_frag_t); 12786 nexthdr = fraghdr->ip6f_nxt; 12787 break; 12788 12789 default: 12790 return (NULL); 12791 } 12792 ptr += hdrlen; 12793 } 12794 return (NULL); 12795 } 12796 12797 /* 12798 * Called for source-routed packets originating on this node. 
12799 * Manipulates the original routing header by moving every entry up 12800 * one slot, placing the first entry in the v6 header's v6_dst field, 12801 * and placing the ultimate destination in the routing header's last 12802 * slot. 12803 * 12804 * Returns the checksum diference between the ultimate destination 12805 * (last hop in the routing header when the packet is sent) and 12806 * the first hop (ip6_dst when the packet is sent) 12807 */ 12808 uint32_t 12809 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 12810 { 12811 uint_t numaddr; 12812 uint_t i; 12813 in6_addr_t *addrptr; 12814 in6_addr_t tmp; 12815 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12816 uint32_t cksm; 12817 uint32_t addrsum = 0; 12818 uint16_t *ptr; 12819 12820 /* 12821 * Perform any processing needed for source routing. 12822 * We know that all extension headers will be in the same mblk 12823 * as the IPv6 header. 12824 */ 12825 12826 /* 12827 * If no segments left in header, or the header length field is zero, 12828 * don't move hop addresses around; 12829 * Checksum difference is zero. 12830 */ 12831 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12832 return (0); 12833 12834 ptr = (uint16_t *)&ip6h->ip6_dst; 12835 cksm = 0; 12836 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12837 cksm += ptr[i]; 12838 } 12839 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12840 12841 /* 12842 * Here's where the fun begins - we have to 12843 * move all addresses up one spot, take the 12844 * first hop and make it our first ip6_dst, 12845 * and place the ultimate destination in the 12846 * newly-opened last slot. 
12847 */ 12848 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12849 numaddr = rthdr->ip6r0_len / 2; 12850 tmp = *addrptr; 12851 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12852 *addrptr = addrptr[1]; 12853 } 12854 *addrptr = ip6h->ip6_dst; 12855 ip6h->ip6_dst = tmp; 12856 12857 /* 12858 * From the checksummed ultimate destination subtract the checksummed 12859 * current ip6_dst (the first hop address). Return that number. 12860 * (In the v4 case, the second part of this is done in each routine 12861 * that calls ip_massage_options(). We do it all in this one place 12862 * for v6). 12863 */ 12864 ptr = (uint16_t *)&ip6h->ip6_dst; 12865 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12866 addrsum += ptr[i]; 12867 } 12868 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12869 if ((int)cksm < 0) 12870 cksm--; 12871 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12872 12873 return (cksm); 12874 } 12875 12876 /* 12877 * See if the upper-level protocol indicated by 'proto' will be able 12878 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 12879 * ICMP6_PACKET_TOO_BIG (IPv6). 12880 */ 12881 static boolean_t 12882 ip_ulp_cando_pkt2big(int proto) 12883 { 12884 /* 12885 * For now, only TCP can handle this. 12886 * Tunnels may be able to also, but since tun isn't working over 12887 * IPv6 yet, don't worry about it for now. 12888 */ 12889 return (proto == IPPROTO_TCP); 12890 } 12891 12892 12893 /* 12894 * Propagate a multicast group membership operation (join/leave) (*fn) on 12895 * all interfaces crossed by the related multirt routes. 12896 * The call is considered successful if the operation succeeds 12897 * on at least one interface. 12898 * The function is called if the destination address in the packet to send 12899 * is multirouted. 
*/
int
ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t,
    const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *),
    ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp,
    mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp)
{
	ire_t		*ire_gw;
	irb_t		*irb;
	int		index, error = 0;
	opt_restart_t	*or;

	irb = ire->ire_bucket;
	ASSERT(irb != NULL);

	/* first_mp is an M_CTL whose payload is the opt_restart_t state. */
	ASSERT(DB_TYPE(first_mp) == M_CTL);
	or = (opt_restart_t *)first_mp->b_rptr;

	/* Hold the bucket while walking from ire to the end of its chain. */
	IRB_REFHOLD(irb);
	for (; ire != NULL; ire = ire->ire_next) {
		/* Only multirt entries for this very group are of interest. */
		if ((ire->ire_flags & RTF_MULTIRT) == 0)
			continue;
		if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp))
			continue;

		ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0,
		    IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL,
		    MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE);
		/* No resolver exists for the gateway; skip this ire. */
		if (ire_gw == NULL)
			continue;
		index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex;
		/*
		 * A resolver exists: we can get the interface on which we have
		 * to apply the operation.
		 */
		error = fn(connp, checkonly, v6grp, index, fmode, v6src,
		    first_mp);
		/* Remember, across iterations, that one interface succeeded. */
		if (error == 0)
			or->or_private = CGTP_MCAST_SUCCESS;

		if (ip_debug > 0) {
			ulong_t	off;
			char	*ksym;

			ksym = kobj_getsymname((uintptr_t)fn, &off);
			ip2dbg(("ip_multirt_apply_membership_v6: "
			    "called %s, multirt group 0x%08x via itf 0x%08x, "
			    "error %d [success %u]\n",
			    ksym ? ksym : "?",
			    ntohl(V4_PART_OF_V6((*v6grp))),
			    ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)),
			    error, or->or_private));
		}

		ire_refrele(ire_gw);
		/* EINPROGRESS is handed straight back to the caller. */
		if (error == EINPROGRESS) {
			IRB_REFRELE(irb);
			return (error);
		}
	}
	IRB_REFRELE(irb);
	/*
	 * Consider the call as successful if we succeeded on at least
	 * one interface. Otherwise, return the last encountered error.
	 */
	return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error);
}

/*
 * Create and install the named kstat "ip6stat" (module "ip", instance 0,
 * class "net"), backed directly by ip6_statistics.  If kstat_create()
 * fails nothing is installed and ip6_kstat is left NULL.
 */
void
ip6_kstat_init(void)
{
	if ((ip6_kstat = kstat_create("ip", 0, "ip6stat",
	    "net", KSTAT_TYPE_NAMED,
	    sizeof (ip6_statistics) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL)) != NULL) {
		ip6_kstat->ks_data = &ip6_statistics;
		kstat_install(ip6_kstat);
	}
}

/*
 * The following two functions set and get the value for the
 * IPV6_SRC_PREFERENCES socket option.
 *
 * ip6_set_src_preferences() returns 0 on success, or EINVAL if prefs
 * contains unsupported bits or sets both bits of a preference pair.
 */
int
ip6_set_src_preferences(conn_t *connp, uint32_t prefs)
{
	/*
	 * We only support preferences that are covered by
	 * IPV6_PREFER_SRC_MASK.
	 */
	if (prefs & ~IPV6_PREFER_SRC_MASK)
		return (EINVAL);

	/*
	 * Look for conflicting preferences or default preferences.  If
	 * both bits of a related pair are clear, the application wants the
	 * system's default value for that pair.  Both bits in a pair can't
	 * be set.
	 */
	/* Mobile IP pair */
	if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
		prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
	} else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
	    IPV6_PREFER_SRC_MIPMASK) {
		return (EINVAL);
	}
	/* Temporary address pair */
	if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
		prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
	} else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
	    IPV6_PREFER_SRC_TMPMASK) {
		return (EINVAL);
	}
	/* CGA pair */
	if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
		prefs |= IPV6_PREFER_SRC_CGADEFAULT;
	} else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
	    IPV6_PREFER_SRC_CGAMASK) {
		return (EINVAL);
	}

	/* Stored with defaults already substituted for unset pairs. */
	connp->conn_src_preferences = prefs;
	return (0);
}

/*
 * Copy the conn's source address preferences into *val and return the
 * size in bytes of the value written.
 */
size_t
ip6_get_src_preferences(conn_t *connp, uint32_t *val)
{
	*val = connp->conn_src_preferences;
	return (sizeof (connp->conn_src_preferences));
}

/*
 * Validate an in6_pktinfo supplied by the application.
 *
 * Returns 0 if acceptable, the error from ill_lookup_on_ifindex() (other
 * than EINPROGRESS) if the interface index cannot be resolved, or ENXIO
 * if an unprivileged caller names a source address for which no
 * IRE_LOCAL/IRE_LOOPBACK route exists in the conn's zone.
 */
int
ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp)
{
	ill_t		*ill;
	ire_t		*ire;
	int		error;

	/*
	 * Verify the source address and ifindex. Privileged users can use
	 * any source address.  For ancillary data the source address is
	 * checked in ip_wput_v6.
	 */
	if (pkti->ipi6_ifindex != 0) {
		ASSERT(connp != NULL);
		ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE,
		    CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error);
		if (ill == NULL) {
			/*
			 * We just want to know if the interface exists, we
			 * don't really care about the ill pointer itself.
			 * NOTE(review): EINPROGRESS is deliberately treated
			 * as "interface exists" here even though mp and
			 * ip_restart_optmgmt were handed to the lookup --
			 * confirm against ill_lookup_on_ifindex().
			 */
			if (error != EINPROGRESS)
				return (error);
			error = 0;	/* Ensure we don't use it below */
		} else {
			ill_refrele(ill);
		}
	}
	/* The address check is skipped when secpolicy_net_rawaccess() is 0. */
	if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
	    secpolicy_net_rawaccess(cr) != 0) {
		ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0,
		    (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL,
		    connp->conn_zoneid, NULL, MATCH_IRE_TYPE);
		if (ire != NULL)
			ire_refrele(ire);
		else
			return (ENXIO);
	}
	return (0);
}

/*
 * Get the size of the IP options (including the IP headers size)
 * without including the AH header's size. If till_ah is B_FALSE,
 * and if AH header is present, dest options beyond AH header will
 * also be included in the returned size.
 *
 * mp must start at the IPv6 header.  No bounds checking against b_wptr
 * is done while walking the hop-by-hop, destination and routing headers.
 */
int
ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
{
	ip6_t		*ip6h;
	uint8_t		nexthdr;
	uint8_t		*whereptr;
	ip6_hbh_t	*hbhhdr;
	ip6_dest_t	*dsthdr;
	ip6_rthdr_t	*rthdr;
	int		ehdrlen;
	int		size;
	ah_t		*ah;

	ip6h = (ip6_t *)mp->b_rptr;
	size = IPV6_HDR_LEN;
	nexthdr = ip6h->ip6_nxt;
	whereptr = (uint8_t *)&ip6h[1];
	for (;;) {
		/* Assume IP has already stripped it */
		ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW);
		switch (nexthdr) {
		case IPPROTO_HOPOPTS:
			hbhhdr = (ip6_hbh_t *)whereptr;
			nexthdr = hbhhdr->ip6h_nxt;
			ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
			break;
		case IPPROTO_DSTOPTS:
			dsthdr = (ip6_dest_t *)whereptr;
			nexthdr = dsthdr->ip6d_nxt;
			ehdrlen = 8 * (dsthdr->ip6d_len + 1);
			break;
		case IPPROTO_ROUTING:
			rthdr = (ip6_rthdr_t *)whereptr;
			nexthdr = rthdr->ip6r_nxt;
			ehdrlen =  8 * (rthdr->ip6r_len + 1);
			break;
		default :
			/* Caller only wants the headers that precede AH. */
			if (till_ah) {
				ASSERT(nexthdr == IPPROTO_AH);
				return (size);
			}
			/*
			 * If we don't have a AH header to traverse,
			 * return now. This happens normally for
			 * outbound datagrams where we have not inserted
			 * the AH header.
			 */
			if (nexthdr != IPPROTO_AH) {
				return (size);
			}

			/*
			 * We don't include the AH header's size
			 * to be symmetrical with other cases where
			 * we either don't have a AH header (outbound)
			 * or peek into the AH header yet (inbound and
			 * not pulled up yet).
			 */
			ah = (ah_t *)whereptr;
			nexthdr = ah->ah_nexthdr;
			/* ah_length is in 4-byte words; 8 more bytes added. */
			ehdrlen = (ah->ah_length << 2) + 8;

			if (nexthdr == IPPROTO_DSTOPTS) {
				if (whereptr + ehdrlen >= mp->b_wptr) {
					/*
					 * The destination options header
					 * is not part of the first mblk.
					 * NOTE(review): b_cont is
					 * dereferenced without a NULL check
					 * -- confirm callers guarantee a
					 * continuation block here.
					 */
					whereptr = mp->b_cont->b_rptr;
				} else {
					whereptr += ehdrlen;
				}

				/* Count the dest options that follow AH. */
				dsthdr = (ip6_dest_t *)whereptr;
				ehdrlen = 8 * (dsthdr->ip6d_len + 1);
				size += ehdrlen;
			}
			return (size);
		}
		/* Step over the extension header just parsed. */
		whereptr += ehdrlen;
		size += ehdrlen;
	}
}