1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/kobj.h> 46 #include <sys/zone.h> 47 48 #include <sys/kmem.h> 49 #include <sys/systm.h> 50 #include <sys/param.h> 51 #include <sys/socket.h> 52 #include <sys/vtrace.h> 53 #include <sys/isa_defs.h> 54 #include <sys/atomic.h> 55 #include <sys/iphada.h> 56 #include <sys/policy.h> 57 #include <net/if.h> 58 #include <net/if_types.h> 59 #include <net/route.h> 60 #include <net/if_dl.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <netinet/ip6.h> 64 #include <netinet/icmp6.h> 65 #include <netinet/sctp.h> 66 67 #include <inet/common.h> 68 #include <inet/mi.h> 69 #include <inet/mib2.h> 70 #include <inet/nd.h> 71 #include <inet/arp.h> 72 73 #include <inet/ip.h> 74 #include <inet/ip_impl.h> 75 #include <inet/ip6.h> 76 #include <inet/ip6_asp.h> 77 #include <inet/tcp.h> 78 #include <inet/tcp_impl.h> 79 #include <inet/udp_impl.h> 80 #include <inet/ipp_common.h> 81 82 #include <inet/ip_multi.h> 83 #include <inet/ip_if.h> 84 #include <inet/ip_ire.h> 85 #include <inet/ip_rts.h> 86 #include <inet/optcom.h> 87 #include <inet/ip_ndp.h> 88 #include <net/pfkeyv2.h> 89 #include <inet/ipsec_info.h> 90 #include <inet/sadb.h> 91 #include <inet/ipsec_impl.h> 92 #include <inet/tun.h> 93 #include <inet/sctp_ip.h> 94 #include <sys/pattr.h> 95 #include <inet/ipclassifier.h> 96 #include <inet/ipsecah.h> 97 #include <inet/udp_impl.h> 98 #include <sys/squeue.h> 99 100 #include <sys/tsol/label.h> 101 #include <sys/tsol/tnet.h> 102 103 #include <rpc/pmap_prot.h> 104 105 /* Temporary; for CR 6451644 work-around */ 106 #include <sys/ethernet.h> 
107 108 extern squeue_func_t ip_input_proc; 109 110 /* 111 * IP statistics. 112 */ 113 #define IP6_STAT(x) (ip6_statistics.x.value.ui64++) 114 #define IP6_STAT_UPDATE(x, n) (ip6_statistics.x.value.ui64 += (n)) 115 116 typedef struct ip6_stat { 117 kstat_named_t ip6_udp_fast_path; 118 kstat_named_t ip6_udp_slow_path; 119 kstat_named_t ip6_udp_fannorm; 120 kstat_named_t ip6_udp_fanmb; 121 kstat_named_t ip6_out_sw_cksum; 122 kstat_named_t ip6_in_sw_cksum; 123 kstat_named_t ip6_tcp_in_full_hw_cksum_err; 124 kstat_named_t ip6_tcp_in_part_hw_cksum_err; 125 kstat_named_t ip6_tcp_in_sw_cksum_err; 126 kstat_named_t ip6_tcp_out_sw_cksum_bytes; 127 kstat_named_t ip6_udp_in_full_hw_cksum_err; 128 kstat_named_t ip6_udp_in_part_hw_cksum_err; 129 kstat_named_t ip6_udp_in_sw_cksum_err; 130 kstat_named_t ip6_udp_out_sw_cksum_bytes; 131 kstat_named_t ip6_frag_mdt_pkt_out; 132 kstat_named_t ip6_frag_mdt_discarded; 133 kstat_named_t ip6_frag_mdt_allocfail; 134 kstat_named_t ip6_frag_mdt_addpdescfail; 135 kstat_named_t ip6_frag_mdt_allocd; 136 } ip6_stat_t; 137 138 static ip6_stat_t ip6_statistics = { 139 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 140 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 141 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 142 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 143 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 144 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 145 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 146 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 147 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 148 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 149 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 150 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 151 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 152 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 153 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 154 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 155 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 156 { 
"ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 157 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 158 }; 159 160 static kstat_t *ip6_kstat; 161 162 /* 163 * Naming conventions: 164 * These rules should be judiciously applied 165 * if there is a need to identify something as IPv6 versus IPv4 166 * IPv6 funcions will end with _v6 in the ip module. 167 * IPv6 funcions will end with _ipv6 in the transport modules. 168 * IPv6 macros: 169 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 170 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 171 * And then there are ..V4_PART_OF_V6. 172 * The intent is that macros in the ip module end with _V6. 173 * IPv6 global variables will start with ipv6_ 174 * IPv6 structures will start with ipv6 175 * IPv6 defined constants should start with IPV6_ 176 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 177 */ 178 179 /* 180 * IPv6 mibs when the interface (ill) is not known. 181 * When the ill is known the per-interface mib in the ill is used. 182 */ 183 mib2_ipv6IfStatsEntry_t ip6_mib; 184 mib2_ipv6IfIcmpEntry_t icmp6_mib; 185 186 /* 187 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 188 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 189 * from IANA. This mechanism will remain in effect until an official 190 * number is obtained. 
191 */ 192 uchar_t ip6opt_ls; 193 194 uint_t ipv6_ire_default_count; /* Number of IPv6 IRE_DEFAULT entries */ 195 uint_t ipv6_ire_default_index; /* Walking IPv6 index used to mod in */ 196 197 const in6_addr_t ipv6_all_ones = 198 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 199 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 200 201 #ifdef _BIG_ENDIAN 202 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 203 #else /* _BIG_ENDIAN */ 204 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 205 #endif /* _BIG_ENDIAN */ 206 207 #ifdef _BIG_ENDIAN 208 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 209 #else /* _BIG_ENDIAN */ 210 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 211 #endif /* _BIG_ENDIAN */ 212 213 #ifdef _BIG_ENDIAN 214 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 215 #else /* _BIG_ENDIAN */ 216 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 217 #endif /* _BIG_ENDIAN */ 218 219 #ifdef _BIG_ENDIAN 220 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 221 #else /* _BIG_ENDIAN */ 222 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 223 #endif /* _BIG_ENDIAN */ 224 225 #ifdef _BIG_ENDIAN 226 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 227 #else /* _BIG_ENDIAN */ 228 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 229 #endif /* _BIG_ENDIAN */ 230 231 #ifdef _BIG_ENDIAN 232 const in6_addr_t ipv6_solicited_node_mcast = 233 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 234 #else /* _BIG_ENDIAN */ 235 const in6_addr_t ipv6_solicited_node_mcast = 236 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 237 #endif /* _BIG_ENDIAN */ 238 239 /* 240 * Used by icmp_send_redirect_v6 for picking random src. 
241 */ 242 uint_t icmp_redirect_v6_src_index; 243 244 /* Leave room for ip_newroute to tack on the src and target addresses */ 245 #define OK_RESOLVER_MP_V6(mp) \ 246 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 247 248 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 249 boolean_t, zoneid_t); 250 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 251 const in6_addr_t *, boolean_t); 252 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 253 static boolean_t icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp); 254 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 255 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 256 boolean_t, boolean_t, boolean_t, boolean_t); 257 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 258 iulp_t *); 259 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 260 uint16_t, boolean_t, boolean_t, boolean_t); 261 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 262 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 263 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 264 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 265 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 266 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 267 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 268 uint8_t *, uint_t, uint8_t); 269 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 270 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 271 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *); 272 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 273 conn_t *, int, int, int); 274 static boolean_t ip_ulp_cando_pkt2big(int); 275 276 static void ip_rput_v6(queue_t *, mblk_t *); 277 static void ip_wput_v6(queue_t *, mblk_t *); 278 279 /* 280 * A template for an IPv6 AR_ENTRY_QUERY 281 */ 282 static areq_t ipv6_areq_template = { 283 
AR_ENTRY_QUERY, /* cmd */ 284 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 285 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 286 IP6_DL_SAP, /* protocol, from arps perspective */ 287 sizeof (areq_t), /* target addr offset */ 288 IPV6_ADDR_LEN, /* target addr_length */ 289 0, /* flags */ 290 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 291 IPV6_ADDR_LEN, /* sender addr length */ 292 6, /* xmit_count */ 293 1000, /* (re)xmit_interval in milliseconds */ 294 4 /* max # of requests to buffer */ 295 /* anything else filled in by the code */ 296 }; 297 298 struct qinit rinit_ipv6 = { 299 (pfi_t)ip_rput_v6, 300 NULL, 301 ip_open, 302 ip_close, 303 NULL, 304 &ip_mod_info 305 }; 306 307 struct qinit winit_ipv6 = { 308 (pfi_t)ip_wput_v6, 309 (pfi_t)ip_wsrv, 310 ip_open, 311 ip_close, 312 NULL, 313 &ip_mod_info 314 }; 315 316 /* 317 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 318 * The message has already been checksummed and if needed, 319 * a copy has been made to be sent any interested ICMP client (conn) 320 * Note that this is different than icmp_inbound() which does the fanout 321 * to conn's as well as local processing of the ICMP packets. 322 * 323 * All error messages are passed to the matching transport stream. 324 * 325 * Zones notes: 326 * The packet is only processed in the context of the specified zone: typically 327 * only this zone will reply to an echo request. This means that the caller must 328 * call icmp_inbound_v6() for each relevant zone. 
329 */ 330 static void 331 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 332 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 333 { 334 icmp6_t *icmp6; 335 ip6_t *ip6h; 336 boolean_t interested; 337 ip6i_t *ip6i; 338 in6_addr_t origsrc; 339 ire_t *ire; 340 mblk_t *first_mp; 341 ipsec_in_t *ii; 342 343 ASSERT(ill != NULL); 344 first_mp = mp; 345 if (mctl_present) { 346 mp = first_mp->b_cont; 347 ASSERT(mp != NULL); 348 349 ii = (ipsec_in_t *)first_mp->b_rptr; 350 ASSERT(ii->ipsec_in_type == IPSEC_IN); 351 } 352 353 ip6h = (ip6_t *)mp->b_rptr; 354 355 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 356 357 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 358 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 359 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 360 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 361 freemsg(first_mp); 362 return; 363 } 364 ip6h = (ip6_t *)mp->b_rptr; 365 } 366 if (icmp_accept_clear_messages == 0) { 367 first_mp = ipsec_check_global_policy(first_mp, NULL, 368 NULL, ip6h, mctl_present); 369 if (first_mp == NULL) 370 return; 371 } 372 373 /* 374 * On a labeled system, we have to check whether the zone itself is 375 * permitted to receive raw traffic. 376 */ 377 if (is_system_labeled()) { 378 if (zoneid == ALL_ZONES) 379 zoneid = tsol_packet_to_zoneid(mp); 380 if (!tsol_can_accept_raw(mp, B_FALSE)) { 381 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 382 zoneid)); 383 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 384 freemsg(first_mp); 385 return; 386 } 387 } 388 389 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 390 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 391 icmp6->icmp6_code)); 392 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 393 394 /* Initiate IPPF processing here */ 395 if (IP6_IN_IPP(flags)) { 396 397 /* 398 * If the ifindex changes due to SIOCSLIFINDEX 399 * packet may return to IP on the wrong ill. 
400 */ 401 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 402 if (mp == NULL) { 403 if (mctl_present) { 404 freeb(first_mp); 405 } 406 return; 407 } 408 } 409 410 switch (icmp6->icmp6_type) { 411 case ICMP6_DST_UNREACH: 412 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 413 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 414 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 415 break; 416 417 case ICMP6_TIME_EXCEEDED: 418 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 419 break; 420 421 case ICMP6_PARAM_PROB: 422 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 423 break; 424 425 case ICMP6_PACKET_TOO_BIG: 426 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 427 zoneid); 428 return; 429 case ICMP6_ECHO_REQUEST: 430 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 431 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 432 !ipv6_resp_echo_mcast) 433 break; 434 435 /* 436 * We must have exclusive use of the mblk to convert it to 437 * a response. 438 * If not, we copy it. 439 */ 440 if (mp->b_datap->db_ref > 1) { 441 mblk_t *mp1; 442 443 mp1 = copymsg(mp); 444 freemsg(mp); 445 if (mp1 == NULL) { 446 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 447 if (mctl_present) 448 freeb(first_mp); 449 return; 450 } 451 mp = mp1; 452 ip6h = (ip6_t *)mp->b_rptr; 453 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 454 if (mctl_present) 455 first_mp->b_cont = mp; 456 else 457 first_mp = mp; 458 } 459 460 /* 461 * Turn the echo into an echo reply. 462 * Remove any extension headers (do not reverse a source route) 463 * and clear the flow id (keep traffic class for now). 
464 */ 465 if (hdr_length != IPV6_HDR_LEN) { 466 int i; 467 468 for (i = 0; i < IPV6_HDR_LEN; i++) 469 mp->b_rptr[hdr_length - i - 1] = 470 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 471 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 472 ip6h = (ip6_t *)mp->b_rptr; 473 ip6h->ip6_nxt = IPPROTO_ICMPV6; 474 hdr_length = IPV6_HDR_LEN; 475 } 476 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 477 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 478 479 ip6h->ip6_plen = 480 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 481 origsrc = ip6h->ip6_src; 482 /* 483 * Reverse the source and destination addresses. 484 * If the return address is a multicast, zero out the source 485 * (ip_wput_v6 will set an address). 486 */ 487 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 488 ip6h->ip6_src = ipv6_all_zeros; 489 ip6h->ip6_dst = origsrc; 490 } else { 491 ip6h->ip6_src = ip6h->ip6_dst; 492 ip6h->ip6_dst = origsrc; 493 } 494 495 /* set the hop limit */ 496 ip6h->ip6_hops = ipv6_def_hops; 497 498 /* 499 * Prepare for checksum by putting icmp length in the icmp 500 * checksum field. The checksum is calculated in ip_wput_v6. 501 */ 502 icmp6->icmp6_cksum = ip6h->ip6_plen; 503 /* 504 * ICMP echo replies should go out on the same interface 505 * the request came on as probes used by in.mpathd for 506 * detecting NIC failures are ECHO packets. We turn-off load 507 * spreading by allocating a ip6i and setting ip6i_attach_if 508 * to B_TRUE which is handled both by ip_wput_v6 and 509 * ip_newroute_v6. If we don't turnoff load spreading, 510 * the packets might get dropped if there are no 511 * non-FAILED/INACTIVE interfaces for it to go out on and 512 * in.mpathd would wrongly detect a failure or mis-detect 513 * a NIC failure as a link failure. As load spreading can 514 * happen only if ill_group is not NULL, we do only for 515 * that case and this does not affect the normal case. 516 * 517 * We force this only on echo packets that came from on-link 518 * hosts. 
We restrict this to link-local addresses which 519 * is used by in.mpathd for probing. In the IPv6 case, 520 * default routes typically have an ire_ipif pointer and 521 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 522 * might work. As a default route out of this interface 523 * may not be present, enforcing this packet to go out in 524 * this case may not work. 525 */ 526 if (ill->ill_group != NULL && 527 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 528 /* 529 * If we are sending replies to ourselves, don't 530 * set ATTACH_IF as we may not be able to find 531 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 532 * causes ip_wput_v6 to look for an IRE_LOCAL on 533 * "ill" which it may not find and will try to 534 * create an IRE_CACHE for our local address. Once 535 * we do this, we will try to forward all packets 536 * meant to our LOCAL address. 537 */ 538 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 539 NULL); 540 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 541 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 542 if (mp == NULL) { 543 BUMP_MIB(ill->ill_icmp6_mib, 544 ipv6IfIcmpInErrors); 545 if (ire != NULL) 546 ire_refrele(ire); 547 if (mctl_present) 548 freeb(first_mp); 549 return; 550 } else if (mctl_present) { 551 first_mp->b_cont = mp; 552 } else { 553 first_mp = mp; 554 } 555 ip6i = (ip6i_t *)mp->b_rptr; 556 ip6i->ip6i_flags = IP6I_ATTACH_IF; 557 ip6i->ip6i_ifindex = 558 ill->ill_phyint->phyint_ifindex; 559 } 560 if (ire != NULL) 561 ire_refrele(ire); 562 } 563 564 if (!mctl_present) { 565 /* 566 * This packet should go out the same way as it 567 * came in i.e in clear. To make sure that global 568 * policy will not be applied to this in ip_wput, 569 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
570 */ 571 ASSERT(first_mp == mp); 572 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 573 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 574 freemsg(mp); 575 return; 576 } 577 ii = (ipsec_in_t *)first_mp->b_rptr; 578 579 /* This is not a secure packet */ 580 ii->ipsec_in_secure = B_FALSE; 581 first_mp->b_cont = mp; 582 } 583 ii->ipsec_in_zoneid = zoneid; 584 ASSERT(zoneid != ALL_ZONES); 585 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 586 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 587 return; 588 } 589 put(WR(q), first_mp); 590 return; 591 592 case ICMP6_ECHO_REPLY: 593 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 594 break; 595 596 case ND_ROUTER_SOLICIT: 597 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 598 break; 599 600 case ND_ROUTER_ADVERT: 601 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 602 break; 603 604 case ND_NEIGHBOR_SOLICIT: 605 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 606 if (mctl_present) 607 freeb(first_mp); 608 /* XXX may wish to pass first_mp up to ndp_input someday. */ 609 ndp_input(ill, mp, dl_mp); 610 return; 611 612 case ND_NEIGHBOR_ADVERT: 613 BUMP_MIB(ill->ill_icmp6_mib, 614 ipv6IfIcmpInNeighborAdvertisements); 615 if (mctl_present) 616 freeb(first_mp); 617 /* XXX may wish to pass first_mp up to ndp_input someday. */ 618 ndp_input(ill, mp, dl_mp); 619 return; 620 621 case ND_REDIRECT: { 622 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 623 624 if (ipv6_ignore_redirect) 625 break; 626 627 /* 628 * As there is no upper client to deliver, we don't 629 * need the first_mp any more. 630 */ 631 if (mctl_present) 632 freeb(first_mp); 633 if (!pullupmsg(mp, -1) || 634 !icmp_redirect_ok_v6(ill, mp)) { 635 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 636 break; 637 } 638 icmp_redirect_v6(q, mp, ill); 639 return; 640 } 641 642 /* 643 * The next three icmp messages will be handled by MLD. 
644 * Pass all valid MLD packets up to any process(es) 645 * listening on a raw ICMP socket. MLD messages are 646 * freed by mld_input function. 647 */ 648 case MLD_LISTENER_QUERY: 649 case MLD_LISTENER_REPORT: 650 case MLD_LISTENER_REDUCTION: 651 if (mctl_present) 652 freeb(first_mp); 653 mld_input(q, mp, ill); 654 return; 655 default: 656 break; 657 } 658 if (interested) { 659 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 660 mctl_present, zoneid); 661 } else { 662 freemsg(first_mp); 663 } 664 } 665 666 /* 667 * Process received IPv6 ICMP Packet too big. 668 * After updating any IRE it does the fanout to any matching transport streams. 669 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 670 */ 671 /* ARGSUSED */ 672 static void 673 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 674 boolean_t mctl_present, zoneid_t zoneid) 675 { 676 ip6_t *ip6h; 677 ip6_t *inner_ip6h; 678 icmp6_t *icmp6; 679 uint16_t hdr_length; 680 uint32_t mtu; 681 ire_t *ire, *first_ire; 682 mblk_t *first_mp; 683 684 first_mp = mp; 685 if (mctl_present) 686 mp = first_mp->b_cont; 687 /* 688 * We must have exclusive use of the mblk to update the MTU 689 * in the packet. 690 * If not, we copy it. 691 * 692 * If there's an M_CTL present, we know that allocated first_mp 693 * earlier in this function, so we know first_mp has refcnt of one. 
694 */ 695 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 696 if (mp->b_datap->db_ref > 1) { 697 mblk_t *mp1; 698 699 mp1 = copymsg(mp); 700 freemsg(mp); 701 if (mp1 == NULL) { 702 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 703 if (mctl_present) 704 freeb(first_mp); 705 return; 706 } 707 mp = mp1; 708 if (mctl_present) 709 first_mp->b_cont = mp; 710 else 711 first_mp = mp; 712 } 713 ip6h = (ip6_t *)mp->b_rptr; 714 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 715 hdr_length = ip_hdr_length_v6(mp, ip6h); 716 else 717 hdr_length = IPV6_HDR_LEN; 718 719 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 720 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 721 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 722 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 723 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 724 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 725 freemsg(first_mp); 726 return; 727 } 728 ip6h = (ip6_t *)mp->b_rptr; 729 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 730 inner_ip6h = (ip6_t *)&icmp6[1]; 731 } 732 733 /* 734 * For link local destinations matching simply on IRE type is not 735 * sufficient. Same link local addresses for different ILL's is 736 * possible. 
737 */ 738 739 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 740 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 741 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 742 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 743 744 if (first_ire == NULL) { 745 if (ip_debug > 2) { 746 /* ip1dbg */ 747 pr_addr_dbg("icmp_inbound_too_big_v6:" 748 "no ire for dst %s\n", AF_INET6, 749 &inner_ip6h->ip6_dst); 750 } 751 freemsg(first_mp); 752 return; 753 } 754 755 mtu = ntohl(icmp6->icmp6_mtu); 756 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 757 for (ire = first_ire; ire != NULL && 758 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 759 ire = ire->ire_next) { 760 mutex_enter(&ire->ire_lock); 761 if (mtu < IPV6_MIN_MTU) { 762 ip1dbg(("Received mtu less than IPv6 " 763 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 764 mtu = IPV6_MIN_MTU; 765 /* 766 * If an mtu less than IPv6 min mtu is received, 767 * we must include a fragment header in 768 * subsequent packets. 769 */ 770 ire->ire_frag_flag |= IPH_FRAG_HDR; 771 } 772 ip1dbg(("Received mtu from router: %d\n", mtu)); 773 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 774 /* Record the new max frag size for the ULP. */ 775 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 776 /* 777 * If we need a fragment header in every packet 778 * (above case or multirouting), make sure the 779 * ULP takes it into account when computing the 780 * payload size. 
781 */ 782 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 783 sizeof (ip6_frag_t)); 784 } else { 785 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 786 } 787 mutex_exit(&ire->ire_lock); 788 } 789 rw_exit(&first_ire->ire_bucket->irb_lock); 790 ire_refrele(first_ire); 791 } else { 792 irb_t *irb = NULL; 793 /* 794 * for non-link local destinations we match only on the IRE type 795 */ 796 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 797 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE); 798 if (ire == NULL) { 799 if (ip_debug > 2) { 800 /* ip1dbg */ 801 pr_addr_dbg("icmp_inbound_too_big_v6:" 802 "no ire for dst %s\n", 803 AF_INET6, &inner_ip6h->ip6_dst); 804 } 805 freemsg(first_mp); 806 return; 807 } 808 irb = ire->ire_bucket; 809 ire_refrele(ire); 810 rw_enter(&irb->irb_lock, RW_READER); 811 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 812 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 813 &inner_ip6h->ip6_dst)) { 814 mtu = ntohl(icmp6->icmp6_mtu); 815 mutex_enter(&ire->ire_lock); 816 if (mtu < IPV6_MIN_MTU) { 817 ip1dbg(("Received mtu less than IPv6" 818 "min mtu %d: %d\n", 819 IPV6_MIN_MTU, mtu)); 820 mtu = IPV6_MIN_MTU; 821 /* 822 * If an mtu less than IPv6 min mtu is 823 * received, we must include a fragment 824 * header in subsequent packets. 825 */ 826 ire->ire_frag_flag |= IPH_FRAG_HDR; 827 } 828 829 ip1dbg(("Received mtu from router: %d\n", mtu)); 830 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 831 /* Record the new max frag size for the ULP. */ 832 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 833 /* 834 * If we need a fragment header in 835 * every packet (above case or 836 * multirouting), make sure the ULP 837 * takes it into account when computing 838 * the payload size. 
839 */ 840 icmp6->icmp6_mtu = 841 htonl(ire->ire_max_frag - 842 sizeof (ip6_frag_t)); 843 } else { 844 icmp6->icmp6_mtu = 845 htonl(ire->ire_max_frag); 846 } 847 mutex_exit(&ire->ire_lock); 848 } 849 } 850 rw_exit(&irb->irb_lock); 851 } 852 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 853 mctl_present, zoneid); 854 } 855 856 static void 857 pkt_too_big(conn_t *connp, void *arg) 858 { 859 mblk_t *mp; 860 861 if (!connp->conn_ipv6_recvpathmtu) 862 return; 863 864 /* create message and drop it on this connections read queue */ 865 if ((mp = dupb((mblk_t *)arg)) == NULL) { 866 return; 867 } 868 mp->b_datap->db_type = M_CTL; 869 870 putnext(connp->conn_rq, mp); 871 } 872 873 /* 874 * Fanout received ICMPv6 error packets to the transports. 875 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 876 */ 877 void 878 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 879 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 880 { 881 uint16_t *up; /* Pointer to ports in ULP header */ 882 uint32_t ports; /* reversed ports for fanout */ 883 ip6_t rip6h; /* With reversed addresses */ 884 uint16_t hdr_length; 885 uint8_t *nexthdrp; 886 uint8_t nexthdr; 887 mblk_t *first_mp; 888 ipsec_in_t *ii; 889 tcpha_t *tcpha; 890 conn_t *connp; 891 892 first_mp = mp; 893 if (mctl_present) { 894 mp = first_mp->b_cont; 895 ASSERT(mp != NULL); 896 897 ii = (ipsec_in_t *)first_mp->b_rptr; 898 ASSERT(ii->ipsec_in_type == IPSEC_IN); 899 } else { 900 ii = NULL; 901 } 902 903 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 904 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 905 906 /* 907 * Need to pullup everything in order to use 908 * ip_hdr_length_nexthdr_v6() 909 */ 910 if (mp->b_cont != NULL) { 911 if (!pullupmsg(mp, -1)) { 912 ip1dbg(("icmp_inbound_error_fanout_v6: " 913 "pullupmsg failed\n")); 914 goto drop_pkt; 915 } 916 ip6h = (ip6_t *)mp->b_rptr; 917 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 918 } 919 920 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 921 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 922 goto drop_pkt; 923 924 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 925 goto drop_pkt; 926 nexthdr = *nexthdrp; 927 928 /* Set message type, must be done after pullups */ 929 mp->b_datap->db_type = M_CTL; 930 931 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 932 /* 933 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 934 * sockets. 935 * 936 * Note I don't like walking every connection to deliver 937 * this information to a set of listeners. A separate 938 * list could be kept to keep the cost of this down. 939 */ 940 ipcl_walk(pkt_too_big, (void *)mp); 941 } 942 943 /* Try to pass the ICMP message to clients who need it */ 944 switch (nexthdr) { 945 case IPPROTO_UDP: { 946 /* 947 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 948 * UDP header to get the port information. 949 */ 950 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 951 mp->b_wptr) { 952 break; 953 } 954 /* 955 * Attempt to find a client stream based on port. 956 * Note that we do a reverse lookup since the header is 957 * in the form we sent it out. 958 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 959 * and we only set the src and dst addresses and nexthdr. 960 */ 961 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 962 rip6h.ip6_src = ip6h->ip6_dst; 963 rip6h.ip6_dst = ip6h->ip6_src; 964 rip6h.ip6_nxt = nexthdr; 965 ((uint16_t *)&ports)[0] = up[1]; 966 ((uint16_t *)&ports)[1] = up[0]; 967 968 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 969 IP6_NO_IPPOLICY, mctl_present, zoneid); 970 return; 971 } 972 case IPPROTO_TCP: { 973 /* 974 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 975 * the TCP header to get the port information. 
976 */ 977 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 978 mp->b_wptr) { 979 break; 980 } 981 982 /* 983 * Attempt to find a client stream based on port. 984 * Note that we do a reverse lookup since the header is 985 * in the form we sent it out. 986 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 987 * we only set the src and dst addresses and nexthdr. 988 */ 989 990 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 991 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 992 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 993 if (connp == NULL) { 994 goto drop_pkt; 995 } 996 997 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 998 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 999 return; 1000 1001 } 1002 case IPPROTO_SCTP: 1003 /* 1004 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 1005 * the SCTP header to get the port information. 1006 */ 1007 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 1008 mp->b_wptr) { 1009 break; 1010 } 1011 1012 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 1013 ((uint16_t *)&ports)[0] = up[1]; 1014 ((uint16_t *)&ports)[1] = up[0]; 1015 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 1016 IP6_NO_IPPOLICY, 0, zoneid); 1017 return; 1018 case IPPROTO_ESP: 1019 case IPPROTO_AH: { 1020 int ipsec_rc; 1021 1022 /* 1023 * We need a IPSEC_IN in the front to fanout to AH/ESP. 1024 * We will re-use the IPSEC_IN if it is already present as 1025 * AH/ESP will not affect any fields in the IPSEC_IN for 1026 * ICMP errors. If there is no IPSEC_IN, allocate a new 1027 * one and attach it in the front. 1028 */ 1029 if (ii != NULL) { 1030 /* 1031 * ip_fanout_proto_again converts the ICMP errors 1032 * that come back from AH/ESP to M_DATA so that 1033 * if it is non-AH/ESP and we do a pullupmsg in 1034 * this function, it would work. Convert it back 1035 * to M_CTL before we send up as this is a ICMP 1036 * error. This could have been generated locally or 1037 * by some router. 
Validate the inner IPSEC 1038 * headers. 1039 * 1040 * NOTE : ill_index is used by ip_fanout_proto_again 1041 * to locate the ill. 1042 */ 1043 ASSERT(ill != NULL); 1044 ii->ipsec_in_ill_index = 1045 ill->ill_phyint->phyint_ifindex; 1046 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1047 first_mp->b_cont->b_datap->db_type = M_CTL; 1048 } else { 1049 /* 1050 * IPSEC_IN is not present. We attach a ipsec_in 1051 * message and send up to IPSEC for validating 1052 * and removing the IPSEC headers. Clear 1053 * ipsec_in_secure so that when we return 1054 * from IPSEC, we don't mistakenly think that this 1055 * is a secure packet came from the network. 1056 * 1057 * NOTE : ill_index is used by ip_fanout_proto_again 1058 * to locate the ill. 1059 */ 1060 ASSERT(first_mp == mp); 1061 first_mp = ipsec_in_alloc(B_FALSE); 1062 if (first_mp == NULL) { 1063 freemsg(mp); 1064 BUMP_MIB(&ip_mib, ipInDiscards); 1065 return; 1066 } 1067 ii = (ipsec_in_t *)first_mp->b_rptr; 1068 1069 /* This is not a secure packet */ 1070 ii->ipsec_in_secure = B_FALSE; 1071 first_mp->b_cont = mp; 1072 mp->b_datap->db_type = M_CTL; 1073 ASSERT(ill != NULL); 1074 ii->ipsec_in_ill_index = 1075 ill->ill_phyint->phyint_ifindex; 1076 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1077 } 1078 1079 if (!ipsec_loaded()) { 1080 ip_proto_not_sup(q, first_mp, 0, zoneid); 1081 return; 1082 } 1083 1084 if (nexthdr == IPPROTO_ESP) 1085 ipsec_rc = ipsecesp_icmp_error(first_mp); 1086 else 1087 ipsec_rc = ipsecah_icmp_error(first_mp); 1088 if (ipsec_rc == IPSEC_STATUS_FAILED) 1089 return; 1090 1091 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1092 return; 1093 } 1094 case IPPROTO_ENCAP: 1095 case IPPROTO_IPV6: 1096 if ((uint8_t *)ip6h + hdr_length + 1097 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1098 sizeof (ip6_t)) > mp->b_wptr) 1099 goto drop_pkt; 1100 1101 if (nexthdr == IPPROTO_ENCAP || 1102 !IN6_ARE_ADDR_EQUAL( 1103 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1104 &ip6h->ip6_src) || 1105 !IN6_ARE_ADDR_EQUAL( 1106 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1107 &ip6h->ip6_dst)) { 1108 /* 1109 * For tunnels that have used IPsec protection, 1110 * we need to adjust the MTU to take into account 1111 * the IPsec overhead. 1112 */ 1113 if (ii != NULL) 1114 icmp6->icmp6_mtu = htons( 1115 ntohs(icmp6->icmp6_mtu) - 1116 ipsec_in_extra_length(first_mp)); 1117 } else { 1118 /* 1119 * Self-encapsulated case. As in the ipv4 case, 1120 * we need to strip the 2nd IP header. Since mp 1121 * is already pulled-up, we can simply bcopy 1122 * the 3rd header + data over the 2nd header. 1123 */ 1124 uint16_t unused_len; 1125 ip6_t *inner_ip6h = (ip6_t *) 1126 ((uchar_t *)ip6h + hdr_length); 1127 1128 /* 1129 * Make sure we don't do recursion more than once. 1130 */ 1131 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1132 &unused_len, &nexthdrp) || 1133 *nexthdrp == IPPROTO_IPV6) { 1134 goto drop_pkt; 1135 } 1136 1137 /* 1138 * We are about to modify the packet. Make a copy if 1139 * someone else has a reference to it. 1140 */ 1141 if (DB_REF(mp) > 1) { 1142 mblk_t *mp1; 1143 uint16_t icmp6_offset; 1144 1145 mp1 = copymsg(mp); 1146 if (mp1 == NULL) { 1147 goto drop_pkt; 1148 } 1149 icmp6_offset = (uint16_t) 1150 ((uchar_t *)icmp6 - mp->b_rptr); 1151 freemsg(mp); 1152 mp = mp1; 1153 1154 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1155 ip6h = (ip6_t *)&icmp6[1]; 1156 inner_ip6h = (ip6_t *) 1157 ((uchar_t *)ip6h + hdr_length); 1158 1159 if (mctl_present) 1160 first_mp->b_cont = mp; 1161 else 1162 first_mp = mp; 1163 } 1164 1165 /* 1166 * Need to set db_type back to M_DATA before 1167 * refeeding mp into this function. 
1168 */ 1169 DB_TYPE(mp) = M_DATA; 1170 1171 /* 1172 * Copy the 3rd header + remaining data on top 1173 * of the 2nd header. 1174 */ 1175 bcopy(inner_ip6h, ip6h, 1176 mp->b_wptr - (uchar_t *)inner_ip6h); 1177 1178 /* 1179 * Subtract length of the 2nd header. 1180 */ 1181 mp->b_wptr -= hdr_length; 1182 1183 /* 1184 * Now recurse, and see what I _really_ should be 1185 * doing here. 1186 */ 1187 icmp_inbound_error_fanout_v6(q, first_mp, 1188 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1189 zoneid); 1190 return; 1191 } 1192 /* FALLTHRU */ 1193 default: 1194 /* 1195 * The rip6h header is only used for the lookup and we 1196 * only set the src and dst addresses and nexthdr. 1197 */ 1198 rip6h.ip6_src = ip6h->ip6_dst; 1199 rip6h.ip6_dst = ip6h->ip6_src; 1200 rip6h.ip6_nxt = nexthdr; 1201 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1202 IP6_NO_IPPOLICY, mctl_present, zoneid); 1203 return; 1204 } 1205 /* NOTREACHED */ 1206 drop_pkt: 1207 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1208 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1209 freemsg(first_mp); 1210 } 1211 1212 /* 1213 * Validate the incoming redirect message, if valid redirect 1214 * processing is done later. This is separated from the actual 1215 * redirect processing to avoid becoming single threaded when not 1216 * necessary. (i.e invalid packet) 1217 * Assumes that any AH or ESP headers have already been removed. 1218 * The mp has already been pulled up. 
1219 */ 1220 boolean_t 1221 icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp) 1222 { 1223 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1224 nd_redirect_t *rd; 1225 ire_t *ire; 1226 uint16_t len; 1227 uint16_t hdr_length; 1228 1229 ASSERT(mp->b_cont == NULL); 1230 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1231 hdr_length = ip_hdr_length_v6(mp, ip6h); 1232 else 1233 hdr_length = IPV6_HDR_LEN; 1234 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1235 len = mp->b_wptr - mp->b_rptr - hdr_length; 1236 if (!IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1237 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1238 (rd->nd_rd_code != 0) || 1239 (len < sizeof (nd_redirect_t)) || 1240 (IN6_IS_ADDR_V4MAPPED(&rd->nd_rd_dst)) || 1241 (IN6_IS_ADDR_MULTICAST(&rd->nd_rd_dst))) { 1242 return (B_FALSE); 1243 } 1244 if (!(IN6_IS_ADDR_LINKLOCAL(&rd->nd_rd_target) || 1245 IN6_ARE_ADDR_EQUAL(&rd->nd_rd_target, &rd->nd_rd_dst))) { 1246 return (B_FALSE); 1247 } 1248 1249 /* 1250 * Verify that the IP source address of the redirect is 1251 * the same as the current first-hop router for the specified 1252 * ICMP destination address. Just to be cautious, this test 1253 * will be done again before we add the redirect, in case 1254 * router goes away between now and then. 1255 */ 1256 ire = ire_route_lookup_v6(&rd->nd_rd_dst, 0, 1257 &ip6h->ip6_src, 0, ill->ill_ipif, NULL, ALL_ZONES, NULL, 1258 MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1259 if (ire == NULL) 1260 return (B_FALSE); 1261 ire_refrele(ire); 1262 if (len > sizeof (nd_redirect_t)) { 1263 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1264 len - sizeof (nd_redirect_t))) 1265 return (B_FALSE); 1266 } 1267 return (B_TRUE); 1268 } 1269 1270 /* 1271 * Process received IPv6 ICMP Redirect messages. 1272 * Assumes that the icmp packet has already been verfied to be 1273 * valid, aligned and in a single mblk all done in icmp_redirect_ok_v6(). 
1274 */ 1275 /* ARGSUSED */ 1276 static void 1277 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1278 { 1279 ip6_t *ip6h; 1280 uint16_t hdr_length; 1281 nd_redirect_t *rd; 1282 ire_t *ire; 1283 ire_t *prev_ire; 1284 ire_t *redir_ire; 1285 in6_addr_t *src, *dst, *gateway; 1286 nd_opt_hdr_t *opt; 1287 nce_t *nce; 1288 int nce_flags = 0; 1289 int err = 0; 1290 boolean_t redirect_to_router = B_FALSE; 1291 int len; 1292 iulp_t ulp_info = { 0 }; 1293 ill_t *prev_ire_ill; 1294 ipif_t *ipif; 1295 1296 ip6h = (ip6_t *)mp->b_rptr; 1297 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1298 hdr_length = ip_hdr_length_v6(mp, ip6h); 1299 else 1300 hdr_length = IPV6_HDR_LEN; 1301 1302 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1303 src = &ip6h->ip6_src; 1304 dst = &rd->nd_rd_dst; 1305 gateway = &rd->nd_rd_target; 1306 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1307 redirect_to_router = B_TRUE; 1308 nce_flags |= NCE_F_ISROUTER; 1309 } 1310 /* 1311 * Make sure we had a route for the dest in question and that 1312 * route was pointing to the old gateway (the source of the 1313 * redirect packet.) 1314 */ 1315 ipif = ipif_get_next_ipif(NULL, ill); 1316 if (ipif == NULL) { 1317 freemsg(mp); 1318 return; 1319 } 1320 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1321 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1322 ipif_refrele(ipif); 1323 /* 1324 * Check that 1325 * the redirect was not from ourselves 1326 * old gateway is still directly reachable 1327 */ 1328 if (prev_ire == NULL || 1329 prev_ire->ire_type == IRE_LOCAL) { 1330 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1331 goto fail_redirect; 1332 } 1333 prev_ire_ill = ire_to_ill(prev_ire); 1334 ASSERT(prev_ire_ill != NULL); 1335 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1336 nce_flags |= NCE_F_NONUD; 1337 1338 /* 1339 * Should we use the old ULP info to create the new gateway? From 1340 * a user's perspective, we should inherit the info so that it 1341 * is a "smooth" transition. 
If we do not do that, then new 1342 * connections going thru the new gateway will have no route metrics, 1343 * which is counter-intuitive to user. From a network point of 1344 * view, this may or may not make sense even though the new gateway 1345 * is still directly connected to us so the route metrics should not 1346 * change much. 1347 * 1348 * But if the old ire_uinfo is not initialized, we do another 1349 * recursive lookup on the dest using the new gateway. There may 1350 * be a route to that. If so, use it to initialize the redirect 1351 * route. 1352 */ 1353 if (prev_ire->ire_uinfo.iulp_set) { 1354 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1355 } else if (redirect_to_router) { 1356 /* 1357 * Only do the following if the redirection is really to 1358 * a router. 1359 */ 1360 ire_t *tmp_ire; 1361 ire_t *sire; 1362 1363 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1364 ALL_ZONES, 0, NULL, 1365 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1366 if (sire != NULL) { 1367 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1368 ASSERT(tmp_ire != NULL); 1369 ire_refrele(tmp_ire); 1370 ire_refrele(sire); 1371 } else if (tmp_ire != NULL) { 1372 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1373 sizeof (iulp_t)); 1374 ire_refrele(tmp_ire); 1375 } 1376 } 1377 1378 len = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1379 opt = (nd_opt_hdr_t *)&rd[1]; 1380 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1381 if (opt != NULL) { 1382 err = ndp_lookup_then_add(ill, 1383 (uchar_t *)&opt[1], /* Link layer address */ 1384 gateway, 1385 &ipv6_all_ones, /* prefix mask */ 1386 &ipv6_all_zeros, /* Mapping mask */ 1387 0, 1388 nce_flags, 1389 ND_STALE, 1390 &nce, 1391 NULL, 1392 NULL); 1393 switch (err) { 1394 case 0: 1395 NCE_REFRELE(nce); 1396 break; 1397 case EEXIST: 1398 /* 1399 * Check to see if link layer address has changed and 1400 * process the nce_state accordingly. 
1401 */ 1402 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1403 NCE_REFRELE(nce); 1404 break; 1405 default: 1406 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1407 err)); 1408 goto fail_redirect; 1409 } 1410 } 1411 if (redirect_to_router) { 1412 /* icmp_redirect_ok_v6() must have already verified this */ 1413 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1414 1415 /* 1416 * Create a Route Association. This will allow us to remember 1417 * a router told us to use the particular gateway. 1418 */ 1419 ire = ire_create_v6( 1420 dst, 1421 &ipv6_all_ones, /* mask */ 1422 &prev_ire->ire_src_addr_v6, /* source addr */ 1423 gateway, /* gateway addr */ 1424 &prev_ire->ire_max_frag, /* max frag */ 1425 NULL, /* Fast Path header */ 1426 NULL, /* no rfq */ 1427 NULL, /* no stq */ 1428 IRE_HOST_REDIRECT, 1429 NULL, 1430 prev_ire->ire_ipif, 1431 NULL, 1432 0, 1433 0, 1434 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1435 &ulp_info, 1436 NULL, 1437 NULL); 1438 } else { 1439 /* 1440 * Just create an on link entry, may or may not be a router 1441 * If there is no link layer address option ire_add() won't 1442 * add this. 1443 */ 1444 ire = ire_create_v6( 1445 dst, /* gateway == dst */ 1446 &ipv6_all_ones, /* mask */ 1447 &prev_ire->ire_src_addr_v6, /* source addr */ 1448 &ipv6_all_zeros, /* gateway addr */ 1449 &prev_ire->ire_max_frag, /* max frag */ 1450 NULL, /* Fast Path header */ 1451 prev_ire->ire_rfq, /* ire rfq */ 1452 prev_ire->ire_stq, /* ire stq */ 1453 IRE_CACHE, 1454 NULL, 1455 prev_ire->ire_ipif, 1456 &ipv6_all_ones, 1457 0, 1458 0, 1459 0, 1460 &ulp_info, 1461 NULL, 1462 NULL); 1463 } 1464 if (ire == NULL) 1465 goto fail_redirect; 1466 1467 /* 1468 * XXX If there is no nce i.e there is no target link layer address 1469 * option with the redirect message, ire_add will fail. In that 1470 * case we never add the IRE_CACHE/IRE_HOST_REDIRECT. We need 1471 * to fix this. 
1472 */ 1473 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1474 1475 /* tell routing sockets that we received a redirect */ 1476 ip_rts_change_v6(RTM_REDIRECT, 1477 &rd->nd_rd_dst, 1478 &rd->nd_rd_target, 1479 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1480 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1481 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1482 1483 /* 1484 * Delete any existing IRE_HOST_REDIRECT for this destination. 1485 * This together with the added IRE has the effect of 1486 * modifying an existing redirect. 1487 */ 1488 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST_REDIRECT, 1489 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1490 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1491 1492 ire_refrele(ire); /* Held in ire_add_v6 */ 1493 1494 if (redir_ire != NULL) { 1495 ire_delete(redir_ire); 1496 ire_refrele(redir_ire); 1497 } 1498 } 1499 1500 if (prev_ire->ire_type == IRE_CACHE) 1501 ire_delete(prev_ire); 1502 ire_refrele(prev_ire); 1503 prev_ire = NULL; 1504 1505 fail_redirect: 1506 if (prev_ire != NULL) 1507 ire_refrele(prev_ire); 1508 freemsg(mp); 1509 } 1510 1511 static ill_t * 1512 ip_queue_to_ill_v6(queue_t *q) 1513 { 1514 ill_t *ill; 1515 1516 ASSERT(WR(q) == q); 1517 1518 if (q->q_next != NULL) { 1519 ill = (ill_t *)q->q_ptr; 1520 if (ILL_CAN_LOOKUP(ill)) 1521 ill_refhold(ill); 1522 else 1523 ill = NULL; 1524 } else { 1525 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1526 NULL, NULL, NULL, NULL, NULL); 1527 } 1528 if (ill == NULL) 1529 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1530 return (ill); 1531 } 1532 1533 /* 1534 * Assigns an appropriate source address to the packet. 1535 * If origdst is one of our IP addresses that use it as the source. 1536 * If the queue is an ill queue then select a source from that ill. 1537 * Otherwise pick a source based on a route lookup back to the origsrc. 1538 * 1539 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1540 */ 1541 static in6_addr_t * 1542 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1543 in6_addr_t *src) 1544 { 1545 ill_t *ill; 1546 ire_t *ire; 1547 ipif_t *ipif; 1548 zoneid_t zoneid; 1549 1550 ASSERT(!(wq->q_flag & QREADR)); 1551 if (wq->q_next != NULL) { 1552 ill = (ill_t *)wq->q_ptr; 1553 zoneid = GLOBAL_ZONEID; 1554 } else { 1555 ill = NULL; 1556 zoneid = Q_TO_CONN(wq)->conn_zoneid; 1557 } 1558 1559 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1560 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1561 if (ire != NULL) { 1562 /* Destined to one of our addresses */ 1563 *src = *origdst; 1564 ire_refrele(ire); 1565 return (src); 1566 } 1567 if (ire != NULL) { 1568 ire_refrele(ire); 1569 ire = NULL; 1570 } 1571 if (ill == NULL) { 1572 /* What is the route back to the original source? */ 1573 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1574 NULL, NULL, zoneid, NULL, 1575 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1576 if (ire == NULL) { 1577 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1578 return (NULL); 1579 } 1580 /* 1581 * Does not matter whether we use ire_stq or ire_ipif here. 1582 * Just pick an ill for ICMP replies. 1583 */ 1584 ASSERT(ire->ire_ipif != NULL); 1585 ill = ire->ire_ipif->ipif_ill; 1586 ire_refrele(ire); 1587 } 1588 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1589 IPV6_PREFER_SRC_DEFAULT, zoneid); 1590 if (ipif != NULL) { 1591 *src = ipif->ipif_v6src_addr; 1592 ipif_refrele(ipif); 1593 return (src); 1594 } 1595 /* 1596 * Unusual case - can't find a usable source address to reach the 1597 * original source. Use what in the route to the source. 
1598 */ 1599 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1600 NULL, NULL, zoneid, NULL, 1601 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1602 if (ire == NULL) { 1603 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1604 return (NULL); 1605 } 1606 ASSERT(ire != NULL); 1607 *src = ire->ire_src_addr_v6; 1608 ire_refrele(ire); 1609 return (src); 1610 } 1611 1612 /* 1613 * Build and ship an IPv6 ICMP message using the packet data in mp, 1614 * and the ICMP header pointed to by "stuff". (May be called as 1615 * writer.) 1616 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1617 * verify that an icmp error packet can be sent. 1618 * 1619 * If q is an ill write side queue (which is the case when packets 1620 * arrive from ip_rput) then ip_wput code will ensure that packets to 1621 * link-local destinations are sent out that ill. 1622 * 1623 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1624 * source address (see above function). 1625 */ 1626 static void 1627 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1628 const in6_addr_t *v6src_ptr, boolean_t mctl_present) 1629 { 1630 ip6_t *ip6h; 1631 in6_addr_t v6dst; 1632 size_t len_needed; 1633 size_t msg_len; 1634 mblk_t *mp1; 1635 icmp6_t *icmp6; 1636 ill_t *ill; 1637 in6_addr_t v6src; 1638 mblk_t *ipsec_mp; 1639 ipsec_out_t *io; 1640 1641 ill = ip_queue_to_ill_v6(q); 1642 if (ill == NULL) { 1643 freemsg(mp); 1644 return; 1645 } 1646 1647 if (mctl_present) { 1648 /* 1649 * If it is : 1650 * 1651 * 1) a IPSEC_OUT, then this is caused by outbound 1652 * datagram originating on this host. IPSEC processing 1653 * may or may not have been done. Refer to comments above 1654 * icmp_inbound_error_fanout for details. 1655 * 1656 * 2) a IPSEC_IN if we are generating a icmp_message 1657 * for an incoming datagram destined for us i.e called 1658 * from ip_fanout_send_icmp. 
1659 */ 1660 ipsec_info_t *in; 1661 1662 ipsec_mp = mp; 1663 mp = ipsec_mp->b_cont; 1664 1665 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1666 ip6h = (ip6_t *)mp->b_rptr; 1667 1668 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1669 in->ipsec_info_type == IPSEC_IN); 1670 1671 if (in->ipsec_info_type == IPSEC_IN) { 1672 /* 1673 * Convert the IPSEC_IN to IPSEC_OUT. 1674 */ 1675 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1676 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1677 ill_refrele(ill); 1678 return; 1679 } 1680 } else { 1681 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1682 io = (ipsec_out_t *)in; 1683 /* 1684 * Clear out ipsec_out_proc_begin, so we do a fresh 1685 * ire lookup. 1686 */ 1687 io->ipsec_out_proc_begin = B_FALSE; 1688 } 1689 } else { 1690 /* 1691 * This is in clear. The icmp message we are building 1692 * here should go out in clear. 1693 */ 1694 ipsec_in_t *ii; 1695 ASSERT(mp->b_datap->db_type == M_DATA); 1696 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1697 freemsg(mp); 1698 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1699 ill_refrele(ill); 1700 return; 1701 } 1702 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1703 1704 /* This is not a secure packet */ 1705 ii->ipsec_in_secure = B_FALSE; 1706 ipsec_mp->b_cont = mp; 1707 ip6h = (ip6_t *)mp->b_rptr; 1708 /* 1709 * Convert the IPSEC_IN to IPSEC_OUT. 
1710 */ 1711 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1712 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1713 ill_refrele(ill); 1714 return; 1715 } 1716 } 1717 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1718 1719 if (v6src_ptr != NULL) { 1720 v6src = *v6src_ptr; 1721 } else { 1722 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1723 &v6src) == NULL) { 1724 freemsg(ipsec_mp); 1725 ill_refrele(ill); 1726 return; 1727 } 1728 } 1729 v6dst = ip6h->ip6_src; 1730 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1731 msg_len = msgdsize(mp); 1732 if (msg_len > len_needed) { 1733 if (!adjmsg(mp, len_needed - msg_len)) { 1734 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1735 freemsg(ipsec_mp); 1736 ill_refrele(ill); 1737 return; 1738 } 1739 msg_len = len_needed; 1740 } 1741 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1742 if (mp1 == NULL) { 1743 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1744 freemsg(ipsec_mp); 1745 ill_refrele(ill); 1746 return; 1747 } 1748 ill_refrele(ill); 1749 mp1->b_cont = mp; 1750 mp = mp1; 1751 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1752 io->ipsec_out_type == IPSEC_OUT); 1753 ipsec_mp->b_cont = mp; 1754 1755 /* 1756 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1757 * node generates be accepted in peace by all on-host destinations. 1758 * If we do NOT assume that all on-host destinations trust 1759 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1760 * (Look for ipsec_out_icmp_loopback). 
1761 */ 1762 io->ipsec_out_icmp_loopback = B_TRUE; 1763 1764 ip6h = (ip6_t *)mp->b_rptr; 1765 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1766 1767 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1768 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1769 ip6h->ip6_hops = ipv6_def_hops; 1770 ip6h->ip6_dst = v6dst; 1771 ip6h->ip6_src = v6src; 1772 msg_len += IPV6_HDR_LEN + len; 1773 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1774 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1775 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1776 } 1777 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1778 icmp6 = (icmp6_t *)&ip6h[1]; 1779 bcopy(stuff, (char *)icmp6, len); 1780 /* 1781 * Prepare for checksum by putting icmp length in the icmp 1782 * checksum field. The checksum is calculated in ip_wput_v6. 1783 */ 1784 icmp6->icmp6_cksum = ip6h->ip6_plen; 1785 if (icmp6->icmp6_type == ND_REDIRECT) { 1786 ip6h->ip6_hops = IPV6_MAX_HOPS; 1787 } 1788 /* Send to V6 writeside put routine */ 1789 put(q, ipsec_mp); 1790 } 1791 1792 /* 1793 * Update the output mib when ICMPv6 packets are sent. 
1794 */ 1795 static void 1796 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1797 { 1798 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1799 1800 switch (icmp6->icmp6_type) { 1801 case ICMP6_DST_UNREACH: 1802 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1803 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1804 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1805 break; 1806 1807 case ICMP6_TIME_EXCEEDED: 1808 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1809 break; 1810 1811 case ICMP6_PARAM_PROB: 1812 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1813 break; 1814 1815 case ICMP6_PACKET_TOO_BIG: 1816 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1817 break; 1818 1819 case ICMP6_ECHO_REQUEST: 1820 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1821 break; 1822 1823 case ICMP6_ECHO_REPLY: 1824 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1825 break; 1826 1827 case ND_ROUTER_SOLICIT: 1828 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1829 break; 1830 1831 case ND_ROUTER_ADVERT: 1832 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1833 break; 1834 1835 case ND_NEIGHBOR_SOLICIT: 1836 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1837 break; 1838 1839 case ND_NEIGHBOR_ADVERT: 1840 BUMP_MIB(ill->ill_icmp6_mib, 1841 ipv6IfIcmpOutNeighborAdvertisements); 1842 break; 1843 1844 case ND_REDIRECT: 1845 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1846 break; 1847 1848 case MLD_LISTENER_QUERY: 1849 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1850 break; 1851 1852 case MLD_LISTENER_REPORT: 1853 case MLD_V2_LISTENER_REPORT: 1854 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1855 break; 1856 1857 case MLD_LISTENER_REDUCTION: 1858 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1859 break; 1860 } 1861 } 1862 1863 /* 1864 * Check if it is ok to send an ICMPv6 error packet in 1865 * response to the IP packet in 
mp. 1866 * Free the message and return null if no 1867 * ICMP error packet should be sent. 1868 */ 1869 static mblk_t * 1870 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1871 boolean_t llbcast, boolean_t mcast_ok) 1872 { 1873 ip6_t *ip6h; 1874 1875 if (!mp) 1876 return (NULL); 1877 1878 ip6h = (ip6_t *)mp->b_rptr; 1879 1880 /* Check if source address uniquely identifies the host */ 1881 1882 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1883 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1884 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1885 freemsg(mp); 1886 return (NULL); 1887 } 1888 1889 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1890 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1891 icmp6_t *icmp6; 1892 1893 if (mp->b_wptr - mp->b_rptr < len_needed) { 1894 if (!pullupmsg(mp, len_needed)) { 1895 ill_t *ill; 1896 1897 ill = ip_queue_to_ill_v6(q); 1898 if (ill == NULL) { 1899 BUMP_MIB(&icmp6_mib, 1900 ipv6IfIcmpInErrors); 1901 } else { 1902 BUMP_MIB(ill->ill_icmp6_mib, 1903 ipv6IfIcmpInErrors); 1904 ill_refrele(ill); 1905 } 1906 freemsg(mp); 1907 return (NULL); 1908 } 1909 ip6h = (ip6_t *)mp->b_rptr; 1910 } 1911 icmp6 = (icmp6_t *)&ip6h[1]; 1912 /* Explicitly do not generate errors in response to redirects */ 1913 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1914 icmp6->icmp6_type == ND_REDIRECT) { 1915 freemsg(mp); 1916 return (NULL); 1917 } 1918 } 1919 /* 1920 * Check that the destination is not multicast and that the packet 1921 * was not sent on link layer broadcast or multicast. (Exception 1922 * is Packet too big message as per the draft - when mcast_ok is set.) 1923 */ 1924 if (!mcast_ok && 1925 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1926 freemsg(mp); 1927 return (NULL); 1928 } 1929 if (icmp_err_rate_limit()) { 1930 /* 1931 * Only send ICMP error packets every so often. 1932 * This should be done on a per port/source basis, 1933 * but for now this will suffice. 
1934 */ 1935 freemsg(mp); 1936 return (NULL); 1937 } 1938 return (mp); 1939 } 1940 1941 /* 1942 * Generate an ICMPv6 redirect message. 1943 * Include target link layer address option if it exits. 1944 * Always include redirect header. 1945 */ 1946 static void 1947 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1948 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1949 { 1950 nd_redirect_t *rd; 1951 nd_opt_rd_hdr_t *rdh; 1952 uchar_t *buf; 1953 nce_t *nce = NULL; 1954 nd_opt_hdr_t *opt; 1955 int len; 1956 int ll_opt_len = 0; 1957 int max_redir_hdr_data_len; 1958 int pkt_len; 1959 in6_addr_t *srcp; 1960 1961 /* 1962 * We are called from ip_rput where we could 1963 * not have attached an IPSEC_IN. 1964 */ 1965 ASSERT(mp->b_datap->db_type == M_DATA); 1966 1967 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1968 if (mp == NULL) 1969 return; 1970 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1971 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1972 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1973 ill->ill_phys_addr_length + 7)/8 * 8; 1974 } 1975 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1976 ASSERT(len % 4 == 0); 1977 buf = kmem_alloc(len, KM_NOSLEEP); 1978 if (buf == NULL) { 1979 if (nce != NULL) 1980 NCE_REFRELE(nce); 1981 freemsg(mp); 1982 return; 1983 } 1984 1985 rd = (nd_redirect_t *)buf; 1986 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1987 rd->nd_rd_code = 0; 1988 rd->nd_rd_reserved = 0; 1989 rd->nd_rd_target = *targetp; 1990 rd->nd_rd_dst = *dest; 1991 1992 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1993 if (nce != NULL && ll_opt_len != 0) { 1994 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1995 opt->nd_opt_len = ll_opt_len/8; 1996 bcopy((char *)nce->nce_res_mp->b_rptr + 1997 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1998 ill->ill_phys_addr_length); 1999 } 2000 if (nce != NULL) 2001 NCE_REFRELE(nce); 2002 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 2003 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 2004 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 2005 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 2006 pkt_len = msgdsize(mp); 2007 /* Make sure mp is 8 byte aligned */ 2008 if (pkt_len > max_redir_hdr_data_len) { 2009 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 2010 sizeof (nd_opt_rd_hdr_t))/8; 2011 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 2012 } else { 2013 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 2014 (void) adjmsg(mp, -(pkt_len % 8)); 2015 } 2016 rdh->nd_opt_rh_reserved1 = 0; 2017 rdh->nd_opt_rh_reserved2 = 0; 2018 /* ipif_v6src_addr contains the link-local source address */ 2019 rw_enter(&ill_g_lock, RW_READER); 2020 if (ill->ill_group != NULL) { 2021 /* 2022 * The receiver of the redirect will verify whether it 2023 * had a route through us (srcp that we will use in 2024 * the redirect) or not. As we load spread even link-locals, 2025 * we don't know which source address the receiver of 2026 * redirect has in its route for communicating with us. 2027 * Thus we randomly choose a source here and finally we 2028 * should get to the right one and it will eventually 2029 * accept the redirect from us. We can't call 2030 * ip_lookup_scope_v6 because we don't have the right 2031 * link-local address here. Thus we randomly choose one. 2032 */ 2033 int cnt = ill->ill_group->illgrp_ill_count; 2034 2035 ill = ill->ill_group->illgrp_ill; 2036 cnt = ++icmp_redirect_v6_src_index % cnt; 2037 while (cnt--) 2038 ill = ill->ill_group_next; 2039 srcp = &ill->ill_ipif->ipif_v6src_addr; 2040 } else { 2041 srcp = &ill->ill_ipif->ipif_v6src_addr; 2042 } 2043 rw_exit(&ill_g_lock); 2044 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE); 2045 kmem_free(buf, len); 2046 } 2047 2048 2049 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 2050 void 2051 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 2052 boolean_t llbcast, boolean_t mcast_ok) 2053 { 2054 icmp6_t icmp6; 2055 boolean_t mctl_present; 2056 mblk_t *first_mp; 2057 2058 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2059 2060 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2061 if (mp == NULL) { 2062 if (mctl_present) 2063 freeb(first_mp); 2064 return; 2065 } 2066 bzero(&icmp6, sizeof (icmp6_t)); 2067 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2068 icmp6.icmp6_code = code; 2069 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2070 } 2071 2072 /* 2073 * Generate an ICMP unreachable message. 2074 */ 2075 void 2076 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2077 boolean_t llbcast, boolean_t mcast_ok) 2078 { 2079 icmp6_t icmp6; 2080 boolean_t mctl_present; 2081 mblk_t *first_mp; 2082 2083 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2084 2085 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2086 if (mp == NULL) { 2087 if (mctl_present) 2088 freeb(first_mp); 2089 return; 2090 } 2091 bzero(&icmp6, sizeof (icmp6_t)); 2092 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2093 icmp6.icmp6_code = code; 2094 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2095 } 2096 2097 /* 2098 * Generate an ICMP pkt too big message. 
2099 */ 2100 static void 2101 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2102 boolean_t llbcast, boolean_t mcast_ok) 2103 { 2104 icmp6_t icmp6; 2105 mblk_t *first_mp; 2106 boolean_t mctl_present; 2107 2108 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2109 2110 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2111 if (mp == NULL) { 2112 if (mctl_present) 2113 freeb(first_mp); 2114 return; 2115 } 2116 bzero(&icmp6, sizeof (icmp6_t)); 2117 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2118 icmp6.icmp6_code = 0; 2119 icmp6.icmp6_mtu = htonl(mtu); 2120 2121 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2122 } 2123 2124 /* 2125 * Generate an ICMP parameter problem message. (May be called as writer.) 2126 * 'offset' is the offset from the beginning of the packet in error. 2127 */ 2128 static void 2129 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2130 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok) 2131 { 2132 icmp6_t icmp6; 2133 boolean_t mctl_present; 2134 mblk_t *first_mp; 2135 2136 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2137 2138 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2139 if (mp == NULL) { 2140 if (mctl_present) 2141 freeb(first_mp); 2142 return; 2143 } 2144 bzero((char *)&icmp6, sizeof (icmp6_t)); 2145 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2146 icmp6.icmp6_code = code; 2147 icmp6.icmp6_pptr = htonl(offset); 2148 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2149 } 2150 2151 /* 2152 * This code will need to take into account the possibility of binding 2153 * to a link local address on a multi-homed host, in which case the 2154 * outgoing interface (from the conn) will need to be used when getting 2155 * an ire for the dst. 
Going through proper outgoing interface and 2156 * choosing the source address corresponding to the outgoing interface 2157 * is necessary when the destination address is a link-local address and 2158 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2159 * This can happen when active connection is setup; thus ipp pointer 2160 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2161 * pointer is passed as ipp pointer. 2162 */ 2163 mblk_t * 2164 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2165 { 2166 ssize_t len; 2167 int protocol; 2168 struct T_bind_req *tbr; 2169 sin6_t *sin6; 2170 ipa6_conn_t *ac6; 2171 in6_addr_t *v6srcp; 2172 in6_addr_t *v6dstp; 2173 uint16_t lport; 2174 uint16_t fport; 2175 uchar_t *ucp; 2176 mblk_t *mp1; 2177 boolean_t ire_requested; 2178 boolean_t ipsec_policy_set; 2179 int error = 0; 2180 boolean_t local_bind; 2181 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2182 ipa6_conn_x_t *acx6; 2183 boolean_t verify_dst; 2184 2185 ASSERT(connp->conn_af_isv6); 2186 len = mp->b_wptr - mp->b_rptr; 2187 if (len < (sizeof (*tbr) + 1)) { 2188 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2189 "ip_bind_v6: bogus msg, len %ld", len); 2190 goto bad_addr; 2191 } 2192 /* Back up and extract the protocol identifier. */ 2193 mp->b_wptr--; 2194 tbr = (struct T_bind_req *)mp->b_rptr; 2195 /* Reset the message type in preparation for shipping it back. */ 2196 mp->b_datap->db_type = M_PCPROTO; 2197 2198 protocol = *mp->b_wptr & 0xFF; 2199 connp->conn_ulp = (uint8_t)protocol; 2200 2201 /* 2202 * Check for a zero length address. This is from a protocol that 2203 * wants to register to receive all packets of its type. 2204 */ 2205 if (tbr->ADDR_length == 0) { 2206 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2207 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2208 ipcl_proto_fanout_v6[protocol].connf_head != NULL) { 2209 /* 2210 * TCP, SCTP, AH, and ESP have single protocol fanouts. 
2211 * Do not allow others to bind to these. 2212 */ 2213 goto bad_addr; 2214 } 2215 2216 /* 2217 * 2218 * The udp module never sends down a zero-length address, 2219 * and allowing this on a labeled system will break MLP 2220 * functionality. 2221 */ 2222 if (is_system_labeled() && protocol == IPPROTO_UDP) 2223 goto bad_addr; 2224 2225 /* Allow ipsec plumbing */ 2226 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2227 protocol != IPPROTO_ESP) 2228 goto bad_addr; 2229 2230 connp->conn_srcv6 = ipv6_all_zeros; 2231 ipcl_proto_insert_v6(connp, protocol); 2232 2233 tbr->PRIM_type = T_BIND_ACK; 2234 return (mp); 2235 } 2236 2237 /* Extract the address pointer from the message. */ 2238 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2239 tbr->ADDR_length); 2240 if (ucp == NULL) { 2241 ip1dbg(("ip_bind_v6: no address\n")); 2242 goto bad_addr; 2243 } 2244 if (!OK_32PTR(ucp)) { 2245 ip1dbg(("ip_bind_v6: unaligned address\n")); 2246 goto bad_addr; 2247 } 2248 mp1 = mp->b_cont; /* trailing mp if any */ 2249 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2250 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2251 2252 switch (tbr->ADDR_length) { 2253 default: 2254 ip1dbg(("ip_bind_v6: bad address length %d\n", 2255 (int)tbr->ADDR_length)); 2256 goto bad_addr; 2257 2258 case IPV6_ADDR_LEN: 2259 /* Verification of local address only */ 2260 v6srcp = (in6_addr_t *)ucp; 2261 lport = 0; 2262 local_bind = B_TRUE; 2263 break; 2264 2265 case sizeof (sin6_t): 2266 sin6 = (sin6_t *)ucp; 2267 v6srcp = &sin6->sin6_addr; 2268 lport = sin6->sin6_port; 2269 local_bind = B_TRUE; 2270 break; 2271 2272 case sizeof (ipa6_conn_t): 2273 /* 2274 * Verify that both the source and destination addresses 2275 * are valid. 2276 * Note that we allow connect to broadcast and multicast 2277 * addresses when ire_requested is set. Thus the ULP 2278 * has to check for IRE_BROADCAST and multicast. 
2279 */ 2280 ac6 = (ipa6_conn_t *)ucp; 2281 v6srcp = &ac6->ac6_laddr; 2282 v6dstp = &ac6->ac6_faddr; 2283 fport = ac6->ac6_fport; 2284 /* For raw socket, the local port is not set. */ 2285 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2286 connp->conn_lport; 2287 local_bind = B_FALSE; 2288 /* Always verify destination reachability. */ 2289 verify_dst = B_TRUE; 2290 break; 2291 2292 case sizeof (ipa6_conn_x_t): 2293 /* 2294 * Verify that the source address is valid. 2295 * Note that we allow connect to broadcast and multicast 2296 * addresses when ire_requested is set. Thus the ULP 2297 * has to check for IRE_BROADCAST and multicast. 2298 */ 2299 acx6 = (ipa6_conn_x_t *)ucp; 2300 ac6 = &acx6->ac6x_conn; 2301 v6srcp = &ac6->ac6_laddr; 2302 v6dstp = &ac6->ac6_faddr; 2303 fport = ac6->ac6_fport; 2304 lport = ac6->ac6_lport; 2305 local_bind = B_FALSE; 2306 /* 2307 * Client that passed ipa6_conn_x_t to us specifies whether to 2308 * verify destination reachability. 2309 */ 2310 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2311 break; 2312 } 2313 if (local_bind) { 2314 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2315 /* Bind to IPv4 address */ 2316 ipaddr_t v4src; 2317 2318 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2319 2320 error = ip_bind_laddr(connp, mp, v4src, lport, 2321 ire_requested, ipsec_policy_set, 2322 tbr->ADDR_length != IPV6_ADDR_LEN); 2323 if (error != 0) 2324 goto bad_addr; 2325 connp->conn_pkt_isv6 = B_FALSE; 2326 } else { 2327 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2328 error = 0; 2329 goto bad_addr; 2330 } 2331 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2332 ire_requested, ipsec_policy_set, 2333 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2334 if (error != 0) 2335 goto bad_addr; 2336 connp->conn_pkt_isv6 = B_TRUE; 2337 } 2338 if (protocol == IPPROTO_TCP) 2339 connp->conn_recv = tcp_conn_request; 2340 } else { 2341 /* 2342 * Bind to local and remote address. 
Local might be 2343 * unspecified in which case it will be extracted from 2344 * ire_src_addr_v6 2345 */ 2346 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2347 /* Connect to IPv4 address */ 2348 ipaddr_t v4src; 2349 ipaddr_t v4dst; 2350 2351 /* Is the source unspecified or mapped? */ 2352 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2353 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2354 ip1dbg(("ip_bind_v6: " 2355 "dst is mapped, but not the src\n")); 2356 goto bad_addr; 2357 } 2358 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2359 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2360 2361 /* 2362 * XXX Fix needed. Need to pass ipsec_policy_set 2363 * instead of B_FALSE. 2364 */ 2365 2366 /* Always verify destination reachability. */ 2367 error = ip_bind_connected(connp, mp, &v4src, lport, 2368 v4dst, fport, ire_requested, ipsec_policy_set, 2369 B_TRUE, B_TRUE); 2370 if (error != 0) 2371 goto bad_addr; 2372 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2373 connp->conn_pkt_isv6 = B_FALSE; 2374 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2375 ip1dbg(("ip_bind_v6: " 2376 "src is mapped, but not the dst\n")); 2377 goto bad_addr; 2378 } else { 2379 error = ip_bind_connected_v6(connp, mp, v6srcp, 2380 lport, v6dstp, ipp, fport, ire_requested, 2381 ipsec_policy_set, B_TRUE, verify_dst); 2382 if (error != 0) 2383 goto bad_addr; 2384 connp->conn_pkt_isv6 = B_TRUE; 2385 } 2386 if (protocol == IPPROTO_TCP) 2387 connp->conn_recv = tcp_input; 2388 } 2389 /* Update qinfo if v4/v6 changed */ 2390 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2391 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2392 if (connp->conn_pkt_isv6) 2393 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE); 2394 else 2395 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 2396 } 2397 2398 /* 2399 * Pass the IPSEC headers size in ire_ipsec_overhead. 2400 * We can't do this in ip_bind_insert_ire because the policy 2401 * may not have been inherited at that point in time and hence 2402 * conn_out_enforce_policy may not be set. 
2403 */ 2404 mp1 = mp->b_cont; 2405 if (ire_requested && connp->conn_out_enforce_policy && 2406 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2407 ire_t *ire = (ire_t *)mp1->b_rptr; 2408 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2409 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2410 } 2411 2412 /* Send it home. */ 2413 mp->b_datap->db_type = M_PCPROTO; 2414 tbr->PRIM_type = T_BIND_ACK; 2415 return (mp); 2416 2417 bad_addr: 2418 if (error == EINPROGRESS) 2419 return (NULL); 2420 if (error > 0) 2421 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2422 else 2423 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2424 return (mp); 2425 } 2426 2427 /* 2428 * Here address is verified to be a valid local address. 2429 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2430 * address is also considered a valid local address. 2431 * In the case of a multicast address, however, the 2432 * upper protocol is expected to reset the src address 2433 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2434 * no packets are emitted with multicast address as 2435 * source address. 2436 * The addresses valid for bind are: 2437 * (1) - in6addr_any 2438 * (2) - IP address of an UP interface 2439 * (3) - IP address of a DOWN interface 2440 * (4) - a multicast address. In this case 2441 * the conn will only receive packets destined to 2442 * the specified multicast address. Note: the 2443 * application still has to issue an 2444 * IPV6_JOIN_GROUP socket option. 2445 * 2446 * In all the above cases, the bound address must be valid in the current zone. 2447 * When the address is loopback or multicast, there might be many matching IREs 2448 * so bind has to look up based on the zone. 
2449 */ 2450 static int 2451 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2452 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2453 boolean_t fanout_insert) 2454 { 2455 int error = 0; 2456 ire_t *src_ire = NULL; 2457 ipif_t *ipif = NULL; 2458 mblk_t *policy_mp; 2459 zoneid_t zoneid; 2460 2461 if (ipsec_policy_set) 2462 policy_mp = mp->b_cont; 2463 2464 /* 2465 * If it was previously connected, conn_fully_bound would have 2466 * been set. 2467 */ 2468 connp->conn_fully_bound = B_FALSE; 2469 2470 zoneid = connp->conn_zoneid; 2471 2472 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2473 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2474 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2475 /* 2476 * If an address other than in6addr_any is requested, 2477 * we verify that it is a valid address for bind 2478 * Note: Following code is in if-else-if form for 2479 * readability compared to a condition check. 2480 */ 2481 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2482 if (IRE_IS_LOCAL(src_ire)) { 2483 /* 2484 * (2) Bind to address of local UP interface 2485 */ 2486 ipif = src_ire->ire_ipif; 2487 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2488 ipif_t *multi_ipif = NULL; 2489 ire_t *save_ire; 2490 /* 2491 * (4) bind to multicast address. 2492 * Fake out the IRE returned to upper 2493 * layer to be a broadcast IRE in 2494 * ip_bind_insert_ire_v6(). 2495 * Pass other information that matches 2496 * the ipif (e.g. the source address). 
2497 * conn_multicast_ill is only used for 2498 * IPv6 packets 2499 */ 2500 mutex_enter(&connp->conn_lock); 2501 if (connp->conn_multicast_ill != NULL) { 2502 (void) ipif_lookup_zoneid( 2503 connp->conn_multicast_ill, zoneid, 0, 2504 &multi_ipif); 2505 } else { 2506 /* 2507 * Look for default like 2508 * ip_wput_v6 2509 */ 2510 multi_ipif = ipif_lookup_group_v6( 2511 &ipv6_unspecified_group, zoneid); 2512 } 2513 mutex_exit(&connp->conn_lock); 2514 save_ire = src_ire; 2515 src_ire = NULL; 2516 if (multi_ipif == NULL || !ire_requested || 2517 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2518 src_ire = save_ire; 2519 error = EADDRNOTAVAIL; 2520 } else { 2521 ASSERT(src_ire != NULL); 2522 if (save_ire != NULL) 2523 ire_refrele(save_ire); 2524 } 2525 if (multi_ipif != NULL) 2526 ipif_refrele(multi_ipif); 2527 } else { 2528 *mp->b_wptr++ = (char)connp->conn_ulp; 2529 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2530 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error); 2531 if (ipif == NULL) { 2532 if (error == EINPROGRESS) { 2533 if (src_ire != NULL) 2534 ire_refrele(src_ire); 2535 return (error); 2536 } 2537 /* 2538 * Not a valid address for bind 2539 */ 2540 error = EADDRNOTAVAIL; 2541 } else { 2542 ipif_refrele(ipif); 2543 } 2544 /* 2545 * Just to keep it consistent with the processing in 2546 * ip_bind_v6(). 2547 */ 2548 mp->b_wptr--; 2549 } 2550 2551 if (error != 0) { 2552 /* Red Alert! Attempting to be a bogon! */ 2553 if (ip_debug > 2) { 2554 /* ip1dbg */ 2555 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2556 " address %s\n", AF_INET6, v6src); 2557 } 2558 goto bad_addr; 2559 } 2560 } 2561 2562 /* 2563 * Allow setting new policies. For example, disconnects come 2564 * down as ipa_t bind. As we would have set conn_policy_cached 2565 * to B_TRUE before, we should set it to B_FALSE, so that policy 2566 * can change after the disconnect. 
2567 */ 2568 connp->conn_policy_cached = B_FALSE; 2569 2570 /* If not fanout_insert this was just an address verification */ 2571 if (fanout_insert) { 2572 /* 2573 * The addresses have been verified. Time to insert in 2574 * the correct fanout list. 2575 */ 2576 connp->conn_srcv6 = *v6src; 2577 connp->conn_remv6 = ipv6_all_zeros; 2578 connp->conn_lport = lport; 2579 connp->conn_fport = 0; 2580 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2581 } 2582 if (error == 0) { 2583 if (ire_requested) { 2584 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) { 2585 error = -1; 2586 goto bad_addr; 2587 } 2588 } else if (ipsec_policy_set) { 2589 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2590 error = -1; 2591 goto bad_addr; 2592 } 2593 } 2594 } 2595 bad_addr: 2596 if (error != 0) { 2597 if (connp->conn_anon_port) { 2598 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2599 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2600 B_FALSE); 2601 } 2602 connp->conn_mlp_type = mlptSingle; 2603 } 2604 2605 if (src_ire != NULL) 2606 ire_refrele(src_ire); 2607 2608 if (ipsec_policy_set) { 2609 ASSERT(policy_mp != NULL); 2610 freeb(policy_mp); 2611 /* 2612 * As of now assume that nothing else accompanies 2613 * IPSEC_POLICY_SET. 
2614 */ 2615 mp->b_cont = NULL; 2616 } 2617 return (error); 2618 } 2619 2620 /* ARGSUSED */ 2621 static void 2622 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2623 void *dummy_arg) 2624 { 2625 conn_t *connp = NULL; 2626 t_scalar_t prim; 2627 2628 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2629 2630 if (CONN_Q(q)) 2631 connp = Q_TO_CONN(q); 2632 ASSERT(connp != NULL); 2633 2634 prim = ((union T_primitives *)mp->b_rptr)->type; 2635 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2636 2637 if (IPCL_IS_TCP(connp)) { 2638 /* Pass sticky_ipp for scope_id and pktinfo */ 2639 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2640 } else { 2641 /* For UDP and ICMP */ 2642 mp = ip_bind_v6(q, mp, connp, NULL); 2643 } 2644 if (mp != NULL) { 2645 if (IPCL_IS_TCP(connp)) { 2646 CONN_INC_REF(connp); 2647 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2648 connp, SQTAG_TCP_RPUTOTHER); 2649 } else if (IPCL_IS_UDP(connp)) { 2650 udp_resume_bind(connp, mp); 2651 } else { 2652 qreply(q, mp); 2653 CONN_OPER_PENDING_DONE(connp); 2654 } 2655 } 2656 } 2657 2658 /* 2659 * Verify that both the source and destination addresses 2660 * are valid. If verify_dst, then destination address must also be reachable, 2661 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2662 * It takes ip6_pkt_t * as one of the arguments to determine correct 2663 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2664 * destination address. Note that parameter ipp is only useful for TCP connect 2665 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2666 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2667 * 2668 */ 2669 static int 2670 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2671 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2672 boolean_t ire_requested, boolean_t ipsec_policy_set, 2673 boolean_t fanout_insert, boolean_t verify_dst) 2674 { 2675 ire_t *src_ire; 2676 ire_t *dst_ire; 2677 int error = 0; 2678 int protocol; 2679 mblk_t *policy_mp; 2680 ire_t *sire = NULL; 2681 ire_t *md_dst_ire = NULL; 2682 ill_t *md_ill = NULL; 2683 ill_t *dst_ill = NULL; 2684 ipif_t *src_ipif = NULL; 2685 zoneid_t zoneid; 2686 boolean_t ill_held = B_FALSE; 2687 2688 src_ire = dst_ire = NULL; 2689 /* 2690 * NOTE: The protocol is beyond the wptr because that's how 2691 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2692 */ 2693 protocol = *mp->b_wptr & 0xFF; 2694 2695 /* 2696 * If we never got a disconnect before, clear it now. 2697 */ 2698 connp->conn_fully_bound = B_FALSE; 2699 2700 if (ipsec_policy_set) { 2701 policy_mp = mp->b_cont; 2702 } 2703 2704 zoneid = connp->conn_zoneid; 2705 2706 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2707 ipif_t *ipif; 2708 2709 /* 2710 * Use an "emulated" IRE_BROADCAST to tell the transport it 2711 * is a multicast. 2712 * Pass other information that matches 2713 * the ipif (e.g. the source address). 
2714 * 2715 * conn_multicast_ill is only used for IPv6 packets 2716 */ 2717 mutex_enter(&connp->conn_lock); 2718 if (connp->conn_multicast_ill != NULL) { 2719 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2720 zoneid, 0, &ipif); 2721 } else { 2722 /* Look for default like ip_wput_v6 */ 2723 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2724 } 2725 mutex_exit(&connp->conn_lock); 2726 if (ipif == NULL || !ire_requested || 2727 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2728 if (ipif != NULL) 2729 ipif_refrele(ipif); 2730 if (ip_debug > 2) { 2731 /* ip1dbg */ 2732 pr_addr_dbg("ip_bind_connected_v6: bad " 2733 "connected multicast %s\n", AF_INET6, 2734 v6dst); 2735 } 2736 error = ENETUNREACH; 2737 goto bad_addr; 2738 } 2739 if (ipif != NULL) 2740 ipif_refrele(ipif); 2741 } else { 2742 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2743 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2744 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2745 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR); 2746 /* 2747 * We also prevent ire's with src address INADDR_ANY to 2748 * be used, which are created temporarily for 2749 * sending out packets from endpoints that have 2750 * conn_unspec_src set. 2751 */ 2752 if (dst_ire == NULL || 2753 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2754 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2755 /* 2756 * When verifying destination reachability, we always 2757 * complain. 2758 * 2759 * When not verifying destination reachability but we 2760 * found an IRE, i.e. the destination is reachable, 2761 * then the other tests still apply and we complain. 
2762 */ 2763 if (verify_dst || (dst_ire != NULL)) { 2764 if (ip_debug > 2) { 2765 /* ip1dbg */ 2766 pr_addr_dbg("ip_bind_connected_v6: bad" 2767 " connected dst %s\n", AF_INET6, 2768 v6dst); 2769 } 2770 if (dst_ire == NULL || 2771 !(dst_ire->ire_type & IRE_HOST)) { 2772 error = ENETUNREACH; 2773 } else { 2774 error = EHOSTUNREACH; 2775 } 2776 goto bad_addr; 2777 } 2778 } 2779 } 2780 2781 /* 2782 * We now know that routing will allow us to reach the destination. 2783 * Check whether Trusted Solaris policy allows communication with this 2784 * host, and pretend that the destination is unreachable if not. 2785 * 2786 * This is never a problem for TCP, since that transport is known to 2787 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2788 * handling. If the remote is unreachable, it will be detected at that 2789 * point, so there's no reason to check it here. 2790 * 2791 * Note that for sendto (and other datagram-oriented friends), this 2792 * check is done as part of the data path label computation instead. 2793 * The check here is just to make non-TCP connect() report the right 2794 * error. 2795 */ 2796 if (dst_ire != NULL && is_system_labeled() && 2797 !IPCL_IS_TCP(connp) && 2798 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2799 connp->conn_mac_exempt) != 0) { 2800 error = EHOSTUNREACH; 2801 if (ip_debug > 2) { 2802 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2803 AF_INET6, v6dst); 2804 } 2805 goto bad_addr; 2806 } 2807 2808 /* 2809 * If the app does a connect(), it means that it will most likely 2810 * send more than 1 packet to the destination. It makes sense 2811 * to clear the temporary flag. 
2812 */ 2813 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2814 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2815 irb_t *irb = dst_ire->ire_bucket; 2816 2817 rw_enter(&irb->irb_lock, RW_WRITER); 2818 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2819 irb->irb_tmp_ire_cnt--; 2820 rw_exit(&irb->irb_lock); 2821 } 2822 2823 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2824 2825 /* 2826 * See if we should notify ULP about MDT; we do this whether or not 2827 * ire_requested is TRUE, in order to handle active connects; MDT 2828 * eligibility tests for passive connects are handled separately 2829 * through tcp_adapt_ire(). We do this before the source address 2830 * selection, because dst_ire may change after a call to 2831 * ipif_select_source_v6(). This is a best-effort check, as the 2832 * packet for this connection may not actually go through 2833 * dst_ire->ire_stq, and the exact IRE can only be known after 2834 * calling ip_newroute_v6(). This is why we further check on the 2835 * IRE during Multidata packet transmission in tcp_multisend(). 
2836 */ 2837 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2838 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2839 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2840 ILL_MDT_CAPABLE(md_ill)) { 2841 md_dst_ire = dst_ire; 2842 IRE_REFHOLD(md_dst_ire); 2843 } 2844 2845 if (dst_ire != NULL && 2846 dst_ire->ire_type == IRE_LOCAL && 2847 dst_ire->ire_zoneid != zoneid && 2848 dst_ire->ire_zoneid != ALL_ZONES) { 2849 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2850 zoneid, 0, NULL, 2851 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2852 MATCH_IRE_RJ_BHOLE); 2853 if (src_ire == NULL) { 2854 error = EHOSTUNREACH; 2855 goto bad_addr; 2856 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2857 if (!(src_ire->ire_type & IRE_HOST)) 2858 error = ENETUNREACH; 2859 else 2860 error = EHOSTUNREACH; 2861 goto bad_addr; 2862 } 2863 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2864 src_ipif = src_ire->ire_ipif; 2865 ipif_refhold(src_ipif); 2866 *v6src = src_ipif->ipif_v6lcl_addr; 2867 } 2868 ire_refrele(src_ire); 2869 src_ire = NULL; 2870 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2871 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2872 *v6src = sire->ire_src_addr_v6; 2873 ire_refrele(dst_ire); 2874 dst_ire = sire; 2875 sire = NULL; 2876 } else if (dst_ire->ire_type == IRE_CACHE && 2877 (dst_ire->ire_flags & RTF_SETSRC)) { 2878 ASSERT(dst_ire->ire_zoneid == zoneid || 2879 dst_ire->ire_zoneid == ALL_ZONES); 2880 *v6src = dst_ire->ire_src_addr_v6; 2881 } else { 2882 /* 2883 * Pick a source address so that a proper inbound load 2884 * spreading would happen. Use dst_ill specified by the 2885 * app. when socket option or scopeid is set. 
2886 */ 2887 int err; 2888 2889 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2890 uint_t if_index; 2891 2892 /* 2893 * Scope id or IPV6_PKTINFO 2894 */ 2895 2896 if_index = ipp->ipp_ifindex; 2897 dst_ill = ill_lookup_on_ifindex( 2898 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2899 if (dst_ill == NULL) { 2900 ip1dbg(("ip_bind_connected_v6:" 2901 " bad ifindex %d\n", if_index)); 2902 error = EADDRNOTAVAIL; 2903 goto bad_addr; 2904 } 2905 ill_held = B_TRUE; 2906 } else if (connp->conn_outgoing_ill != NULL) { 2907 /* 2908 * For IPV6_BOUND_IF socket option, 2909 * conn_outgoing_ill should be set 2910 * already in TCP or UDP/ICMP. 2911 */ 2912 dst_ill = conn_get_held_ill(connp, 2913 &connp->conn_outgoing_ill, &err); 2914 if (err == ILL_LOOKUP_FAILED) { 2915 ip1dbg(("ip_bind_connected_v6:" 2916 "no ill for bound_if\n")); 2917 error = EADDRNOTAVAIL; 2918 goto bad_addr; 2919 } 2920 ill_held = B_TRUE; 2921 } else if (dst_ire->ire_stq != NULL) { 2922 /* No need to hold ill here */ 2923 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2924 } else { 2925 /* No need to hold ill here */ 2926 dst_ill = dst_ire->ire_ipif->ipif_ill; 2927 } 2928 if (!ip6_asp_can_lookup()) { 2929 *mp->b_wptr++ = (char)protocol; 2930 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2931 ip_bind_connected_resume_v6); 2932 error = EINPROGRESS; 2933 goto refrele_and_quit; 2934 } 2935 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2936 RESTRICT_TO_NONE, connp->conn_src_preferences, 2937 zoneid); 2938 ip6_asp_table_refrele(); 2939 if (src_ipif == NULL) { 2940 pr_addr_dbg("ip_bind_connected_v6: " 2941 "no usable source address for " 2942 "connection to %s\n", AF_INET6, v6dst); 2943 error = EADDRNOTAVAIL; 2944 goto bad_addr; 2945 } 2946 *v6src = src_ipif->ipif_v6lcl_addr; 2947 } 2948 } 2949 2950 /* 2951 * We do ire_route_lookup_v6() here (and not an interface lookup) 2952 * as we assert that v6src should only come from an 2953 * UP interface for hard binding. 
2954 */ 2955 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2956 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2957 2958 /* src_ire must be a local|loopback */ 2959 if (!IRE_IS_LOCAL(src_ire)) { 2960 if (ip_debug > 2) { 2961 /* ip1dbg */ 2962 pr_addr_dbg("ip_bind_connected_v6: bad " 2963 "connected src %s\n", AF_INET6, v6src); 2964 } 2965 error = EADDRNOTAVAIL; 2966 goto bad_addr; 2967 } 2968 2969 /* 2970 * If the source address is a loopback address, the 2971 * destination had best be local or multicast. 2972 * The transports that can't handle multicast will reject 2973 * those addresses. 2974 */ 2975 if (src_ire->ire_type == IRE_LOOPBACK && 2976 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2977 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2978 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2979 error = -1; 2980 goto bad_addr; 2981 } 2982 /* 2983 * Allow setting new policies. For example, disconnects come 2984 * down as ipa_t bind. As we would have set conn_policy_cached 2985 * to B_TRUE before, we should set it to B_FALSE, so that policy 2986 * can change after the disconnect. 2987 */ 2988 connp->conn_policy_cached = B_FALSE; 2989 2990 /* 2991 * The addresses have been verified. Initialize the conn 2992 * before calling the policy as they expect the conns 2993 * initialized. 2994 */ 2995 connp->conn_srcv6 = *v6src; 2996 connp->conn_remv6 = *v6dst; 2997 connp->conn_lport = lport; 2998 connp->conn_fport = fport; 2999 3000 ASSERT(!(ipsec_policy_set && ire_requested)); 3001 if (ire_requested) { 3002 iulp_t *ulp_info = NULL; 3003 3004 /* 3005 * Note that sire will not be NULL if this is an off-link 3006 * connection and there is not cache for that dest yet. 3007 * 3008 * XXX Because of an existing bug, if there are multiple 3009 * default routes, the IRE returned now may not be the actual 3010 * default route used (default routes are chosen in a 3011 * round robin fashion). 
So if the metrics for different 3012 * default routes are different, we may return the wrong 3013 * metrics. This will not be a problem if the existing 3014 * bug is fixed. 3015 */ 3016 if (sire != NULL) 3017 ulp_info = &(sire->ire_uinfo); 3018 3019 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 3020 error = -1; 3021 goto bad_addr; 3022 } 3023 } else if (ipsec_policy_set) { 3024 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 3025 error = -1; 3026 goto bad_addr; 3027 } 3028 } 3029 3030 /* 3031 * Cache IPsec policy in this conn. If we have per-socket policy, 3032 * we'll cache that. If we don't, we'll inherit global policy. 3033 * 3034 * We can't insert until the conn reflects the policy. Note that 3035 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 3036 * connections where we don't have a policy. This is to prevent 3037 * global policy lookups in the inbound path. 3038 * 3039 * If we insert before we set conn_policy_cached, 3040 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3041 * because global policy cound be non-empty. We normally call 3042 * ipsec_check_policy() for conn_policy_cached connections only if 3043 * conn_in_enforce_policy is set. But in this case, 3044 * conn_policy_cached can get set anytime since we made the 3045 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3046 * is called, which will make the above assumption false. Thus, we 3047 * need to insert after we set conn_policy_cached. 3048 */ 3049 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3050 goto bad_addr; 3051 3052 /* If not fanout_insert this was just an address verification */ 3053 if (fanout_insert) { 3054 /* 3055 * The addresses have been verified. Time to insert in 3056 * the correct fanout list. 3057 */ 3058 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3059 connp->conn_ports, 3060 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 3061 } 3062 if (error == 0) { 3063 connp->conn_fully_bound = B_TRUE; 3064 /* 3065 * Our initial checks for MDT have passed; the IRE is not 3066 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3067 * be supporting MDT. Pass the IRE, IPC and ILL into 3068 * ip_mdinfo_return(), which performs further checks 3069 * against them and upon success, returns the MDT info 3070 * mblk which we will attach to the bind acknowledgment. 3071 */ 3072 if (md_dst_ire != NULL) { 3073 mblk_t *mdinfo_mp; 3074 3075 ASSERT(md_ill != NULL); 3076 ASSERT(md_ill->ill_mdt_capab != NULL); 3077 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3078 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3079 linkb(mp, mdinfo_mp); 3080 } 3081 } 3082 bad_addr: 3083 if (ipsec_policy_set) { 3084 ASSERT(policy_mp != NULL); 3085 freeb(policy_mp); 3086 /* 3087 * As of now assume that nothing else accompanies 3088 * IPSEC_POLICY_SET. 3089 */ 3090 mp->b_cont = NULL; 3091 } 3092 refrele_and_quit: 3093 if (src_ire != NULL) 3094 IRE_REFRELE(src_ire); 3095 if (dst_ire != NULL) 3096 IRE_REFRELE(dst_ire); 3097 if (sire != NULL) 3098 IRE_REFRELE(sire); 3099 if (src_ipif != NULL) 3100 ipif_refrele(src_ipif); 3101 if (md_dst_ire != NULL) 3102 IRE_REFRELE(md_dst_ire); 3103 if (ill_held && dst_ill != NULL) 3104 ill_refrele(dst_ill); 3105 return (error); 3106 } 3107 3108 /* 3109 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3110 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3111 */ 3112 static boolean_t 3113 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3114 iulp_t *ulp_info) 3115 { 3116 mblk_t *mp1; 3117 ire_t *ret_ire; 3118 3119 mp1 = mp->b_cont; 3120 ASSERT(mp1 != NULL); 3121 3122 if (ire != NULL) { 3123 /* 3124 * mp1 initialized above to IRE_DB_REQ_TYPE 3125 * appended mblk. Its <upper protocol>'s 3126 * job to make sure there is room. 
3127 */ 3128 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3129 return (B_FALSE); 3130 3131 mp1->b_datap->db_type = IRE_DB_TYPE; 3132 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3133 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3134 ret_ire = (ire_t *)mp1->b_rptr; 3135 if (IN6_IS_ADDR_MULTICAST(dst) || 3136 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3137 ret_ire->ire_type = IRE_BROADCAST; 3138 ret_ire->ire_addr_v6 = *dst; 3139 } 3140 if (ulp_info != NULL) { 3141 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3142 sizeof (iulp_t)); 3143 } 3144 ret_ire->ire_mp = mp1; 3145 } else { 3146 /* 3147 * No IRE was found. Remove IRE mblk. 3148 */ 3149 mp->b_cont = mp1->b_cont; 3150 freeb(mp1); 3151 } 3152 return (B_TRUE); 3153 } 3154 3155 /* 3156 * Add an ip6i_t header to the front of the mblk. 3157 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3158 * Returns NULL if allocation fails (and frees original message). 3159 * Used in outgoing path when going through ip_newroute_*v6(). 3160 * Used in incoming path to pass ifindex to transports. 
3161 */ 3162 mblk_t * 3163 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3164 { 3165 mblk_t *mp1; 3166 ip6i_t *ip6i; 3167 ip6_t *ip6h; 3168 3169 ip6h = (ip6_t *)mp->b_rptr; 3170 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3171 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3172 mp->b_datap->db_ref > 1) { 3173 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3174 if (mp1 == NULL) { 3175 freemsg(mp); 3176 return (NULL); 3177 } 3178 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3179 mp1->b_cont = mp; 3180 mp = mp1; 3181 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3182 } 3183 mp->b_rptr = (uchar_t *)ip6i; 3184 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3185 ip6i->ip6i_nxt = IPPROTO_RAW; 3186 if (ill != NULL) { 3187 ip6i->ip6i_flags = IP6I_IFINDEX; 3188 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3189 } else { 3190 ip6i->ip6i_flags = 0; 3191 } 3192 ip6i->ip6i_nexthop = *dst; 3193 return (mp); 3194 } 3195 3196 /* 3197 * Handle protocols with which IP is less intimate. There 3198 * can be more than one stream bound to a particular 3199 * protocol. When this is the case, normally each one gets a copy 3200 * of any incoming packets. 3201 * However, if the packet was tunneled and not multicast we only send to it 3202 * the first match. 3203 * 3204 * Zones notes: 3205 * Packets will be distributed to streams in all zones. This is really only 3206 * useful for ICMPv6 as only applications in the global zone can create raw 3207 * sockets for other protocols. 
3208 */ 3209 static void 3210 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3211 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3212 boolean_t mctl_present, zoneid_t zoneid) 3213 { 3214 queue_t *rq; 3215 mblk_t *mp1, *first_mp1; 3216 in6_addr_t dst = ip6h->ip6_dst; 3217 in6_addr_t src = ip6h->ip6_src; 3218 boolean_t one_only; 3219 mblk_t *first_mp = mp; 3220 boolean_t secure, shared_addr; 3221 conn_t *connp, *first_connp, *next_connp; 3222 connf_t *connfp; 3223 3224 if (mctl_present) { 3225 mp = first_mp->b_cont; 3226 secure = ipsec_in_is_secure(first_mp); 3227 ASSERT(mp != NULL); 3228 } else { 3229 secure = B_FALSE; 3230 } 3231 3232 /* 3233 * If the packet was tunneled and not multicast we only send to it 3234 * the first match. 3235 */ 3236 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3237 !IN6_IS_ADDR_MULTICAST(&dst)); 3238 3239 shared_addr = (zoneid == ALL_ZONES); 3240 if (shared_addr) { 3241 /* 3242 * We don't allow multilevel ports for raw IP, so no need to 3243 * check for that here. 3244 */ 3245 zoneid = tsol_packet_to_zoneid(mp); 3246 } 3247 3248 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3249 mutex_enter(&connfp->connf_lock); 3250 connp = connfp->connf_head; 3251 for (connp = connfp->connf_head; connp != NULL; 3252 connp = connp->conn_next) { 3253 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3254 zoneid) && 3255 (!is_system_labeled() || 3256 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3257 connp))) 3258 break; 3259 } 3260 3261 if (connp == NULL || connp->conn_upq == NULL) { 3262 /* 3263 * No one bound to this port. Is 3264 * there a client that wants all 3265 * unclaimed datagrams? 
3266 */ 3267 mutex_exit(&connfp->connf_lock); 3268 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3269 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3270 nexthdr_offset, mctl_present, zoneid)) { 3271 BUMP_MIB(ill->ill_ip6_mib, ipv6InUnknownProtos); 3272 } 3273 3274 return; 3275 } 3276 3277 CONN_INC_REF(connp); 3278 first_connp = connp; 3279 3280 /* 3281 * XXX: Fix the multiple protocol listeners case. We should not 3282 * be walking the conn->next list here. 3283 */ 3284 if (one_only) { 3285 /* 3286 * Only send message to one tunnel driver by immediately 3287 * terminating the loop. 3288 */ 3289 connp = NULL; 3290 } else { 3291 connp = connp->conn_next; 3292 3293 } 3294 for (;;) { 3295 while (connp != NULL) { 3296 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3297 flags, zoneid) && 3298 (!is_system_labeled() || 3299 tsol_receive_local(mp, &dst, IPV6_VERSION, 3300 shared_addr, connp))) 3301 break; 3302 connp = connp->conn_next; 3303 } 3304 3305 /* 3306 * Just copy the data part alone. The mctl part is 3307 * needed just for verifying policy and it is never 3308 * sent up. 3309 */ 3310 if (connp == NULL || connp->conn_upq == NULL || 3311 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3312 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3313 /* 3314 * No more intested clients or memory 3315 * allocation failed 3316 */ 3317 connp = first_connp; 3318 break; 3319 } 3320 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3321 CONN_INC_REF(connp); 3322 mutex_exit(&connfp->connf_lock); 3323 rq = connp->conn_rq; 3324 /* 3325 * For link-local always add ifindex so that transport can set 3326 * sin6_scope_id. Avoid it for ICMP error fanout. 
3327 */ 3328 if ((connp->conn_ipv6_recvpktinfo || 3329 IN6_IS_ADDR_LINKLOCAL(&src)) && 3330 (flags & IP_FF_IP6INFO)) { 3331 /* Add header */ 3332 mp1 = ip_add_info_v6(mp1, inill, &dst); 3333 } 3334 if (mp1 == NULL) { 3335 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3336 } else if (!canputnext(rq)) { 3337 if (flags & IP_FF_RAWIP) { 3338 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3339 } else { 3340 BUMP_MIB(ill->ill_icmp6_mib, 3341 ipv6IfIcmpInOverflows); 3342 } 3343 3344 freemsg(mp1); 3345 } else { 3346 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3347 first_mp1 = ipsec_check_inbound_policy 3348 (first_mp1, connp, NULL, ip6h, 3349 mctl_present); 3350 } 3351 if (first_mp1 != NULL) { 3352 if (mctl_present) 3353 freeb(first_mp1); 3354 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3355 putnext(rq, mp1); 3356 } 3357 } 3358 mutex_enter(&connfp->connf_lock); 3359 /* Follow the next pointer before releasing the conn. */ 3360 next_connp = connp->conn_next; 3361 CONN_DEC_REF(connp); 3362 connp = next_connp; 3363 } 3364 3365 /* Last one. Send it upstream. */ 3366 mutex_exit(&connfp->connf_lock); 3367 3368 /* Initiate IPPF processing */ 3369 if (IP6_IN_IPP(flags)) { 3370 uint_t ifindex; 3371 3372 mutex_enter(&ill->ill_lock); 3373 ifindex = ill->ill_phyint->phyint_ifindex; 3374 mutex_exit(&ill->ill_lock); 3375 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3376 if (mp == NULL) { 3377 CONN_DEC_REF(connp); 3378 if (mctl_present) 3379 freeb(first_mp); 3380 return; 3381 } 3382 } 3383 3384 /* 3385 * For link-local always add ifindex so that transport can set 3386 * sin6_scope_id. Avoid it for ICMP error fanout. 
3387 */ 3388 if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3389 (flags & IP_FF_IP6INFO)) { 3390 /* Add header */ 3391 mp = ip_add_info_v6(mp, inill, &dst); 3392 if (mp == NULL) { 3393 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3394 CONN_DEC_REF(connp); 3395 if (mctl_present) 3396 freeb(first_mp); 3397 return; 3398 } else if (mctl_present) { 3399 first_mp->b_cont = mp; 3400 } else { 3401 first_mp = mp; 3402 } 3403 } 3404 3405 rq = connp->conn_rq; 3406 if (!canputnext(rq)) { 3407 if (flags & IP_FF_RAWIP) { 3408 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3409 } else { 3410 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3411 } 3412 3413 freemsg(first_mp); 3414 } else { 3415 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3416 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3417 NULL, ip6h, mctl_present); 3418 if (first_mp == NULL) { 3419 CONN_DEC_REF(connp); 3420 return; 3421 } 3422 } 3423 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3424 putnext(rq, mp); 3425 if (mctl_present) 3426 freeb(first_mp); 3427 } 3428 CONN_DEC_REF(connp); 3429 } 3430 3431 /* 3432 * Send an ICMP error after patching up the packet appropriately. Returns 3433 * non-zero if the appropriate MIB should be bumped; zero otherwise. 3434 */ 3435 int 3436 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3437 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3438 boolean_t mctl_present, zoneid_t zoneid) 3439 { 3440 ip6_t *ip6h; 3441 mblk_t *first_mp; 3442 boolean_t secure; 3443 unsigned char db_type; 3444 3445 first_mp = mp; 3446 if (mctl_present) { 3447 mp = mp->b_cont; 3448 secure = ipsec_in_is_secure(first_mp); 3449 ASSERT(mp != NULL); 3450 } else { 3451 /* 3452 * If this is an ICMP error being reported - which goes 3453 * up as M_CTLs, we need to convert them to M_DATA till 3454 * we finish checking with global policy because 3455 * ipsec_check_global_policy() assumes M_DATA as clear 3456 * and M_CTL as secure. 
3457 */ 3458 db_type = mp->b_datap->db_type; 3459 mp->b_datap->db_type = M_DATA; 3460 secure = B_FALSE; 3461 } 3462 /* 3463 * We are generating an icmp error for some inbound packet. 3464 * Called from all ip_fanout_(udp, tcp, proto) functions. 3465 * Before we generate an error, check with global policy 3466 * to see whether this is allowed to enter the system. As 3467 * there is no "conn", we are checking with global policy. 3468 */ 3469 ip6h = (ip6_t *)mp->b_rptr; 3470 if (secure || ipsec_inbound_v6_policy_present) { 3471 first_mp = ipsec_check_global_policy(first_mp, NULL, 3472 NULL, ip6h, mctl_present); 3473 if (first_mp == NULL) 3474 return (0); 3475 } 3476 3477 if (!mctl_present) 3478 mp->b_datap->db_type = db_type; 3479 3480 if (flags & IP_FF_SEND_ICMP) { 3481 if (flags & IP_FF_HDR_COMPLETE) { 3482 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3483 freemsg(first_mp); 3484 return (1); 3485 } 3486 } 3487 switch (icmp_type) { 3488 case ICMP6_DST_UNREACH: 3489 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3490 B_FALSE, B_FALSE); 3491 break; 3492 case ICMP6_PARAM_PROB: 3493 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3494 nexthdr_offset, B_FALSE, B_FALSE); 3495 break; 3496 default: 3497 #ifdef DEBUG 3498 panic("ip_fanout_send_icmp_v6: wrong type"); 3499 /*NOTREACHED*/ 3500 #else 3501 freemsg(first_mp); 3502 break; 3503 #endif 3504 } 3505 } else { 3506 freemsg(first_mp); 3507 return (0); 3508 } 3509 3510 return (1); 3511 } 3512 3513 3514 /* 3515 * Fanout for TCP packets 3516 * The caller puts <fport, lport> in the ports parameter. 
3517 */ 3518 static void 3519 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3520 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3521 { 3522 mblk_t *first_mp; 3523 boolean_t secure; 3524 conn_t *connp; 3525 tcph_t *tcph; 3526 boolean_t syn_present = B_FALSE; 3527 3528 first_mp = mp; 3529 if (mctl_present) { 3530 mp = first_mp->b_cont; 3531 secure = ipsec_in_is_secure(first_mp); 3532 ASSERT(mp != NULL); 3533 } else { 3534 secure = B_FALSE; 3535 } 3536 3537 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3538 3539 if (connp == NULL || 3540 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3541 /* 3542 * No hard-bound match. Send Reset. 3543 */ 3544 dblk_t *dp = mp->b_datap; 3545 uint32_t ill_index; 3546 3547 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3548 3549 /* Initiate IPPf processing, if needed. */ 3550 if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) { 3551 ill_index = ill->ill_phyint->phyint_ifindex; 3552 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3553 if (first_mp == NULL) { 3554 if (connp != NULL) 3555 CONN_DEC_REF(connp); 3556 return; 3557 } 3558 } 3559 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3560 tcp_xmit_listeners_reset(first_mp, hdr_len); 3561 if (connp != NULL) 3562 CONN_DEC_REF(connp); 3563 return; 3564 } 3565 3566 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3567 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3568 if (connp->conn_flags & IPCL_TCP) { 3569 squeue_t *sqp; 3570 3571 /* 3572 * For fused tcp loopback, assign the eager's 3573 * squeue to be that of the active connect's. 
3574 */ 3575 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3576 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3577 !IP6_IN_IPP(flags)) { 3578 ASSERT(Q_TO_CONN(q) != NULL); 3579 sqp = Q_TO_CONN(q)->conn_sqp; 3580 } else { 3581 sqp = IP_SQUEUE_GET(lbolt); 3582 } 3583 3584 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3585 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3586 3587 /* 3588 * db_cksumstuff is unused in the incoming 3589 * path; Thus store the ifindex here. It will 3590 * be cleared in tcp_conn_create_v6(). 3591 */ 3592 DB_CKSUMSTUFF(mp) = 3593 (intptr_t)ill->ill_phyint->phyint_ifindex; 3594 syn_present = B_TRUE; 3595 } 3596 } 3597 3598 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3599 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3600 if ((flags & TH_RST) || (flags & TH_URG)) { 3601 CONN_DEC_REF(connp); 3602 freemsg(first_mp); 3603 return; 3604 } 3605 if (flags & TH_ACK) { 3606 tcp_xmit_listeners_reset(first_mp, hdr_len); 3607 CONN_DEC_REF(connp); 3608 return; 3609 } 3610 3611 CONN_DEC_REF(connp); 3612 freemsg(first_mp); 3613 return; 3614 } 3615 3616 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3617 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3618 NULL, ip6h, mctl_present); 3619 if (first_mp == NULL) { 3620 CONN_DEC_REF(connp); 3621 return; 3622 } 3623 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3624 ASSERT(syn_present); 3625 if (mctl_present) { 3626 ASSERT(first_mp != mp); 3627 first_mp->b_datap->db_struioflag |= 3628 STRUIO_POLICY; 3629 } else { 3630 ASSERT(first_mp == mp); 3631 mp->b_datap->db_struioflag &= 3632 ~STRUIO_EAGER; 3633 mp->b_datap->db_struioflag |= 3634 STRUIO_POLICY; 3635 } 3636 } else { 3637 /* 3638 * Discard first_mp early since we're dealing with a 3639 * fully-connected conn_t and tcp doesn't do policy in 3640 * this case. Also, if someone is bound to IPPROTO_TCP 3641 * over raw IP, they don't expect to see a M_CTL. 
3642 */ 3643 if (mctl_present) { 3644 freeb(first_mp); 3645 mctl_present = B_FALSE; 3646 } 3647 first_mp = mp; 3648 } 3649 } 3650 3651 /* Initiate IPPF processing */ 3652 if (IP6_IN_IPP(flags)) { 3653 uint_t ifindex; 3654 3655 mutex_enter(&ill->ill_lock); 3656 ifindex = ill->ill_phyint->phyint_ifindex; 3657 mutex_exit(&ill->ill_lock); 3658 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3659 if (mp == NULL) { 3660 CONN_DEC_REF(connp); 3661 if (mctl_present) { 3662 freeb(first_mp); 3663 } 3664 return; 3665 } else if (mctl_present) { 3666 /* 3667 * ip_add_info_v6 might return a new mp. 3668 */ 3669 ASSERT(first_mp != mp); 3670 first_mp->b_cont = mp; 3671 } else { 3672 first_mp = mp; 3673 } 3674 } 3675 3676 /* 3677 * For link-local always add ifindex so that TCP can bind to that 3678 * interface. Avoid it for ICMP error fanout. 3679 */ 3680 if (!syn_present && ((connp->conn_ipv6_recvpktinfo || 3681 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3682 (flags & IP_FF_IP6INFO))) { 3683 /* Add header */ 3684 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3685 if (mp == NULL) { 3686 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3687 CONN_DEC_REF(connp); 3688 if (mctl_present) 3689 freeb(first_mp); 3690 return; 3691 } else if (mctl_present) { 3692 ASSERT(first_mp != mp); 3693 first_mp->b_cont = mp; 3694 } else { 3695 first_mp = mp; 3696 } 3697 } 3698 3699 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3700 if (IPCL_IS_TCP(connp)) { 3701 (*ip_input_proc)(connp->conn_sqp, first_mp, 3702 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3703 } else { 3704 putnext(connp->conn_rq, first_mp); 3705 CONN_DEC_REF(connp); 3706 } 3707 } 3708 3709 /* 3710 * Fanout for UDP packets. 3711 * The caller puts <fport, lport> in the ports parameter. 3712 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3713 * 3714 * If SO_REUSEADDR is set all multicast and broadcast packets 3715 * will be delivered to all streams bound to the same port. 
3716 * 3717 * Zones notes: 3718 * Multicast packets will be distributed to streams in all zones. 3719 */ 3720 static void 3721 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3722 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3723 zoneid_t zoneid) 3724 { 3725 uint32_t dstport, srcport; 3726 in6_addr_t dst; 3727 mblk_t *first_mp; 3728 boolean_t secure; 3729 conn_t *connp; 3730 connf_t *connfp; 3731 conn_t *first_conn; 3732 conn_t *next_conn; 3733 mblk_t *mp1, *first_mp1; 3734 in6_addr_t src; 3735 boolean_t shared_addr; 3736 3737 first_mp = mp; 3738 if (mctl_present) { 3739 mp = first_mp->b_cont; 3740 secure = ipsec_in_is_secure(first_mp); 3741 ASSERT(mp != NULL); 3742 } else { 3743 secure = B_FALSE; 3744 } 3745 3746 /* Extract ports in net byte order */ 3747 dstport = htons(ntohl(ports) & 0xFFFF); 3748 srcport = htons(ntohl(ports) >> 16); 3749 dst = ip6h->ip6_dst; 3750 src = ip6h->ip6_src; 3751 3752 shared_addr = (zoneid == ALL_ZONES); 3753 if (shared_addr) { 3754 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3755 /* 3756 * If no shared MLP is found, tsol_mlp_findzone returns 3757 * ALL_ZONES. In that case, we assume it's SLP, and 3758 * search for the zone based on the packet label. 3759 * That will also return ALL_ZONES on failure, but 3760 * we never allow conn_zoneid to be set to ALL_ZONES. 3761 */ 3762 if (zoneid == ALL_ZONES) 3763 zoneid = tsol_packet_to_zoneid(mp); 3764 } 3765 3766 /* Attempt to find a client stream based on destination port. */ 3767 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; 3768 mutex_enter(&connfp->connf_lock); 3769 connp = connfp->connf_head; 3770 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3771 /* 3772 * Not multicast. Send to the one (first) client we find. 
3773 */ 3774 while (connp != NULL) { 3775 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3776 src) && connp->conn_zoneid == zoneid && 3777 conn_wantpacket_v6(connp, ill, ip6h, 3778 flags, zoneid)) { 3779 break; 3780 } 3781 connp = connp->conn_next; 3782 } 3783 if (connp == NULL || connp->conn_upq == NULL) 3784 goto notfound; 3785 3786 if (is_system_labeled() && 3787 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3788 connp)) 3789 goto notfound; 3790 3791 /* Found a client */ 3792 CONN_INC_REF(connp); 3793 mutex_exit(&connfp->connf_lock); 3794 3795 if (CONN_UDP_FLOWCTLD(connp)) { 3796 freemsg(first_mp); 3797 CONN_DEC_REF(connp); 3798 return; 3799 } 3800 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3801 first_mp = ipsec_check_inbound_policy(first_mp, 3802 connp, NULL, ip6h, mctl_present); 3803 if (first_mp == NULL) { 3804 CONN_DEC_REF(connp); 3805 return; 3806 } 3807 } 3808 /* Initiate IPPF processing */ 3809 if (IP6_IN_IPP(flags)) { 3810 uint_t ifindex; 3811 3812 mutex_enter(&ill->ill_lock); 3813 ifindex = ill->ill_phyint->phyint_ifindex; 3814 mutex_exit(&ill->ill_lock); 3815 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3816 if (mp == NULL) { 3817 CONN_DEC_REF(connp); 3818 if (mctl_present) 3819 freeb(first_mp); 3820 return; 3821 } 3822 } 3823 /* 3824 * For link-local always add ifindex so that 3825 * transport can set sin6_scope_id. Avoid it for 3826 * ICMP error fanout. 
3827 */ 3828 if ((connp->conn_ipv6_recvpktinfo || 3829 IN6_IS_ADDR_LINKLOCAL(&src)) && 3830 (flags & IP_FF_IP6INFO)) { 3831 /* Add header */ 3832 mp = ip_add_info_v6(mp, inill, &dst); 3833 if (mp == NULL) { 3834 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3835 CONN_DEC_REF(connp); 3836 if (mctl_present) 3837 freeb(first_mp); 3838 return; 3839 } else if (mctl_present) { 3840 first_mp->b_cont = mp; 3841 } else { 3842 first_mp = mp; 3843 } 3844 } 3845 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3846 3847 /* Send it upstream */ 3848 CONN_UDP_RECV(connp, mp); 3849 3850 IP6_STAT(ip6_udp_fannorm); 3851 CONN_DEC_REF(connp); 3852 if (mctl_present) 3853 freeb(first_mp); 3854 return; 3855 } 3856 3857 while (connp != NULL) { 3858 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3859 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3860 (!is_system_labeled() || 3861 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3862 connp))) 3863 break; 3864 connp = connp->conn_next; 3865 } 3866 3867 if (connp == NULL || connp->conn_upq == NULL) 3868 goto notfound; 3869 3870 first_conn = connp; 3871 3872 CONN_INC_REF(connp); 3873 connp = connp->conn_next; 3874 for (;;) { 3875 while (connp != NULL) { 3876 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3877 src) && conn_wantpacket_v6(connp, ill, ip6h, 3878 flags, zoneid) && 3879 (!is_system_labeled() || 3880 tsol_receive_local(mp, &dst, IPV6_VERSION, 3881 shared_addr, connp))) 3882 break; 3883 connp = connp->conn_next; 3884 } 3885 /* 3886 * Just copy the data part alone. The mctl part is 3887 * needed just for verifying policy and it is never 3888 * sent up. 3889 */ 3890 if (connp == NULL || 3891 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3892 ((first_mp1 = ip_copymsg(first_mp)) 3893 == NULL))) { 3894 /* 3895 * No more interested clients or memory 3896 * allocation failed 3897 */ 3898 connp = first_conn; 3899 break; 3900 } 3901 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3902 CONN_INC_REF(connp); 3903 mutex_exit(&connfp->connf_lock); 3904 /* 3905 * For link-local always add ifindex so that transport 3906 * can set sin6_scope_id. Avoid it for ICMP error 3907 * fanout. 3908 */ 3909 if ((connp->conn_ipv6_recvpktinfo || 3910 IN6_IS_ADDR_LINKLOCAL(&src)) && 3911 (flags & IP_FF_IP6INFO)) { 3912 /* Add header */ 3913 mp1 = ip_add_info_v6(mp1, inill, &dst); 3914 } 3915 /* mp1 could have changed */ 3916 if (mctl_present) 3917 first_mp1->b_cont = mp1; 3918 else 3919 first_mp1 = mp1; 3920 if (mp1 == NULL) { 3921 if (mctl_present) 3922 freeb(first_mp1); 3923 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3924 goto next_one; 3925 } 3926 if (CONN_UDP_FLOWCTLD(connp)) { 3927 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3928 freemsg(first_mp1); 3929 goto next_one; 3930 } 3931 3932 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || 3933 secure) { 3934 first_mp1 = ipsec_check_inbound_policy 3935 (first_mp1, connp, NULL, ip6h, 3936 mctl_present); 3937 } 3938 if (first_mp1 != NULL) { 3939 if (mctl_present) 3940 freeb(first_mp1); 3941 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3942 3943 /* Send it upstream */ 3944 CONN_UDP_RECV(connp, mp1); 3945 } 3946 next_one: 3947 mutex_enter(&connfp->connf_lock); 3948 /* Follow the next pointer before releasing the conn. */ 3949 next_conn = connp->conn_next; 3950 IP6_STAT(ip6_udp_fanmb); 3951 CONN_DEC_REF(connp); 3952 connp = next_conn; 3953 } 3954 3955 /* Last one. Send it upstream. 
*/ 3956 mutex_exit(&connfp->connf_lock); 3957 3958 /* Initiate IPPF processing */ 3959 if (IP6_IN_IPP(flags)) { 3960 uint_t ifindex; 3961 3962 mutex_enter(&ill->ill_lock); 3963 ifindex = ill->ill_phyint->phyint_ifindex; 3964 mutex_exit(&ill->ill_lock); 3965 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3966 if (mp == NULL) { 3967 CONN_DEC_REF(connp); 3968 if (mctl_present) { 3969 freeb(first_mp); 3970 } 3971 return; 3972 } 3973 } 3974 3975 /* 3976 * For link-local always add ifindex so that transport can set 3977 * sin6_scope_id. Avoid it for ICMP error fanout. 3978 */ 3979 if ((connp->conn_ipv6_recvpktinfo || 3980 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) { 3981 /* Add header */ 3982 mp = ip_add_info_v6(mp, inill, &dst); 3983 if (mp == NULL) { 3984 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3985 CONN_DEC_REF(connp); 3986 if (mctl_present) 3987 freeb(first_mp); 3988 return; 3989 } else if (mctl_present) { 3990 first_mp->b_cont = mp; 3991 } else { 3992 first_mp = mp; 3993 } 3994 } 3995 if (CONN_UDP_FLOWCTLD(connp)) { 3996 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3997 freemsg(mp); 3998 } else { 3999 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 4000 first_mp = ipsec_check_inbound_policy(first_mp, 4001 connp, NULL, ip6h, mctl_present); 4002 if (first_mp == NULL) { 4003 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 4004 CONN_DEC_REF(connp); 4005 return; 4006 } 4007 } 4008 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 4009 4010 /* Send it upstream */ 4011 CONN_UDP_RECV(connp, mp); 4012 } 4013 IP6_STAT(ip6_udp_fanmb); 4014 CONN_DEC_REF(connp); 4015 if (mctl_present) 4016 freeb(first_mp); 4017 return; 4018 4019 notfound: 4020 mutex_exit(&connfp->connf_lock); 4021 /* 4022 * No one bound to this port. Is 4023 * there a client that wants all 4024 * unclaimed datagrams? 
4025 */ 4026 if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 4027 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 4028 0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present, 4029 zoneid); 4030 } else { 4031 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 4032 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 4033 mctl_present, zoneid)) { 4034 BUMP_MIB(&ip_mib, udpNoPorts); 4035 } 4036 } 4037 } 4038 4039 /* 4040 * int ip_find_hdr_v6() 4041 * 4042 * This routine is used by the upper layer protocols and the IP tunnel 4043 * module to: 4044 * - Set extension header pointers to appropriate locations 4045 * - Determine IPv6 header length and return it 4046 * - Return a pointer to the last nexthdr value 4047 * 4048 * The caller must initialize ipp_fields. 4049 * 4050 * NOTE: If multiple extension headers of the same type are present, 4051 * ip_find_hdr_v6() will set the respective extension header pointers 4052 * to the first one that it encounters in the IPv6 header. It also 4053 * skips fragment headers. This routine deals with malformed packets 4054 * of various sorts in which case the returned length is up to the 4055 * malformed part. 4056 */ 4057 int 4058 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4059 { 4060 uint_t length, ehdrlen; 4061 uint8_t nexthdr; 4062 uint8_t *whereptr, *endptr; 4063 ip6_dest_t *tmpdstopts; 4064 ip6_rthdr_t *tmprthdr; 4065 ip6_hbh_t *tmphopopts; 4066 ip6_frag_t *tmpfraghdr; 4067 4068 length = IPV6_HDR_LEN; 4069 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4070 endptr = mp->b_wptr; 4071 4072 nexthdr = ip6h->ip6_nxt; 4073 while (whereptr < endptr) { 4074 /* Is there enough left for len + nexthdr? 
*/ 4075 if (whereptr + MIN_EHDR_LEN > endptr) 4076 goto done; 4077 4078 switch (nexthdr) { 4079 case IPPROTO_HOPOPTS: 4080 tmphopopts = (ip6_hbh_t *)whereptr; 4081 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4082 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4083 goto done; 4084 nexthdr = tmphopopts->ip6h_nxt; 4085 /* return only 1st hbh */ 4086 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4087 ipp->ipp_fields |= IPPF_HOPOPTS; 4088 ipp->ipp_hopopts = tmphopopts; 4089 ipp->ipp_hopoptslen = ehdrlen; 4090 } 4091 break; 4092 case IPPROTO_DSTOPTS: 4093 tmpdstopts = (ip6_dest_t *)whereptr; 4094 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4095 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4096 goto done; 4097 nexthdr = tmpdstopts->ip6d_nxt; 4098 /* 4099 * ipp_dstopts is set to the destination header after a 4100 * routing header. 4101 * Assume it is a post-rthdr destination header 4102 * and adjust when we find an rthdr. 4103 */ 4104 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4105 ipp->ipp_fields |= IPPF_DSTOPTS; 4106 ipp->ipp_dstopts = tmpdstopts; 4107 ipp->ipp_dstoptslen = ehdrlen; 4108 } 4109 break; 4110 case IPPROTO_ROUTING: 4111 tmprthdr = (ip6_rthdr_t *)whereptr; 4112 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4113 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4114 goto done; 4115 nexthdr = tmprthdr->ip6r_nxt; 4116 /* return only 1st rthdr */ 4117 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4118 ipp->ipp_fields |= IPPF_RTHDR; 4119 ipp->ipp_rthdr = tmprthdr; 4120 ipp->ipp_rthdrlen = ehdrlen; 4121 } 4122 /* 4123 * Make any destination header we've seen be a 4124 * pre-rthdr destination header. 4125 */ 4126 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4127 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4128 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4129 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4130 ipp->ipp_dstopts = NULL; 4131 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4132 ipp->ipp_dstoptslen = 0; 4133 } 4134 break; 4135 case IPPROTO_FRAGMENT: 4136 /* 4137 * Fragment headers are skipped. 
Currently, only 4138 * IP cares for their existence. If anyone other 4139 * than IP ever has the need to know about the 4140 * location of fragment headers, support can be 4141 * added to the ip6_pkt_t at that time. 4142 */ 4143 tmpfraghdr = (ip6_frag_t *)whereptr; 4144 ehdrlen = sizeof (ip6_frag_t); 4145 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4146 goto done; 4147 nexthdr = tmpfraghdr->ip6f_nxt; 4148 break; 4149 case IPPROTO_NONE: 4150 default: 4151 goto done; 4152 } 4153 length += ehdrlen; 4154 whereptr += ehdrlen; 4155 } 4156 done: 4157 if (nexthdrp != NULL) 4158 *nexthdrp = nexthdr; 4159 return (length); 4160 } 4161 4162 int 4163 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid) 4164 { 4165 ire_t *ire; 4166 4167 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4168 ire = ire_lookup_local_v6(zoneid); 4169 if (ire == NULL) { 4170 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4171 return (1); 4172 } 4173 ip6h->ip6_src = ire->ire_addr_v6; 4174 ire_refrele(ire); 4175 } 4176 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4177 ip6h->ip6_hops = ipv6_def_hops; 4178 return (0); 4179 } 4180 4181 /* 4182 * Try to determine where and what are the IPv6 header length and 4183 * pointer to nexthdr value for the upper layer protocol (or an 4184 * unknown next hdr). 4185 * 4186 * Parameters returns a pointer to the nexthdr value; 4187 * Must handle malformed packets of various sorts. 4188 * Function returns failure for malformed cases. 
4189 */ 4190 boolean_t 4191 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4192 uint8_t **nexthdrpp) 4193 { 4194 uint16_t length; 4195 uint_t ehdrlen; 4196 uint8_t *nexthdrp; 4197 uint8_t *whereptr; 4198 uint8_t *endptr; 4199 ip6_dest_t *desthdr; 4200 ip6_rthdr_t *rthdr; 4201 ip6_frag_t *fraghdr; 4202 4203 length = IPV6_HDR_LEN; 4204 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4205 endptr = mp->b_wptr; 4206 4207 nexthdrp = &ip6h->ip6_nxt; 4208 while (whereptr < endptr) { 4209 /* Is there enough left for len + nexthdr? */ 4210 if (whereptr + MIN_EHDR_LEN > endptr) 4211 break; 4212 4213 switch (*nexthdrp) { 4214 case IPPROTO_HOPOPTS: 4215 case IPPROTO_DSTOPTS: 4216 /* Assumes the headers are identical for hbh and dst */ 4217 desthdr = (ip6_dest_t *)whereptr; 4218 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4219 if ((uchar_t *)desthdr + ehdrlen > endptr) 4220 return (B_FALSE); 4221 nexthdrp = &desthdr->ip6d_nxt; 4222 break; 4223 case IPPROTO_ROUTING: 4224 rthdr = (ip6_rthdr_t *)whereptr; 4225 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4226 if ((uchar_t *)rthdr + ehdrlen > endptr) 4227 return (B_FALSE); 4228 nexthdrp = &rthdr->ip6r_nxt; 4229 break; 4230 case IPPROTO_FRAGMENT: 4231 fraghdr = (ip6_frag_t *)whereptr; 4232 ehdrlen = sizeof (ip6_frag_t); 4233 if ((uchar_t *)&fraghdr[1] > endptr) 4234 return (B_FALSE); 4235 nexthdrp = &fraghdr->ip6f_nxt; 4236 break; 4237 case IPPROTO_NONE: 4238 /* No next header means we're finished */ 4239 default: 4240 *hdr_length_ptr = length; 4241 *nexthdrpp = nexthdrp; 4242 return (B_TRUE); 4243 } 4244 length += ehdrlen; 4245 whereptr += ehdrlen; 4246 *hdr_length_ptr = length; 4247 *nexthdrpp = nexthdrp; 4248 } 4249 switch (*nexthdrp) { 4250 case IPPROTO_HOPOPTS: 4251 case IPPROTO_DSTOPTS: 4252 case IPPROTO_ROUTING: 4253 case IPPROTO_FRAGMENT: 4254 /* 4255 * If any know extension headers are still to be processed, 4256 * the packet's malformed (or at least all the IP header(s) are 4257 * not in the same 
mblk - and that should never happen. 4258 */ 4259 return (B_FALSE); 4260 4261 default: 4262 /* 4263 * If we get here, we know that all of the IP headers were in 4264 * the same mblk, even if the ULP header is in the next mblk. 4265 */ 4266 *hdr_length_ptr = length; 4267 *nexthdrpp = nexthdrp; 4268 return (B_TRUE); 4269 } 4270 } 4271 4272 /* 4273 * Return the length of the IPv6 related headers (including extension headers) 4274 * Returns a length even if the packet is malformed. 4275 */ 4276 int 4277 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4278 { 4279 uint16_t hdr_len; 4280 uint8_t *nexthdrp; 4281 4282 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4283 return (hdr_len); 4284 } 4285 4286 /* 4287 * Select an ill for the packet by considering load spreading across 4288 * a different ill in the group if dst_ill is part of some group. 4289 */ 4290 static ill_t * 4291 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4292 { 4293 ill_t *ill; 4294 4295 /* 4296 * We schedule irrespective of whether the source address is 4297 * INADDR_UNSPECIED or not. 4298 */ 4299 ill = illgrp_scheduler(dst_ill); 4300 if (ill == NULL) 4301 return (NULL); 4302 4303 /* 4304 * For groups with names ip_sioctl_groupname ensures that all 4305 * ills are of same type. For groups without names, ifgrp_insert 4306 * ensures this. 4307 */ 4308 ASSERT(dst_ill->ill_type == ill->ill_type); 4309 4310 return (ill); 4311 } 4312 4313 /* 4314 * IPv6 - 4315 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4316 * to send out a packet to a destination address for which we do not have 4317 * specific routing information. 4318 * 4319 * Handle non-multicast packets. If ill is non-NULL the match is done 4320 * for that ill. 4321 * 4322 * When a specific ill is specified (using IPV6_PKTINFO, 4323 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4324 * on routing entries (ftable and ctable) that have a matching 4325 * ire->ire_ipif->ipif_ill. 
 * Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it can not "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4359 */ 4360 /* ARGSUSED */ 4361 void 4362 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4363 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4364 { 4365 in6_addr_t v6gw; 4366 in6_addr_t dst; 4367 ire_t *ire = NULL; 4368 ipif_t *src_ipif = NULL; 4369 ill_t *dst_ill = NULL; 4370 ire_t *sire = NULL; 4371 ire_t *save_ire; 4372 mblk_t *dlureq_mp; 4373 ip6_t *ip6h; 4374 int err = 0; 4375 mblk_t *first_mp; 4376 ipsec_out_t *io; 4377 ill_t *attach_ill = NULL; 4378 ushort_t ire_marks = 0; 4379 int match_flags; 4380 boolean_t ip6i_present; 4381 ire_t *first_sire = NULL; 4382 mblk_t *copy_mp = NULL; 4383 mblk_t *xmit_mp = NULL; 4384 in6_addr_t save_dst; 4385 uint32_t multirt_flags = 4386 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4387 boolean_t multirt_is_resolvable; 4388 boolean_t multirt_resolve_next; 4389 boolean_t need_rele = B_FALSE; 4390 boolean_t do_attach_ill = B_FALSE; 4391 boolean_t ip6_asp_table_held = B_FALSE; 4392 tsol_ire_gw_secattr_t *attrp = NULL; 4393 tsol_gcgrp_t *gcgrp = NULL; 4394 tsol_gcgrp_addr_t ga; 4395 4396 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4397 4398 first_mp = mp; 4399 if (mp->b_datap->db_type == M_CTL) { 4400 mp = mp->b_cont; 4401 io = (ipsec_out_t *)first_mp->b_rptr; 4402 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4403 } else { 4404 io = NULL; 4405 } 4406 4407 /* 4408 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4409 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4410 * could be NULL. 4411 * 4412 * This information can appear either in an ip6i_t or an IPSEC_OUT 4413 * message. 4414 */ 4415 ip6h = (ip6_t *)mp->b_rptr; 4416 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4417 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4418 if (!ip6i_present || 4419 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4420 attach_ill = ip_grab_attach_ill(ill, first_mp, 4421 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4422 io->ipsec_out_ill_index), B_TRUE); 4423 /* Failure case frees things for us. */ 4424 if (attach_ill == NULL) 4425 return; 4426 4427 /* 4428 * Check if we need an ire that will not be 4429 * looked up by anybody else i.e. HIDDEN. 4430 */ 4431 if (ill_is_probeonly(attach_ill)) 4432 ire_marks = IRE_MARK_HIDDEN; 4433 } 4434 } 4435 4436 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4437 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4438 goto icmp_err_ret; 4439 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4440 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4441 goto icmp_err_ret; 4442 } 4443 4444 /* 4445 * If this IRE is created for forwarding or it is not for 4446 * TCP traffic, mark it as temporary. 4447 * 4448 * Is it sufficient just to check the next header?? 4449 */ 4450 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4451 ire_marks |= IRE_MARK_TEMPORARY; 4452 4453 /* 4454 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4455 * chain until it gets the most specific information available. 4456 * For example, we know that there is no IRE_CACHE for this dest, 4457 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4458 * ire_ftable_lookup_v6 will look up the gateway, etc. 4459 */ 4460 4461 if (ill == NULL) { 4462 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4463 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4464 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4465 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4466 match_flags); 4467 /* 4468 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4469 * in a NULL ill, but the packet could be a neighbor 4470 * solicitation/advertisment and could have a valid attach_ill. 4471 */ 4472 if (attach_ill != NULL) 4473 ill_refrele(attach_ill); 4474 } else { 4475 if (attach_ill != NULL) { 4476 /* 4477 * attach_ill is set only for communicating with 4478 * on-link hosts. So, don't look for DEFAULT. 
4479 * ip_wput_v6 passes the right ill in this case and 4480 * hence we can assert. 4481 */ 4482 ASSERT(ill == attach_ill); 4483 ill_refrele(attach_ill); 4484 do_attach_ill = B_TRUE; 4485 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4486 } else { 4487 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4488 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4489 } 4490 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4491 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4492 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags); 4493 } 4494 4495 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4496 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4497 4498 if (zoneid == ALL_ZONES && ire != NULL) { 4499 /* 4500 * In the forwarding case, we can use a route from any zone 4501 * since we won't change the source address. We can easily 4502 * assert that the source address is already set when there's no 4503 * ip6_info header - otherwise we'd have to call pullupmsg(). 4504 */ 4505 ASSERT(ip6i_present || 4506 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4507 zoneid = ire->ire_zoneid; 4508 } 4509 4510 /* 4511 * We enter a loop that will be run only once in most cases. 4512 * The loop is re-entered in the case where the destination 4513 * can be reached through multiple RTF_MULTIRT-flagged routes. 4514 * The intention is to compute multiple routes to a single 4515 * destination in a single ip_newroute_v6 call. 4516 * The information is contained in sire->ire_flags. 
4517 */ 4518 do { 4519 multirt_resolve_next = B_FALSE; 4520 4521 if (dst_ill != NULL) { 4522 ill_refrele(dst_ill); 4523 dst_ill = NULL; 4524 } 4525 if (src_ipif != NULL) { 4526 ipif_refrele(src_ipif); 4527 src_ipif = NULL; 4528 } 4529 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4530 ip3dbg(("ip_newroute_v6: starting new resolution " 4531 "with first_mp %p, tag %d\n", 4532 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4533 4534 /* 4535 * We check if there are trailing unresolved routes for 4536 * the destination contained in sire. 4537 */ 4538 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4539 &sire, multirt_flags, MBLK_GETLABEL(mp)); 4540 4541 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4542 "ire %p, sire %p\n", 4543 multirt_is_resolvable, (void *)ire, (void *)sire)); 4544 4545 if (!multirt_is_resolvable) { 4546 /* 4547 * No more multirt routes to resolve; give up 4548 * (all routes resolved or no more resolvable 4549 * routes). 4550 */ 4551 if (ire != NULL) { 4552 ire_refrele(ire); 4553 ire = NULL; 4554 } 4555 } else { 4556 ASSERT(sire != NULL); 4557 ASSERT(ire != NULL); 4558 /* 4559 * We simply use first_sire as a flag that 4560 * indicates if a resolvable multirt route has 4561 * already been found during the preceding 4562 * loops. If it is not the case, we may have 4563 * to send an ICMP error to report that the 4564 * destination is unreachable. We do not 4565 * IRE_REFHOLD first_sire. 4566 */ 4567 if (first_sire == NULL) { 4568 first_sire = sire; 4569 } 4570 } 4571 } 4572 if ((ire == NULL) || (ire == sire)) { 4573 /* 4574 * either ire == NULL (the destination cannot be 4575 * resolved) or ire == sire (the gateway cannot be 4576 * resolved). At this point, there are no more routes 4577 * to resolve for the destination, thus we exit. 
4578 */ 4579 if (ip_debug > 3) { 4580 /* ip2dbg */ 4581 pr_addr_dbg("ip_newroute_v6: " 4582 "can't resolve %s\n", AF_INET6, v6dstp); 4583 } 4584 ip3dbg(("ip_newroute_v6: " 4585 "ire %p, sire %p, first_sire %p\n", 4586 (void *)ire, (void *)sire, (void *)first_sire)); 4587 4588 if (sire != NULL) { 4589 ire_refrele(sire); 4590 sire = NULL; 4591 } 4592 4593 if (first_sire != NULL) { 4594 /* 4595 * At least one multirt route has been found 4596 * in the same ip_newroute() call; there is no 4597 * need to report an ICMP error. 4598 * first_sire was not IRE_REFHOLDed. 4599 */ 4600 MULTIRT_DEBUG_UNTAG(first_mp); 4601 freemsg(first_mp); 4602 return; 4603 } 4604 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4605 RTA_DST); 4606 goto icmp_err_ret; 4607 } 4608 4609 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4610 4611 /* 4612 * Verify that the returned IRE does not have either the 4613 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4614 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4615 */ 4616 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4617 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4618 goto icmp_err_ret; 4619 4620 /* 4621 * Increment the ire_ob_pkt_count field for ire if it is an 4622 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4623 * increment the same for the parent IRE, sire, if it is some 4624 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4625 * and HOST_REDIRECT). 4626 */ 4627 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4628 UPDATE_OB_PKT_COUNT(ire); 4629 ire->ire_last_used_time = lbolt; 4630 } 4631 4632 if (sire != NULL) { 4633 mutex_enter(&sire->ire_lock); 4634 v6gw = sire->ire_gateway_addr_v6; 4635 mutex_exit(&sire->ire_lock); 4636 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4637 IRE_INTERFACE)) == 0); 4638 UPDATE_OB_PKT_COUNT(sire); 4639 sire->ire_last_used_time = lbolt; 4640 } else { 4641 v6gw = ipv6_all_zeros; 4642 } 4643 4644 /* 4645 * We have a route to reach the destination. 
4646 * 4647 * 1) If the interface is part of ill group, try to get a new 4648 * ill taking load spreading into account. 4649 * 4650 * 2) After selecting the ill, get a source address that might 4651 * create good inbound load spreading and that matches the 4652 * right scope. ipif_select_source_v6 does this for us. 4653 * 4654 * If the application specified the ill (ifindex), we still 4655 * load spread. Only if the packets needs to go out specifically 4656 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4657 * IPV6_BOUND_PIF we don't try to use a different ill for load 4658 * spreading. 4659 */ 4660 if (!do_attach_ill) { 4661 /* 4662 * If the interface belongs to an interface group, 4663 * make sure the next possible interface in the group 4664 * is used. This encourages load spreading among 4665 * peers in an interface group. However, in the case 4666 * of multirouting, load spreading is not used, as we 4667 * actually want to replicate outgoing packets through 4668 * particular interfaces. 4669 * 4670 * Note: While we pick a dst_ill we are really only 4671 * interested in the ill for load spreading. 4672 * The source ipif is determined by source address 4673 * selection below. 4674 */ 4675 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4676 dst_ill = ire->ire_ipif->ipif_ill; 4677 /* For uniformity do a refhold */ 4678 ill_refhold(dst_ill); 4679 } else { 4680 /* 4681 * If we are here trying to create an IRE_CACHE 4682 * for an offlink destination and have the 4683 * IRE_CACHE for the next hop and the latter is 4684 * using virtual IP source address selection i.e 4685 * it's ire->ire_ipif is pointing to a virtual 4686 * network interface (vni) then 4687 * ip_newroute_get_dst_ll() will return the vni 4688 * interface as the dst_ill. 
Since the vni is 4689 * virtual i.e not associated with any physical 4690 * interface, it cannot be the dst_ill, hence 4691 * in such a case call ip_newroute_get_dst_ll() 4692 * with the stq_ill instead of the ire_ipif ILL. 4693 * The function returns a refheld ill. 4694 */ 4695 if ((ire->ire_type == IRE_CACHE) && 4696 IS_VNI(ire->ire_ipif->ipif_ill)) 4697 dst_ill = ip_newroute_get_dst_ill_v6( 4698 ire->ire_stq->q_ptr); 4699 else 4700 dst_ill = ip_newroute_get_dst_ill_v6( 4701 ire->ire_ipif->ipif_ill); 4702 } 4703 if (dst_ill == NULL) { 4704 if (ip_debug > 2) { 4705 pr_addr_dbg("ip_newroute_v6 : no dst " 4706 "ill for dst %s\n", 4707 AF_INET6, v6dstp); 4708 } 4709 goto icmp_err_ret; 4710 } else if (dst_ill->ill_group == NULL && ill != NULL && 4711 dst_ill != ill) { 4712 /* 4713 * If "ill" is not part of any group, we should 4714 * have found a route matching "ill" as we 4715 * called ire_ftable_lookup_v6 with 4716 * MATCH_IRE_ILL_GROUP. 4717 * Rather than asserting when there is a 4718 * mismatch, we just drop the packet. 4719 */ 4720 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4721 "dst_ill %s ill %s\n", 4722 dst_ill->ill_name, 4723 ill->ill_name)); 4724 goto icmp_err_ret; 4725 } 4726 } else { 4727 dst_ill = ire->ire_ipif->ipif_ill; 4728 /* For uniformity do refhold */ 4729 ill_refhold(dst_ill); 4730 /* 4731 * We should have found a route matching ill as we 4732 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4733 * Rather than asserting, while there is a mismatch, 4734 * we just drop the packet. 4735 */ 4736 if (dst_ill != ill) { 4737 ip0dbg(("ip_newroute_v6: Packet dropped as " 4738 "IP6I_ATTACH_IF ill is %s, " 4739 "ire->ire_ipif->ipif_ill is %s\n", 4740 ill->ill_name, 4741 dst_ill->ill_name)); 4742 goto icmp_err_ret; 4743 } 4744 } 4745 /* 4746 * Pick a source address which matches the scope of the 4747 * destination address. 4748 * For RTF_SETSRC routes, the source address is imposed by the 4749 * parent ire (sire). 
4750 */ 4751 ASSERT(src_ipif == NULL); 4752 if (ire->ire_type == IRE_IF_RESOLVER && 4753 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4754 ip6_asp_can_lookup()) { 4755 /* 4756 * The ire cache entry we're adding is for the 4757 * gateway itself. The source address in this case 4758 * is relative to the gateway's address. 4759 */ 4760 ip6_asp_table_held = B_TRUE; 4761 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4762 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4763 if (src_ipif != NULL) 4764 ire_marks |= IRE_MARK_USESRC_CHECK; 4765 } else { 4766 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4767 /* 4768 * Check that the ipif matching the requested 4769 * source address still exists. 4770 */ 4771 src_ipif = ipif_lookup_addr_v6( 4772 &sire->ire_src_addr_v6, NULL, zoneid, 4773 NULL, NULL, NULL, NULL); 4774 } 4775 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4776 uint_t restrict_ill = RESTRICT_TO_NONE; 4777 4778 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4779 & IP6I_ATTACH_IF) 4780 restrict_ill = RESTRICT_TO_ILL; 4781 ip6_asp_table_held = B_TRUE; 4782 src_ipif = ipif_select_source_v6(dst_ill, 4783 v6dstp, restrict_ill, 4784 IPV6_PREFER_SRC_DEFAULT, zoneid); 4785 if (src_ipif != NULL) 4786 ire_marks |= IRE_MARK_USESRC_CHECK; 4787 } 4788 } 4789 4790 if (src_ipif == NULL) { 4791 if (ip_debug > 2) { 4792 /* ip1dbg */ 4793 pr_addr_dbg("ip_newroute_v6: no src for " 4794 "dst %s\n, ", AF_INET6, v6dstp); 4795 printf("ip_newroute_v6: interface name %s\n", 4796 dst_ill->ill_name); 4797 } 4798 goto icmp_err_ret; 4799 } 4800 4801 if (ip_debug > 3) { 4802 /* ip2dbg */ 4803 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4804 AF_INET6, &v6gw); 4805 } 4806 ip2dbg(("\tire type %s (%d)\n", 4807 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4808 4809 /* 4810 * At this point in ip_newroute_v6(), ire is either the 4811 * IRE_CACHE of the next-hop gateway for an off-subnet 4812 * destination or an IRE_INTERFACE type that should be used 4813 * to resolve an 
on-subnet destination or an on-subnet 4814 * next-hop gateway. 4815 * 4816 * In the IRE_CACHE case, we have the following : 4817 * 4818 * 1) src_ipif - used for getting a source address. 4819 * 4820 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4821 * means packets using this IRE_CACHE will go out on dst_ill. 4822 * 4823 * 3) The IRE sire will point to the prefix that is the longest 4824 * matching route for the destination. These prefix types 4825 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST, and 4826 * IRE_HOST_REDIRECT. 4827 * 4828 * The newly created IRE_CACHE entry for the off-subnet 4829 * destination is tied to both the prefix route and the 4830 * interface route used to resolve the next-hop gateway 4831 * via the ire_phandle and ire_ihandle fields, respectively. 4832 * 4833 * In the IRE_INTERFACE case, we have the following : 4834 * 4835 * 1) src_ipif - used for getting a source address. 4836 * 4837 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4838 * means packets using the IRE_CACHE that we will build 4839 * here will go out on dst_ill. 4840 * 4841 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4842 * to be created will only be tied to the IRE_INTERFACE that 4843 * was derived from the ire_ihandle field. 4844 * 4845 * If sire is non-NULL, it means the destination is off-link 4846 * and we will first create the IRE_CACHE for the gateway. 4847 * Next time through ip_newroute_v6, we will create the 4848 * IRE_CACHE for the final destination as described above. 4849 */ 4850 save_ire = ire; 4851 switch (ire->ire_type) { 4852 case IRE_CACHE: { 4853 ire_t *ipif_ire; 4854 4855 ASSERT(sire != NULL); 4856 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4857 mutex_enter(&ire->ire_lock); 4858 v6gw = ire->ire_gateway_addr_v6; 4859 mutex_exit(&ire->ire_lock); 4860 } 4861 /* 4862 * We need 3 ire's to create a new cache ire for an 4863 * off-link destination from the cache ire of the 4864 * gateway. 4865 * 4866 * 1. 
The prefix ire 'sire' 4867 * 2. The cache ire of the gateway 'ire' 4868 * 3. The interface ire 'ipif_ire' 4869 * 4870 * We have (1) and (2). We lookup (3) below. 4871 * 4872 * If there is no interface route to the gateway, 4873 * it is a race condition, where we found the cache 4874 * but the inteface route has been deleted. 4875 */ 4876 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4877 if (ipif_ire == NULL) { 4878 ip1dbg(("ip_newroute_v6:" 4879 "ire_ihandle_lookup_offlink_v6 failed\n")); 4880 goto icmp_err_ret; 4881 } 4882 /* 4883 * Assume DL_UNITDATA_REQ is same for all physical 4884 * interfaces in the ifgrp. If it isn't, this code will 4885 * have to be seriously rewhacked to allow the 4886 * fastpath probing (such that I cache the link 4887 * header in the IRE_CACHE) to work over ifgrps. 4888 * We have what we need to build an IRE_CACHE. 4889 */ 4890 /* 4891 * Note: the new ire inherits RTF_SETSRC 4892 * and RTF_MULTIRT to propagate these flags from prefix 4893 * to cache. 4894 */ 4895 4896 /* 4897 * Check cached gateway IRE for any security 4898 * attributes; if found, associate the gateway 4899 * credentials group to the destination IRE. 
4900 */ 4901 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4902 mutex_enter(&attrp->igsa_lock); 4903 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4904 GCGRP_REFHOLD(gcgrp); 4905 mutex_exit(&attrp->igsa_lock); 4906 } 4907 4908 ire = ire_create_v6( 4909 v6dstp, /* dest address */ 4910 &ipv6_all_ones, /* mask */ 4911 &src_ipif->ipif_v6src_addr, /* source address */ 4912 &v6gw, /* gateway address */ 4913 &save_ire->ire_max_frag, 4914 NULL, /* Fast Path header */ 4915 dst_ill->ill_rq, /* recv-from queue */ 4916 dst_ill->ill_wq, /* send-to queue */ 4917 IRE_CACHE, 4918 NULL, 4919 src_ipif, 4920 &sire->ire_mask_v6, /* Parent mask */ 4921 sire->ire_phandle, /* Parent handle */ 4922 ipif_ire->ire_ihandle, /* Interface handle */ 4923 sire->ire_flags & /* flags if any */ 4924 (RTF_SETSRC | RTF_MULTIRT), 4925 &(sire->ire_uinfo), 4926 NULL, 4927 gcgrp); 4928 4929 if (ire == NULL) { 4930 if (gcgrp != NULL) { 4931 GCGRP_REFRELE(gcgrp); 4932 gcgrp = NULL; 4933 } 4934 ire_refrele(save_ire); 4935 ire_refrele(ipif_ire); 4936 break; 4937 } 4938 4939 /* reference now held by IRE */ 4940 gcgrp = NULL; 4941 4942 ire->ire_marks |= ire_marks; 4943 4944 /* 4945 * Prevent sire and ipif_ire from getting deleted. The 4946 * newly created ire is tied to both of them via the 4947 * phandle and ihandle respectively. 4948 */ 4949 IRB_REFHOLD(sire->ire_bucket); 4950 /* Has it been removed already ? */ 4951 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4952 IRB_REFRELE(sire->ire_bucket); 4953 ire_refrele(ipif_ire); 4954 ire_refrele(save_ire); 4955 break; 4956 } 4957 4958 IRB_REFHOLD(ipif_ire->ire_bucket); 4959 /* Has it been removed already ? 
*/ 4960 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4961 IRB_REFRELE(ipif_ire->ire_bucket); 4962 IRB_REFRELE(sire->ire_bucket); 4963 ire_refrele(ipif_ire); 4964 ire_refrele(save_ire); 4965 break; 4966 } 4967 4968 xmit_mp = first_mp; 4969 if (ire->ire_flags & RTF_MULTIRT) { 4970 copy_mp = copymsg(first_mp); 4971 if (copy_mp != NULL) { 4972 xmit_mp = copy_mp; 4973 MULTIRT_DEBUG_TAG(first_mp); 4974 } 4975 } 4976 ire_add_then_send(q, ire, xmit_mp); 4977 if (ip6_asp_table_held) { 4978 ip6_asp_table_refrele(); 4979 ip6_asp_table_held = B_FALSE; 4980 } 4981 ire_refrele(save_ire); 4982 4983 /* Assert that sire is not deleted yet. */ 4984 ASSERT(sire->ire_ptpn != NULL); 4985 IRB_REFRELE(sire->ire_bucket); 4986 4987 /* Assert that ipif_ire is not deleted yet. */ 4988 ASSERT(ipif_ire->ire_ptpn != NULL); 4989 IRB_REFRELE(ipif_ire->ire_bucket); 4990 ire_refrele(ipif_ire); 4991 4992 if (copy_mp != NULL) { 4993 /* 4994 * Search for the next unresolved 4995 * multirt route. 4996 */ 4997 copy_mp = NULL; 4998 ipif_ire = NULL; 4999 ire = NULL; 5000 /* re-enter the loop */ 5001 multirt_resolve_next = B_TRUE; 5002 continue; 5003 } 5004 ire_refrele(sire); 5005 ill_refrele(dst_ill); 5006 ipif_refrele(src_ipif); 5007 return; 5008 } 5009 case IRE_IF_NORESOLVER: 5010 /* 5011 * We have what we need to build an IRE_CACHE. 5012 * 5013 * Create a new dlureq_mp with the IPv6 gateway 5014 * address in destination address in the DLPI hdr 5015 * if the physical length is exactly 16 bytes. 
5016 */ 5017 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5018 const in6_addr_t *addr; 5019 5020 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5021 addr = &v6gw; 5022 else 5023 addr = v6dstp; 5024 5025 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 5026 dst_ill->ill_phys_addr_length, 5027 dst_ill->ill_sap, 5028 dst_ill->ill_sap_length); 5029 } else { 5030 dlureq_mp = ill_dlur_gen(NULL, 5031 dst_ill->ill_phys_addr_length, 5032 dst_ill->ill_sap, 5033 dst_ill->ill_sap_length); 5034 } 5035 if (dlureq_mp == NULL) 5036 break; 5037 /* 5038 * TSol note: We are creating the ire cache for the 5039 * destination 'dst'. If 'dst' is offlink, going 5040 * through the first hop 'gw', the security attributes 5041 * of 'dst' must be set to point to the gateway 5042 * credentials of gateway 'gw'. If 'dst' is onlink, it 5043 * is possible that 'dst' is a potential gateway that is 5044 * referenced by some route that has some security 5045 * attributes. Thus in the former case, we need to do a 5046 * gcgrp_lookup of 'gw' while in the latter case we 5047 * need to do gcgrp_lookup of 'dst' itself. 5048 */ 5049 ga.ga_af = AF_INET6; 5050 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5051 ga.ga_addr = v6gw; 5052 else 5053 ga.ga_addr = *v6dstp; 5054 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5055 5056 /* 5057 * Note: the new ire inherits sire flags RTF_SETSRC 5058 * and RTF_MULTIRT to propagate those rules from prefix 5059 * to cache. 5060 */ 5061 ire = ire_create_v6( 5062 v6dstp, /* dest address */ 5063 &ipv6_all_ones, /* mask */ 5064 &src_ipif->ipif_v6src_addr, /* source address */ 5065 &v6gw, /* gateway address */ 5066 &save_ire->ire_max_frag, 5067 NULL, /* Fast Path header */ 5068 dst_ill->ill_rq, /* recv-from queue */ 5069 dst_ill->ill_wq, /* send-to queue */ 5070 IRE_CACHE, 5071 dlureq_mp, 5072 src_ipif, 5073 &save_ire->ire_mask_v6, /* Parent mask */ 5074 (sire != NULL) ? /* Parent handle */ 5075 sire->ire_phandle : 0, 5076 save_ire->ire_ihandle, /* Interface handle */ 5077 (sire != NULL) ? 
/* flags if any */ 5078 sire->ire_flags & 5079 (RTF_SETSRC | RTF_MULTIRT) : 0, 5080 &(save_ire->ire_uinfo), 5081 NULL, 5082 gcgrp); 5083 5084 freeb(dlureq_mp); 5085 5086 if (ire == NULL) { 5087 if (gcgrp != NULL) { 5088 GCGRP_REFRELE(gcgrp); 5089 gcgrp = NULL; 5090 } 5091 ire_refrele(save_ire); 5092 break; 5093 } 5094 5095 /* reference now held by IRE */ 5096 gcgrp = NULL; 5097 5098 ire->ire_marks |= ire_marks; 5099 5100 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5101 dst = v6gw; 5102 else 5103 dst = *v6dstp; 5104 err = ndp_noresolver(dst_ill, &dst); 5105 if (err != 0) { 5106 ire_refrele(save_ire); 5107 break; 5108 } 5109 5110 /* Prevent save_ire from getting deleted */ 5111 IRB_REFHOLD(save_ire->ire_bucket); 5112 /* Has it been removed already ? */ 5113 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5114 IRB_REFRELE(save_ire->ire_bucket); 5115 ire_refrele(save_ire); 5116 break; 5117 } 5118 5119 xmit_mp = first_mp; 5120 /* 5121 * In case of MULTIRT, a copy of the current packet 5122 * to send is made to further re-enter the 5123 * loop and attempt another route resolution 5124 */ 5125 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5126 copy_mp = copymsg(first_mp); 5127 if (copy_mp != NULL) { 5128 xmit_mp = copy_mp; 5129 MULTIRT_DEBUG_TAG(first_mp); 5130 } 5131 } 5132 ire_add_then_send(q, ire, xmit_mp); 5133 if (ip6_asp_table_held) { 5134 ip6_asp_table_refrele(); 5135 ip6_asp_table_held = B_FALSE; 5136 } 5137 5138 /* Assert that it is not deleted yet. */ 5139 ASSERT(save_ire->ire_ptpn != NULL); 5140 IRB_REFRELE(save_ire->ire_bucket); 5141 ire_refrele(save_ire); 5142 5143 if (copy_mp != NULL) { 5144 /* 5145 * If we found a (no)resolver, we ignore any 5146 * trailing top priority IRE_CACHE in 5147 * further loops. This ensures that we do not 5148 * omit any (no)resolver despite the priority 5149 * in this call. 5150 * IRE_CACHE, if any, will be processed 5151 * by another thread entering ip_newroute(), 5152 * (on resolver response, for example). 
5153 * We use this to force multiple parallel 5154 * resolution as soon as a packet needs to be 5155 * sent. The result is, after one packet 5156 * emission all reachable routes are generally 5157 * resolved. 5158 * Otherwise, complete resolution of MULTIRT 5159 * routes would require several emissions as 5160 * side effect. 5161 */ 5162 multirt_flags &= ~MULTIRT_CACHEGW; 5163 5164 /* 5165 * Search for the next unresolved multirt 5166 * route. 5167 */ 5168 copy_mp = NULL; 5169 save_ire = NULL; 5170 ire = NULL; 5171 /* re-enter the loop */ 5172 multirt_resolve_next = B_TRUE; 5173 continue; 5174 } 5175 5176 /* Don't need sire anymore */ 5177 if (sire != NULL) 5178 ire_refrele(sire); 5179 ill_refrele(dst_ill); 5180 ipif_refrele(src_ipif); 5181 return; 5182 5183 case IRE_IF_RESOLVER: 5184 /* 5185 * We can't build an IRE_CACHE yet, but at least we 5186 * found a resolver that can help. 5187 */ 5188 dst = *v6dstp; 5189 5190 /* 5191 * To be at this point in the code with a non-zero gw 5192 * means that dst is reachable through a gateway that 5193 * we have never resolved. By changing dst to the gw 5194 * addr we resolve the gateway first. When 5195 * ire_add_then_send() tries to put the IP dg to dst, 5196 * it will reenter ip_newroute() at which time we will 5197 * find the IRE_CACHE for the gw and create another 5198 * IRE_CACHE above (for dst itself). 5199 */ 5200 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5201 save_dst = dst; 5202 dst = v6gw; 5203 v6gw = ipv6_all_zeros; 5204 } 5205 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5206 /* 5207 * Ask the external resolver to do its thing. 
5208 * Make an mblk chain in the following form: 5209 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5210 */ 5211 mblk_t *ire_mp; 5212 mblk_t *areq_mp; 5213 areq_t *areq; 5214 in6_addr_t *addrp; 5215 5216 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5217 if (ip6_asp_table_held) { 5218 ip6_asp_table_refrele(); 5219 ip6_asp_table_held = B_FALSE; 5220 } 5221 ire = ire_create_mp_v6( 5222 &dst, /* dest address */ 5223 &ipv6_all_ones, /* mask */ 5224 &src_ipif->ipif_v6src_addr, 5225 /* source address */ 5226 &v6gw, /* gateway address */ 5227 NULL, /* Fast Path header */ 5228 dst_ill->ill_rq, /* recv-from queue */ 5229 dst_ill->ill_wq, /* send-to queue */ 5230 IRE_CACHE, 5231 NULL, 5232 src_ipif, 5233 &save_ire->ire_mask_v6, 5234 /* Parent mask */ 5235 0, 5236 save_ire->ire_ihandle, 5237 /* Interface handle */ 5238 0, /* flags if any */ 5239 &(save_ire->ire_uinfo), 5240 NULL, 5241 NULL); 5242 5243 ire_refrele(save_ire); 5244 if (ire == NULL) { 5245 ip1dbg(("ip_newroute_v6:" 5246 "ire is NULL\n")); 5247 break; 5248 } 5249 5250 if ((sire != NULL) && 5251 (sire->ire_flags & RTF_MULTIRT)) { 5252 /* 5253 * processing a copy of the packet to 5254 * send for further resolution loops 5255 */ 5256 copy_mp = copymsg(first_mp); 5257 if (copy_mp != NULL) 5258 MULTIRT_DEBUG_TAG(copy_mp); 5259 } 5260 ire->ire_marks |= ire_marks; 5261 ire_mp = ire->ire_mp; 5262 /* 5263 * Now create or find an nce for this interface. 5264 * The hw addr will need to to be set from 5265 * the reply to the AR_ENTRY_QUERY that 5266 * we're about to send. This will be done in 5267 * ire_add_v6(). 5268 */ 5269 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5270 switch (err) { 5271 case 0: 5272 /* 5273 * New cache entry created. 5274 * Break, then ask the external 5275 * resolver. 5276 */ 5277 break; 5278 case EINPROGRESS: 5279 /* 5280 * Resolution in progress; 5281 * packet has been queued by 5282 * ndp_resolver(). 
5283 */ 5284 ire_delete(ire); 5285 ire = NULL; 5286 /* 5287 * Check if another multirt 5288 * route must be resolved. 5289 */ 5290 if (copy_mp != NULL) { 5291 /* 5292 * If we found a resolver, we 5293 * ignore any trailing top 5294 * priority IRE_CACHE in 5295 * further loops. The reason is 5296 * the same as for noresolver. 5297 */ 5298 multirt_flags &= 5299 ~MULTIRT_CACHEGW; 5300 /* 5301 * Search for the next 5302 * unresolved multirt route. 5303 */ 5304 first_mp = copy_mp; 5305 copy_mp = NULL; 5306 mp = first_mp; 5307 if (mp->b_datap->db_type == 5308 M_CTL) { 5309 mp = mp->b_cont; 5310 } 5311 ASSERT(sire != NULL); 5312 dst = save_dst; 5313 /* 5314 * re-enter the loop 5315 */ 5316 multirt_resolve_next = 5317 B_TRUE; 5318 continue; 5319 } 5320 5321 if (sire != NULL) 5322 ire_refrele(sire); 5323 ill_refrele(dst_ill); 5324 ipif_refrele(src_ipif); 5325 return; 5326 default: 5327 /* 5328 * Transient error; packet will be 5329 * freed. 5330 */ 5331 ire_delete(ire); 5332 ire = NULL; 5333 break; 5334 } 5335 if (err != 0) 5336 break; 5337 /* 5338 * Now set up the AR_ENTRY_QUERY and send it. 5339 */ 5340 areq_mp = ill_arp_alloc(dst_ill, 5341 (uchar_t *)&ipv6_areq_template, 5342 (caddr_t)&dst); 5343 if (areq_mp == NULL) { 5344 ip1dbg(("ip_newroute_v6:" 5345 "areq_mp is NULL\n")); 5346 freemsg(ire_mp); 5347 break; 5348 } 5349 areq = (areq_t *)areq_mp->b_rptr; 5350 addrp = (in6_addr_t *)((char *)areq + 5351 areq->areq_target_addr_offset); 5352 *addrp = dst; 5353 addrp = (in6_addr_t *)((char *)areq + 5354 areq->areq_sender_addr_offset); 5355 *addrp = src_ipif->ipif_v6src_addr; 5356 /* 5357 * link the chain, then send up to the resolver. 5358 */ 5359 linkb(areq_mp, ire_mp); 5360 linkb(areq_mp, mp); 5361 ip1dbg(("ip_newroute_v6:" 5362 "putnext to resolver\n")); 5363 putnext(dst_ill->ill_rq, areq_mp); 5364 /* 5365 * Check if another multirt route 5366 * must be resolved. 
5367 */ 5368 ire = NULL; 5369 if (copy_mp != NULL) { 5370 /* 5371 * If we find a resolver, we ignore any 5372 * trailing top priority IRE_CACHE in 5373 * further loops. The reason is the 5374 * same as for noresolver. 5375 */ 5376 multirt_flags &= ~MULTIRT_CACHEGW; 5377 /* 5378 * Search for the next unresolved 5379 * multirt route. 5380 */ 5381 first_mp = copy_mp; 5382 copy_mp = NULL; 5383 mp = first_mp; 5384 if (mp->b_datap->db_type == M_CTL) { 5385 mp = mp->b_cont; 5386 } 5387 ASSERT(sire != NULL); 5388 dst = save_dst; 5389 /* 5390 * re-enter the loop 5391 */ 5392 multirt_resolve_next = B_TRUE; 5393 continue; 5394 } 5395 5396 if (sire != NULL) 5397 ire_refrele(sire); 5398 ill_refrele(dst_ill); 5399 ipif_refrele(src_ipif); 5400 return; 5401 } 5402 /* 5403 * Non-external resolver case. 5404 * 5405 * TSol note: Please see the note above the 5406 * IRE_IF_NORESOLVER case. 5407 */ 5408 ga.ga_af = AF_INET6; 5409 ga.ga_addr = dst; 5410 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5411 5412 ire = ire_create_v6( 5413 &dst, /* dest address */ 5414 &ipv6_all_ones, /* mask */ 5415 &src_ipif->ipif_v6src_addr, /* source address */ 5416 &v6gw, /* gateway address */ 5417 &save_ire->ire_max_frag, 5418 NULL, /* Fast Path header */ 5419 dst_ill->ill_rq, /* recv-from queue */ 5420 dst_ill->ill_wq, /* send-to queue */ 5421 IRE_CACHE, 5422 NULL, 5423 src_ipif, 5424 &save_ire->ire_mask_v6, /* Parent mask */ 5425 0, 5426 save_ire->ire_ihandle, /* Interface handle */ 5427 0, /* flags if any */ 5428 &(save_ire->ire_uinfo), 5429 NULL, 5430 gcgrp); 5431 5432 if (ire == NULL) { 5433 if (gcgrp != NULL) { 5434 GCGRP_REFRELE(gcgrp); 5435 gcgrp = NULL; 5436 } 5437 ire_refrele(save_ire); 5438 break; 5439 } 5440 5441 /* reference now held by IRE */ 5442 gcgrp = NULL; 5443 5444 if ((sire != NULL) && 5445 (sire->ire_flags & RTF_MULTIRT)) { 5446 copy_mp = copymsg(first_mp); 5447 if (copy_mp != NULL) 5448 MULTIRT_DEBUG_TAG(copy_mp); 5449 } 5450 5451 ire->ire_marks |= ire_marks; 5452 err = ndp_resolver(dst_ill, 
&dst, first_mp, zoneid); 5453 switch (err) { 5454 case 0: 5455 /* Prevent save_ire from getting deleted */ 5456 IRB_REFHOLD(save_ire->ire_bucket); 5457 /* Has it been removed already ? */ 5458 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5459 IRB_REFRELE(save_ire->ire_bucket); 5460 ire_refrele(save_ire); 5461 break; 5462 } 5463 5464 /* 5465 * We have a resolved cache entry, 5466 * add in the IRE. 5467 */ 5468 ire_add_then_send(q, ire, first_mp); 5469 if (ip6_asp_table_held) { 5470 ip6_asp_table_refrele(); 5471 ip6_asp_table_held = B_FALSE; 5472 } 5473 5474 /* Assert that it is not deleted yet. */ 5475 ASSERT(save_ire->ire_ptpn != NULL); 5476 IRB_REFRELE(save_ire->ire_bucket); 5477 ire_refrele(save_ire); 5478 /* 5479 * Check if another multirt route 5480 * must be resolved. 5481 */ 5482 ire = NULL; 5483 if (copy_mp != NULL) { 5484 /* 5485 * If we find a resolver, we ignore any 5486 * trailing top priority IRE_CACHE in 5487 * further loops. The reason is the 5488 * same as for noresolver. 5489 */ 5490 multirt_flags &= ~MULTIRT_CACHEGW; 5491 /* 5492 * Search for the next unresolved 5493 * multirt route. 5494 */ 5495 first_mp = copy_mp; 5496 copy_mp = NULL; 5497 mp = first_mp; 5498 if (mp->b_datap->db_type == M_CTL) { 5499 mp = mp->b_cont; 5500 } 5501 ASSERT(sire != NULL); 5502 dst = save_dst; 5503 /* 5504 * re-enter the loop 5505 */ 5506 multirt_resolve_next = B_TRUE; 5507 continue; 5508 } 5509 5510 if (sire != NULL) 5511 ire_refrele(sire); 5512 ill_refrele(dst_ill); 5513 ipif_refrele(src_ipif); 5514 return; 5515 5516 case EINPROGRESS: 5517 /* 5518 * mp was consumed - presumably queued. 5519 * No need for ire, presumably resolution is 5520 * in progress, and ire will be added when the 5521 * address is resolved. 
5522 */ 5523 if (ip6_asp_table_held) { 5524 ip6_asp_table_refrele(); 5525 ip6_asp_table_held = B_FALSE; 5526 } 5527 ASSERT(ire->ire_nce == NULL); 5528 ire_delete(ire); 5529 ire_refrele(save_ire); 5530 /* 5531 * Check if another multirt route 5532 * must be resolved. 5533 */ 5534 ire = NULL; 5535 if (copy_mp != NULL) { 5536 /* 5537 * If we find a resolver, we ignore any 5538 * trailing top priority IRE_CACHE in 5539 * further loops. The reason is the 5540 * same as for noresolver. 5541 */ 5542 multirt_flags &= ~MULTIRT_CACHEGW; 5543 /* 5544 * Search for the next unresolved 5545 * multirt route. 5546 */ 5547 first_mp = copy_mp; 5548 copy_mp = NULL; 5549 mp = first_mp; 5550 if (mp->b_datap->db_type == M_CTL) { 5551 mp = mp->b_cont; 5552 } 5553 ASSERT(sire != NULL); 5554 dst = save_dst; 5555 /* 5556 * re-enter the loop 5557 */ 5558 multirt_resolve_next = B_TRUE; 5559 continue; 5560 } 5561 if (sire != NULL) 5562 ire_refrele(sire); 5563 ill_refrele(dst_ill); 5564 ipif_refrele(src_ipif); 5565 return; 5566 default: 5567 /* Some transient error */ 5568 ASSERT(ire->ire_nce == NULL); 5569 ire_refrele(save_ire); 5570 break; 5571 } 5572 break; 5573 default: 5574 break; 5575 } 5576 if (ip6_asp_table_held) { 5577 ip6_asp_table_refrele(); 5578 ip6_asp_table_held = B_FALSE; 5579 } 5580 } while (multirt_resolve_next); 5581 5582 err_ret: 5583 ip1dbg(("ip_newroute_v6: dropped\n")); 5584 if (src_ipif != NULL) 5585 ipif_refrele(src_ipif); 5586 if (dst_ill != NULL) { 5587 need_rele = B_TRUE; 5588 ill = dst_ill; 5589 } 5590 if (ill != NULL) { 5591 if (mp->b_prev != NULL) { 5592 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 5593 } else { 5594 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 5595 } 5596 5597 if (need_rele) 5598 ill_refrele(ill); 5599 } else { 5600 if (mp->b_prev != NULL) { 5601 BUMP_MIB(&ip6_mib, ipv6InDiscards); 5602 } else { 5603 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 5604 } 5605 } 5606 /* Did this packet originate externally? 
*/ 5607 if (mp->b_prev) { 5608 mp->b_next = NULL; 5609 mp->b_prev = NULL; 5610 } 5611 if (copy_mp != NULL) { 5612 MULTIRT_DEBUG_UNTAG(copy_mp); 5613 freemsg(copy_mp); 5614 } 5615 MULTIRT_DEBUG_UNTAG(first_mp); 5616 freemsg(first_mp); 5617 if (ire != NULL) 5618 ire_refrele(ire); 5619 if (sire != NULL) 5620 ire_refrele(sire); 5621 return; 5622 5623 icmp_err_ret: 5624 if (ip6_asp_table_held) 5625 ip6_asp_table_refrele(); 5626 if (src_ipif != NULL) 5627 ipif_refrele(src_ipif); 5628 if (dst_ill != NULL) { 5629 need_rele = B_TRUE; 5630 ill = dst_ill; 5631 } 5632 ip1dbg(("ip_newroute_v6: no route\n")); 5633 if (sire != NULL) 5634 ire_refrele(sire); 5635 /* 5636 * We need to set sire to NULL to avoid double freeing if we 5637 * ever goto err_ret from below. 5638 */ 5639 sire = NULL; 5640 ip6h = (ip6_t *)mp->b_rptr; 5641 /* Skip ip6i_t header if present */ 5642 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5643 /* Make sure the IPv6 header is present */ 5644 if ((mp->b_wptr - (uchar_t *)ip6h) < 5645 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5646 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5647 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5648 goto err_ret; 5649 } 5650 } 5651 mp->b_rptr += sizeof (ip6i_t); 5652 ip6h = (ip6_t *)mp->b_rptr; 5653 } 5654 /* Did this packet originate externally? 
*/ 5655 if (mp->b_prev) { 5656 if (ill != NULL) { 5657 BUMP_MIB(ill->ill_ip6_mib, ipv6InNoRoutes); 5658 } else { 5659 BUMP_MIB(&ip6_mib, ipv6InNoRoutes); 5660 } 5661 mp->b_next = NULL; 5662 mp->b_prev = NULL; 5663 q = WR(q); 5664 } else { 5665 if (ill != NULL) { 5666 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 5667 } else { 5668 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 5669 } 5670 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5671 /* Failed */ 5672 if (copy_mp != NULL) { 5673 MULTIRT_DEBUG_UNTAG(copy_mp); 5674 freemsg(copy_mp); 5675 } 5676 MULTIRT_DEBUG_UNTAG(first_mp); 5677 freemsg(first_mp); 5678 if (ire != NULL) 5679 ire_refrele(ire); 5680 if (need_rele) 5681 ill_refrele(ill); 5682 return; 5683 } 5684 } 5685 5686 if (need_rele) 5687 ill_refrele(ill); 5688 5689 /* 5690 * At this point we will have ire only if RTF_BLACKHOLE 5691 * or RTF_REJECT flags are set on the IRE. It will not 5692 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5693 */ 5694 if (ire != NULL) { 5695 if (ire->ire_flags & RTF_BLACKHOLE) { 5696 ire_refrele(ire); 5697 if (copy_mp != NULL) { 5698 MULTIRT_DEBUG_UNTAG(copy_mp); 5699 freemsg(copy_mp); 5700 } 5701 MULTIRT_DEBUG_UNTAG(first_mp); 5702 freemsg(first_mp); 5703 return; 5704 } 5705 ire_refrele(ire); 5706 } 5707 if (ip_debug > 3) { 5708 /* ip2dbg */ 5709 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5710 AF_INET6, v6dstp); 5711 } 5712 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5713 B_FALSE, B_FALSE); 5714 } 5715 5716 /* 5717 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5718 * we need to send out a packet to a destination address for which we do not 5719 * have specific routing information. It is only used for multicast packets. 5720 * 5721 * If unspec_src we allow creating an IRE with source address zero. 5722 * ire_send_v6() will delete it after the packet is sent. 
5723 */ 5724 void 5725 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5726 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5727 { 5728 ire_t *ire = NULL; 5729 ipif_t *src_ipif = NULL; 5730 int err = 0; 5731 ill_t *dst_ill = NULL; 5732 ire_t *save_ire; 5733 ushort_t ire_marks = 0; 5734 ipsec_out_t *io; 5735 ill_t *attach_ill = NULL; 5736 ill_t *ill; 5737 ip6_t *ip6h; 5738 mblk_t *first_mp; 5739 boolean_t ip6i_present; 5740 ire_t *fire = NULL; 5741 mblk_t *copy_mp = NULL; 5742 boolean_t multirt_resolve_next; 5743 in6_addr_t *v6dstp = &v6dst; 5744 boolean_t ipif_held = B_FALSE; 5745 boolean_t ill_held = B_FALSE; 5746 boolean_t ip6_asp_table_held = B_FALSE; 5747 5748 /* 5749 * This loop is run only once in most cases. 5750 * We loop to resolve further routes only when the destination 5751 * can be reached through multiple RTF_MULTIRT-flagged ires. 5752 */ 5753 do { 5754 multirt_resolve_next = B_FALSE; 5755 if (dst_ill != NULL) { 5756 ill_refrele(dst_ill); 5757 dst_ill = NULL; 5758 } 5759 5760 if (src_ipif != NULL) { 5761 ipif_refrele(src_ipif); 5762 src_ipif = NULL; 5763 } 5764 ASSERT(ipif != NULL); 5765 ill = ipif->ipif_ill; 5766 5767 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5768 if (ip_debug > 2) { 5769 /* ip1dbg */ 5770 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5771 AF_INET6, v6dstp); 5772 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5773 ill->ill_name, ipif->ipif_isv6); 5774 } 5775 5776 first_mp = mp; 5777 if (mp->b_datap->db_type == M_CTL) { 5778 mp = mp->b_cont; 5779 io = (ipsec_out_t *)first_mp->b_rptr; 5780 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5781 } else { 5782 io = NULL; 5783 } 5784 5785 /* 5786 * If the interface is a pt-pt interface we look for an 5787 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5788 * local_address and the pt-pt destination address. 5789 * Otherwise we just match the local address. 
5790 */ 5791 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5792 goto err_ret; 5793 } 5794 /* 5795 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5796 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5797 * as it could be NULL. 5798 * 5799 * This information can appear either in an ip6i_t or an 5800 * IPSEC_OUT message. 5801 */ 5802 ip6h = (ip6_t *)mp->b_rptr; 5803 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5804 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5805 if (!ip6i_present || 5806 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5807 attach_ill = ip_grab_attach_ill(ill, first_mp, 5808 (ip6i_present ? 5809 ((ip6i_t *)ip6h)->ip6i_ifindex : 5810 io->ipsec_out_ill_index), B_TRUE); 5811 /* Failure case frees things for us. */ 5812 if (attach_ill == NULL) 5813 return; 5814 5815 /* 5816 * Check if we need an ire that will not be 5817 * looked up by anybody else i.e. HIDDEN. 5818 */ 5819 if (ill_is_probeonly(attach_ill)) 5820 ire_marks = IRE_MARK_HIDDEN; 5821 } 5822 } 5823 5824 /* 5825 * We check if an IRE_OFFSUBNET for the addr that goes through 5826 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5827 * RTF_MULTIRT flags must be honored. 5828 */ 5829 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5830 ip2dbg(("ip_newroute_ipif_v6: " 5831 "ipif_lookup_multi_ire_v6(" 5832 "ipif %p, dst %08x) = fire %p\n", 5833 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5834 (void *)fire)); 5835 5836 /* 5837 * If the application specified the ill (ifindex), we still 5838 * load spread. Only if the packets needs to go out specifically 5839 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5840 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5841 * multirouting, then we don't try to use a different ill for 5842 * load spreading. 
5843 */ 5844 if (attach_ill == NULL) { 5845 /* 5846 * If the interface belongs to an interface group, 5847 * make sure the next possible interface in the group 5848 * is used. This encourages load spreading among peers 5849 * in an interface group. 5850 * 5851 * Note: While we pick a dst_ill we are really only 5852 * interested in the ill for load spreading. The source 5853 * ipif is determined by source address selection below. 5854 */ 5855 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5856 dst_ill = ipif->ipif_ill; 5857 /* For uniformity do a refhold */ 5858 ill_refhold(dst_ill); 5859 } else { 5860 /* refheld by ip_newroute_get_dst_ill_v6 */ 5861 dst_ill = 5862 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5863 } 5864 if (dst_ill == NULL) { 5865 if (ip_debug > 2) { 5866 pr_addr_dbg("ip_newroute_ipif_v6: " 5867 "no dst ill for dst %s\n", 5868 AF_INET6, v6dstp); 5869 } 5870 goto err_ret; 5871 } 5872 } else { 5873 dst_ill = ipif->ipif_ill; 5874 /* 5875 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5876 * and IPV6_BOUND_PIF case. 5877 */ 5878 ASSERT(dst_ill == attach_ill); 5879 /* attach_ill is already refheld */ 5880 } 5881 /* 5882 * Pick a source address which matches the scope of the 5883 * destination address. 5884 * For RTF_SETSRC routes, the source address is imposed by the 5885 * parent ire (fire). 5886 */ 5887 ASSERT(src_ipif == NULL); 5888 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5889 /* 5890 * Check that the ipif matching the requested source 5891 * address still exists. 
5892 */ 5893 src_ipif = 5894 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5895 NULL, zoneid, NULL, NULL, NULL, NULL); 5896 } 5897 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5898 ip6_asp_table_held = B_TRUE; 5899 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5900 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5901 } 5902 5903 if (src_ipif == NULL) { 5904 if (!unspec_src) { 5905 if (ip_debug > 2) { 5906 /* ip1dbg */ 5907 pr_addr_dbg("ip_newroute_ipif_v6: " 5908 "no src for dst %s\n,", 5909 AF_INET6, v6dstp); 5910 printf(" through interface %s\n", 5911 dst_ill->ill_name); 5912 } 5913 goto err_ret; 5914 } 5915 src_ipif = ipif; 5916 ipif_refhold(src_ipif); 5917 } 5918 ire = ipif_to_ire_v6(ipif); 5919 if (ire == NULL) { 5920 if (ip_debug > 2) { 5921 /* ip1dbg */ 5922 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5923 AF_INET6, &ipif->ipif_v6lcl_addr); 5924 printf("ip_newroute_ipif_v6: " 5925 "if %s\n", dst_ill->ill_name); 5926 } 5927 goto err_ret; 5928 } 5929 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5930 goto err_ret; 5931 5932 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5933 5934 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5935 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5936 if (ip_debug > 2) { 5937 /* ip1dbg */ 5938 pr_addr_dbg(" address %s\n", 5939 AF_INET6, &ire->ire_src_addr_v6); 5940 } 5941 save_ire = ire; 5942 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5943 (void *)ire, (void *)ipif)); 5944 5945 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5946 /* 5947 * an IRE_OFFSUBET was looked up 5948 * on that interface. 5949 * this ire has RTF_MULTIRT flag, 5950 * so the resolution loop 5951 * will be re-entered to resolve 5952 * additional routes on other 5953 * interfaces. For that purpose, 5954 * a copy of the packet is 5955 * made at this point. 
5956 */ 5957 fire->ire_last_used_time = lbolt; 5958 copy_mp = copymsg(first_mp); 5959 if (copy_mp) { 5960 MULTIRT_DEBUG_TAG(copy_mp); 5961 } 5962 } 5963 5964 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5965 switch (ire->ire_type) { 5966 case IRE_IF_NORESOLVER: { 5967 /* We have what we need to build an IRE_CACHE. */ 5968 mblk_t *dlureq_mp; 5969 5970 /* 5971 * Create a new dlureq_mp with the 5972 * IPv6 gateway address in destination address in the 5973 * DLPI hdr if the physical length is exactly 16 bytes. 5974 */ 5975 ASSERT(dst_ill->ill_isv6); 5976 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5977 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5978 dst_ill->ill_phys_addr_length, 5979 dst_ill->ill_sap, 5980 dst_ill->ill_sap_length); 5981 } else { 5982 dlureq_mp = ill_dlur_gen(NULL, 5983 dst_ill->ill_phys_addr_length, 5984 dst_ill->ill_sap, 5985 dst_ill->ill_sap_length); 5986 } 5987 5988 if (dlureq_mp == NULL) 5989 break; 5990 /* 5991 * The newly created ire will inherit the flags of the 5992 * parent ire, if any. 5993 */ 5994 ire = ire_create_v6( 5995 v6dstp, /* dest address */ 5996 &ipv6_all_ones, /* mask */ 5997 &src_ipif->ipif_v6src_addr, /* source address */ 5998 NULL, /* gateway address */ 5999 &save_ire->ire_max_frag, 6000 NULL, /* Fast Path header */ 6001 dst_ill->ill_rq, /* recv-from queue */ 6002 dst_ill->ill_wq, /* send-to queue */ 6003 IRE_CACHE, 6004 dlureq_mp, 6005 src_ipif, 6006 NULL, 6007 (fire != NULL) ? /* Parent handle */ 6008 fire->ire_phandle : 0, 6009 save_ire->ire_ihandle, /* Interface handle */ 6010 (fire != NULL) ? 
6011 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6012 0, 6013 &ire_uinfo_null, 6014 NULL, 6015 NULL); 6016 6017 freeb(dlureq_mp); 6018 6019 if (ire == NULL) { 6020 ire_refrele(save_ire); 6021 break; 6022 } 6023 6024 ire->ire_marks |= ire_marks; 6025 6026 err = ndp_noresolver(dst_ill, v6dstp); 6027 if (err != 0) { 6028 ire_refrele(save_ire); 6029 break; 6030 } 6031 6032 /* Prevent save_ire from getting deleted */ 6033 IRB_REFHOLD(save_ire->ire_bucket); 6034 /* Has it been removed already ? */ 6035 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6036 IRB_REFRELE(save_ire->ire_bucket); 6037 ire_refrele(save_ire); 6038 break; 6039 } 6040 6041 ire_add_then_send(q, ire, first_mp); 6042 if (ip6_asp_table_held) { 6043 ip6_asp_table_refrele(); 6044 ip6_asp_table_held = B_FALSE; 6045 } 6046 6047 /* Assert that it is not deleted yet. */ 6048 ASSERT(save_ire->ire_ptpn != NULL); 6049 IRB_REFRELE(save_ire->ire_bucket); 6050 ire_refrele(save_ire); 6051 if (fire != NULL) { 6052 ire_refrele(fire); 6053 fire = NULL; 6054 } 6055 6056 /* 6057 * The resolution loop is re-entered if we 6058 * actually are in a multirouting case. 6059 */ 6060 if (copy_mp != NULL) { 6061 boolean_t need_resolve = 6062 ire_multirt_need_resolve_v6(v6dstp, 6063 MBLK_GETLABEL(copy_mp)); 6064 if (!need_resolve) { 6065 MULTIRT_DEBUG_UNTAG(copy_mp); 6066 freemsg(copy_mp); 6067 copy_mp = NULL; 6068 } else { 6069 /* 6070 * ipif_lookup_group_v6() calls 6071 * ire_lookup_multi_v6() that uses 6072 * ire_ftable_lookup_v6() to find 6073 * an IRE_INTERFACE for the group. 6074 * In the multirt case, 6075 * ire_lookup_multi_v6() then invokes 6076 * ire_multirt_lookup_v6() to find 6077 * the next resolvable ire. 6078 * As a result, we obtain a new 6079 * interface, derived from the 6080 * next ire. 
6081 */ 6082 if (ipif_held) { 6083 ipif_refrele(ipif); 6084 ipif_held = B_FALSE; 6085 } 6086 ipif = ipif_lookup_group_v6(v6dstp, 6087 zoneid); 6088 ip2dbg(("ip_newroute_ipif: " 6089 "multirt dst %08x, ipif %p\n", 6090 ntohl(V4_PART_OF_V6((*v6dstp))), 6091 (void *)ipif)); 6092 if (ipif != NULL) { 6093 ipif_held = B_TRUE; 6094 mp = copy_mp; 6095 copy_mp = NULL; 6096 multirt_resolve_next = 6097 B_TRUE; 6098 continue; 6099 } else { 6100 freemsg(copy_mp); 6101 } 6102 } 6103 } 6104 ill_refrele(dst_ill); 6105 if (ipif_held) { 6106 ipif_refrele(ipif); 6107 ipif_held = B_FALSE; 6108 } 6109 if (src_ipif != NULL) 6110 ipif_refrele(src_ipif); 6111 return; 6112 } 6113 case IRE_IF_RESOLVER: { 6114 6115 ASSERT(dst_ill->ill_isv6); 6116 6117 /* 6118 * We obtain a partial IRE_CACHE which we will pass 6119 * along with the resolver query. When the response 6120 * comes back it will be there ready for us to add. 6121 */ 6122 /* 6123 * the newly created ire will inherit the flags of the 6124 * parent ire, if any. 6125 */ 6126 ire = ire_create_v6( 6127 v6dstp, /* dest address */ 6128 &ipv6_all_ones, /* mask */ 6129 &src_ipif->ipif_v6src_addr, /* source address */ 6130 NULL, /* gateway address */ 6131 &save_ire->ire_max_frag, 6132 NULL, /* Fast Path header */ 6133 dst_ill->ill_rq, /* recv-from queue */ 6134 dst_ill->ill_wq, /* send-to queue */ 6135 IRE_CACHE, 6136 NULL, 6137 src_ipif, 6138 NULL, 6139 (fire != NULL) ? /* Parent handle */ 6140 fire->ire_phandle : 0, 6141 save_ire->ire_ihandle, /* Interface handle */ 6142 (fire != NULL) ? 
6143 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6144 0, 6145 &ire_uinfo_null, 6146 NULL, 6147 NULL); 6148 6149 if (ire == NULL) { 6150 ire_refrele(save_ire); 6151 break; 6152 } 6153 6154 ire->ire_marks |= ire_marks; 6155 6156 /* Resolve and add ire to the ctable */ 6157 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6158 switch (err) { 6159 case 0: 6160 /* Prevent save_ire from getting deleted */ 6161 IRB_REFHOLD(save_ire->ire_bucket); 6162 /* Has it been removed already ? */ 6163 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6164 IRB_REFRELE(save_ire->ire_bucket); 6165 ire_refrele(save_ire); 6166 break; 6167 } 6168 /* 6169 * We have a resolved cache entry, 6170 * add in the IRE. 6171 */ 6172 ire_add_then_send(q, ire, first_mp); 6173 if (ip6_asp_table_held) { 6174 ip6_asp_table_refrele(); 6175 ip6_asp_table_held = B_FALSE; 6176 } 6177 6178 /* Assert that it is not deleted yet. */ 6179 ASSERT(save_ire->ire_ptpn != NULL); 6180 IRB_REFRELE(save_ire->ire_bucket); 6181 ire_refrele(save_ire); 6182 if (fire != NULL) { 6183 ire_refrele(fire); 6184 fire = NULL; 6185 } 6186 6187 /* 6188 * The resolution loop is re-entered if we 6189 * actually are in a multirouting case. 6190 */ 6191 if (copy_mp != NULL) { 6192 boolean_t need_resolve = 6193 ire_multirt_need_resolve_v6(v6dstp, 6194 MBLK_GETLABEL(copy_mp)); 6195 if (!need_resolve) { 6196 MULTIRT_DEBUG_UNTAG(copy_mp); 6197 freemsg(copy_mp); 6198 copy_mp = NULL; 6199 } else { 6200 /* 6201 * ipif_lookup_group_v6() calls 6202 * ire_lookup_multi_v6() that 6203 * uses ire_ftable_lookup_v6() 6204 * to find an IRE_INTERFACE for 6205 * the group. In the multirt 6206 * case, ire_lookup_multi_v6() 6207 * then invokes 6208 * ire_multirt_lookup_v6() to 6209 * find the next resolvable ire. 6210 * As a result, we obtain a new 6211 * interface, derived from the 6212 * next ire. 
6213 */ 6214 if (ipif_held) { 6215 ipif_refrele(ipif); 6216 ipif_held = B_FALSE; 6217 } 6218 ipif = ipif_lookup_group_v6( 6219 v6dstp, zoneid); 6220 ip2dbg(("ip_newroute_ipif: " 6221 "multirt dst %08x, " 6222 "ipif %p\n", 6223 ntohl(V4_PART_OF_V6( 6224 (*v6dstp))), 6225 (void *)ipif)); 6226 if (ipif != NULL) { 6227 ipif_held = B_TRUE; 6228 mp = copy_mp; 6229 copy_mp = NULL; 6230 multirt_resolve_next = 6231 B_TRUE; 6232 continue; 6233 } else { 6234 freemsg(copy_mp); 6235 } 6236 } 6237 } 6238 ill_refrele(dst_ill); 6239 if (ipif_held) { 6240 ipif_refrele(ipif); 6241 ipif_held = B_FALSE; 6242 } 6243 if (src_ipif != NULL) 6244 ipif_refrele(src_ipif); 6245 return; 6246 6247 case EINPROGRESS: 6248 /* 6249 * mp was consumed - presumably queued. 6250 * No need for ire, presumably resolution is 6251 * in progress, and ire will be added when the 6252 * address is resolved. 6253 */ 6254 if (ip6_asp_table_held) { 6255 ip6_asp_table_refrele(); 6256 ip6_asp_table_held = B_FALSE; 6257 } 6258 ire_delete(ire); 6259 ire_refrele(save_ire); 6260 if (fire != NULL) { 6261 ire_refrele(fire); 6262 fire = NULL; 6263 } 6264 6265 /* 6266 * The resolution loop is re-entered if we 6267 * actually are in a multirouting case. 6268 */ 6269 if (copy_mp != NULL) { 6270 boolean_t need_resolve = 6271 ire_multirt_need_resolve_v6(v6dstp, 6272 MBLK_GETLABEL(copy_mp)); 6273 if (!need_resolve) { 6274 MULTIRT_DEBUG_UNTAG(copy_mp); 6275 freemsg(copy_mp); 6276 copy_mp = NULL; 6277 } else { 6278 /* 6279 * ipif_lookup_group_v6() calls 6280 * ire_lookup_multi_v6() that 6281 * uses ire_ftable_lookup_v6() 6282 * to find an IRE_INTERFACE for 6283 * the group. In the multirt 6284 * case, ire_lookup_multi_v6() 6285 * then invokes 6286 * ire_multirt_lookup_v6() to 6287 * find the next resolvable ire. 6288 * As a result, we obtain a new 6289 * interface, derived from the 6290 * next ire. 
6291 */ 6292 if (ipif_held) { 6293 ipif_refrele(ipif); 6294 ipif_held = B_FALSE; 6295 } 6296 ipif = ipif_lookup_group_v6( 6297 v6dstp, zoneid); 6298 ip2dbg(("ip_newroute_ipif: " 6299 "multirt dst %08x, " 6300 "ipif %p\n", 6301 ntohl(V4_PART_OF_V6( 6302 (*v6dstp))), 6303 (void *)ipif)); 6304 if (ipif != NULL) { 6305 ipif_held = B_TRUE; 6306 mp = copy_mp; 6307 copy_mp = NULL; 6308 multirt_resolve_next = 6309 B_TRUE; 6310 continue; 6311 } else { 6312 freemsg(copy_mp); 6313 } 6314 } 6315 } 6316 ill_refrele(dst_ill); 6317 if (ipif_held) { 6318 ipif_refrele(ipif); 6319 ipif_held = B_FALSE; 6320 } 6321 if (src_ipif != NULL) 6322 ipif_refrele(src_ipif); 6323 return; 6324 default: 6325 /* Some transient error */ 6326 ire_refrele(save_ire); 6327 break; 6328 } 6329 break; 6330 } 6331 default: 6332 break; 6333 } 6334 if (ip6_asp_table_held) { 6335 ip6_asp_table_refrele(); 6336 ip6_asp_table_held = B_FALSE; 6337 } 6338 } while (multirt_resolve_next); 6339 6340 err_ret: 6341 if (ip6_asp_table_held) 6342 ip6_asp_table_refrele(); 6343 if (ire != NULL) 6344 ire_refrele(ire); 6345 if (fire != NULL) 6346 ire_refrele(fire); 6347 if (ipif != NULL && ipif_held) 6348 ipif_refrele(ipif); 6349 if (src_ipif != NULL) 6350 ipif_refrele(src_ipif); 6351 /* Multicast - no point in trying to generate ICMP error */ 6352 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6353 if (dst_ill != NULL) { 6354 ill = dst_ill; 6355 ill_held = B_TRUE; 6356 } 6357 if (mp->b_prev || mp->b_next) { 6358 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6359 } else { 6360 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 6361 } 6362 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6363 mp->b_next = NULL; 6364 mp->b_prev = NULL; 6365 freemsg(first_mp); 6366 if (ill_held) 6367 ill_refrele(ill); 6368 } 6369 6370 /* 6371 * Parse and process any hop-by-hop or destination options. 6372 * 6373 * Assumes that q is an ill read queue so that ICMP errors for link-local 6374 * destinations are sent out the correct interface. 
6375 * 6376 * Returns -1 if there was an error and mp has been consumed. 6377 * Returns 0 if no special action is needed. 6378 * Returns 1 if the packet contained a router alert option for this node 6379 * which is verified to be "interesting/known" for our implementation. 6380 * 6381 * XXX Note: In future as more hbh or dest options are defined, 6382 * it may be better to have different routines for hbh and dest 6383 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6384 * may have same value in different namespaces. Or is it same namespace ?? 6385 * Current code checks for each opt_type (other than pads) if it is in 6386 * the expected nexthdr (hbh or dest) 6387 */ 6388 static int 6389 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6390 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6391 { 6392 uint8_t opt_type; 6393 uint_t optused; 6394 int ret = 0; 6395 mblk_t *first_mp; 6396 const char *errtype; 6397 6398 first_mp = mp; 6399 if (mp->b_datap->db_type == M_CTL) { 6400 mp = mp->b_cont; 6401 } 6402 6403 while (optlen != 0) { 6404 opt_type = *optptr; 6405 if (opt_type == IP6OPT_PAD1) { 6406 optused = 1; 6407 } else { 6408 if (optlen < 2) 6409 goto bad_opt; 6410 errtype = "malformed"; 6411 if (opt_type == ip6opt_ls) { 6412 optused = 2 + optptr[1]; 6413 if (optused > optlen) 6414 goto bad_opt; 6415 } else switch (opt_type) { 6416 case IP6OPT_PADN: 6417 /* 6418 * Note:We don't verify that (N-2) pad octets 6419 * are zero as required by spec. Adhere to 6420 * "be liberal in what you accept..." part of 6421 * implementation philosophy (RFC791,RFC1122) 6422 */ 6423 optused = 2 + optptr[1]; 6424 if (optused > optlen) 6425 goto bad_opt; 6426 break; 6427 6428 case IP6OPT_JUMBO: 6429 if (hdr_type != IPPROTO_HOPOPTS) 6430 goto opt_error; 6431 goto opt_error; /* XXX Not implemented! 
*/ 6432 6433 case IP6OPT_ROUTER_ALERT: { 6434 struct ip6_opt_router *or; 6435 6436 if (hdr_type != IPPROTO_HOPOPTS) 6437 goto opt_error; 6438 optused = 2 + optptr[1]; 6439 if (optused > optlen) 6440 goto bad_opt; 6441 or = (struct ip6_opt_router *)optptr; 6442 /* Check total length and alignment */ 6443 if (optused != sizeof (*or) || 6444 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6445 goto opt_error; 6446 /* Check value */ 6447 switch (*((uint16_t *)or->ip6or_value)) { 6448 case IP6_ALERT_MLD: 6449 case IP6_ALERT_RSVP: 6450 ret = 1; 6451 } 6452 break; 6453 } 6454 case IP6OPT_HOME_ADDRESS: { 6455 /* 6456 * Minimal support for the home address option 6457 * (which is required by all IPv6 nodes). 6458 * Implement by just swapping the home address 6459 * and source address. 6460 * XXX Note: this has IPsec implications since 6461 * AH needs to take this into account. 6462 * Also, when IPsec is used we need to ensure 6463 * that this is only processed once 6464 * in the received packet (to avoid swapping 6465 * back and forth). 6466 * NOTE:This option processing is considered 6467 * to be unsafe and prone to a denial of 6468 * service attack. 6469 * The current processing is not safe even with 6470 * IPsec secured IP packets. Since the home 6471 * address option processing requirement still 6472 * is in the IETF draft and in the process of 6473 * being redefined for its usage, it has been 6474 * decided to turn off the option by default. 6475 * If this section of code needs to be executed, 6476 * ndd variable ip6_ignore_home_address_opt 6477 * should be set to 0 at the user's own risk. 6478 */ 6479 struct ip6_opt_home_address *oh; 6480 in6_addr_t tmp; 6481 6482 if (ipv6_ignore_home_address_opt) 6483 goto opt_error; 6484 6485 if (hdr_type != IPPROTO_DSTOPTS) 6486 goto opt_error; 6487 optused = 2 + optptr[1]; 6488 if (optused > optlen) 6489 goto bad_opt; 6490 6491 /* 6492 * We did this dest. opt the first time 6493 * around (i.e. before AH processing). 
6494 * If we've done AH... stop now. 6495 */ 6496 if (first_mp != mp) { 6497 ipsec_in_t *ii; 6498 6499 ii = (ipsec_in_t *)first_mp->b_rptr; 6500 if (ii->ipsec_in_ah_sa != NULL) 6501 break; 6502 } 6503 6504 oh = (struct ip6_opt_home_address *)optptr; 6505 /* Check total length and alignment */ 6506 if (optused < sizeof (*oh) || 6507 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6508 goto opt_error; 6509 /* Swap ip6_src and the home address */ 6510 tmp = ip6h->ip6_src; 6511 /* XXX Note: only 8 byte alignment option */ 6512 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6513 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6514 break; 6515 } 6516 6517 case IP6OPT_TUNNEL_LIMIT: 6518 if (hdr_type != IPPROTO_DSTOPTS) { 6519 goto opt_error; 6520 } 6521 optused = 2 + optptr[1]; 6522 if (optused > optlen) { 6523 goto bad_opt; 6524 } 6525 if (optused != 3) { 6526 goto opt_error; 6527 } 6528 break; 6529 6530 default: 6531 errtype = "unknown"; 6532 /* FALLTHROUGH */ 6533 opt_error: 6534 switch (IP6OPT_TYPE(opt_type)) { 6535 case IP6OPT_TYPE_SKIP: 6536 optused = 2 + optptr[1]; 6537 if (optused > optlen) 6538 goto bad_opt; 6539 ip1dbg(("ip_process_options_v6: %s " 6540 "opt 0x%x skipped\n", 6541 errtype, opt_type)); 6542 break; 6543 case IP6OPT_TYPE_DISCARD: 6544 ip1dbg(("ip_process_options_v6: %s " 6545 "opt 0x%x; packet dropped\n", 6546 errtype, opt_type)); 6547 freemsg(first_mp); 6548 return (-1); 6549 case IP6OPT_TYPE_ICMP: 6550 icmp_param_problem_v6(WR(q), first_mp, 6551 ICMP6_PARAMPROB_OPTION, 6552 (uint32_t)(optptr - 6553 (uint8_t *)ip6h), 6554 B_FALSE, B_FALSE); 6555 return (-1); 6556 case IP6OPT_TYPE_FORCEICMP: 6557 icmp_param_problem_v6(WR(q), first_mp, 6558 ICMP6_PARAMPROB_OPTION, 6559 (uint32_t)(optptr - 6560 (uint8_t *)ip6h), 6561 B_FALSE, B_TRUE); 6562 return (-1); 6563 default: 6564 ASSERT(0); 6565 } 6566 } 6567 } 6568 optlen -= optused; 6569 optptr += optused; 6570 } 6571 return (ret); 6572 6573 bad_opt: 6574 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6575 
(uint32_t)(optptr - (uint8_t *)ip6h), 6576 B_FALSE, B_FALSE); 6577 return (-1); 6578 } 6579 6580 /* 6581 * Process a routing header that is not yet empty. 6582 * Only handles type 0 routing headers. 6583 */ 6584 static void 6585 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6586 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6587 { 6588 ip6_rthdr0_t *rthdr; 6589 uint_t ehdrlen; 6590 uint_t numaddr; 6591 in6_addr_t *addrptr; 6592 in6_addr_t tmp; 6593 6594 ASSERT(rth->ip6r_segleft != 0); 6595 6596 if (!ipv6_forward_src_routed) { 6597 /* XXX Check for source routed out same interface? */ 6598 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 6599 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 6600 freemsg(hada_mp); 6601 freemsg(mp); 6602 return; 6603 } 6604 6605 if (rth->ip6r_type != 0) { 6606 if (hada_mp != NULL) 6607 goto hada_drop; 6608 icmp_param_problem_v6(WR(q), mp, 6609 ICMP6_PARAMPROB_HEADER, 6610 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6611 B_FALSE, B_FALSE); 6612 return; 6613 } 6614 rthdr = (ip6_rthdr0_t *)rth; 6615 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6616 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6617 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6618 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6619 if (rthdr->ip6r0_len & 0x1) { 6620 /* An odd length is impossible */ 6621 if (hada_mp != NULL) 6622 goto hada_drop; 6623 icmp_param_problem_v6(WR(q), mp, 6624 ICMP6_PARAMPROB_HEADER, 6625 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6626 B_FALSE, B_FALSE); 6627 return; 6628 } 6629 numaddr = rthdr->ip6r0_len / 2; 6630 if (rthdr->ip6r0_segleft > numaddr) { 6631 /* segleft exceeds number of addresses in routing header */ 6632 if (hada_mp != NULL) 6633 goto hada_drop; 6634 icmp_param_problem_v6(WR(q), mp, 6635 ICMP6_PARAMPROB_HEADER, 6636 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6637 (uchar_t *)ip6h), 6638 B_FALSE, B_FALSE); 6639 return; 6640 } 6641 
addrptr += (numaddr - rthdr->ip6r0_segleft); 6642 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6643 IN6_IS_ADDR_MULTICAST(addrptr)) { 6644 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6645 freemsg(hada_mp); 6646 freemsg(mp); 6647 return; 6648 } 6649 /* Swap */ 6650 tmp = *addrptr; 6651 *addrptr = ip6h->ip6_dst; 6652 ip6h->ip6_dst = tmp; 6653 rthdr->ip6r0_segleft--; 6654 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6655 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6656 if (hada_mp != NULL) 6657 goto hada_drop; 6658 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6659 B_FALSE, B_FALSE); 6660 return; 6661 } 6662 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6663 return; 6664 hada_drop: 6665 /* IPsec kstats: bean counter? */ 6666 freemsg(hada_mp); 6667 freemsg(mp); 6668 } 6669 6670 /* 6671 * Read side put procedure for IPv6 module. 6672 */ 6673 static void 6674 ip_rput_v6(queue_t *q, mblk_t *mp) 6675 { 6676 mblk_t *first_mp; 6677 mblk_t *hada_mp = NULL; 6678 ip6_t *ip6h; 6679 boolean_t ll_multicast = B_FALSE; 6680 boolean_t mctl_present = B_FALSE; 6681 ill_t *ill; 6682 struct iocblk *iocp; 6683 uint_t flags = 0; 6684 mblk_t *dl_mp; 6685 6686 ill = (ill_t *)q->q_ptr; 6687 if (ill->ill_state_flags & ILL_CONDEMNED) { 6688 union DL_primitives *dl; 6689 6690 dl = (union DL_primitives *)mp->b_rptr; 6691 /* 6692 * Things are opening or closing - only accept DLPI 6693 * ack messages. If the stream is closing and ip_wsrv 6694 * has completed, ip_close is out of the qwait, but has 6695 * not yet completed qprocsoff. Don't proceed any further 6696 * because the ill has been cleaned up and things hanging 6697 * off the ill have been freed. 
6698 */ 6699 if ((mp->b_datap->db_type != M_PCPROTO) || 6700 (dl->dl_primitive == DL_UNITDATA_IND)) { 6701 inet_freemsg(mp); 6702 return; 6703 } 6704 } 6705 6706 dl_mp = NULL; 6707 switch (mp->b_datap->db_type) { 6708 case M_DATA: { 6709 int hlen; 6710 uchar_t *ucp; 6711 struct ether_header *eh; 6712 dl_unitdata_ind_t *dui; 6713 6714 /* 6715 * This is a work-around for CR 6451644, a bug in Nemo. It 6716 * should be removed when that problem is fixed. 6717 */ 6718 if (ill->ill_mactype == DL_ETHER && 6719 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6720 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6721 ucp[-2] == (IP6_DL_SAP >> 8)) { 6722 if (hlen >= sizeof (struct ether_vlan_header) && 6723 ucp[-5] == 0 && ucp[-6] == 0x81) 6724 ucp -= sizeof (struct ether_vlan_header); 6725 else 6726 ucp -= sizeof (struct ether_header); 6727 /* 6728 * If it's a group address, then fabricate a 6729 * DL_UNITDATA_IND message. 6730 */ 6731 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6732 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6733 BPRI_HI)) != NULL) { 6734 eh = (struct ether_header *)ucp; 6735 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6736 DB_TYPE(dl_mp) = M_PROTO; 6737 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6738 dui->dl_primitive = DL_UNITDATA_IND; 6739 dui->dl_dest_addr_length = 8; 6740 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6741 dui->dl_src_addr_length = 8; 6742 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6743 8; 6744 dui->dl_group_address = 1; 6745 ucp = (uchar_t *)(dui + 1); 6746 if (ill->ill_sap_length > 0) 6747 ucp += ill->ill_sap_length; 6748 bcopy(&eh->ether_dhost, ucp, 6); 6749 bcopy(&eh->ether_shost, ucp + 8, 6); 6750 ucp = (uchar_t *)(dui + 1); 6751 if (ill->ill_sap_length < 0) 6752 ucp += 8 + ill->ill_sap_length; 6753 bcopy(&eh->ether_type, ucp, 2); 6754 bcopy(&eh->ether_type, ucp + 8, 2); 6755 } 6756 } 6757 break; 6758 } 6759 6760 case M_PROTO: 6761 case M_PCPROTO: 6762 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6763 
DL_UNITDATA_IND) { 6764 /* Go handle anything other than data elsewhere. */ 6765 ip_rput_dlpi(q, mp); 6766 return; 6767 } 6768 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6769 ll_multicast = dlur->dl_group_address; 6770 #undef dlur 6771 /* Save the DLPI header. */ 6772 dl_mp = mp; 6773 mp = mp->b_cont; 6774 dl_mp->b_cont = NULL; 6775 break; 6776 case M_BREAK: 6777 panic("ip_rput_v6: got an M_BREAK"); 6778 /*NOTREACHED*/ 6779 case M_IOCACK: 6780 iocp = (struct iocblk *)mp->b_rptr; 6781 switch (iocp->ioc_cmd) { 6782 case DL_IOC_HDR_INFO: 6783 ill = (ill_t *)q->q_ptr; 6784 ill_fastpath_ack(ill, mp); 6785 return; 6786 case SIOCSTUNPARAM: 6787 case SIOCGTUNPARAM: 6788 case OSIOCSTUNPARAM: 6789 case OSIOCGTUNPARAM: 6790 /* Go through qwriter */ 6791 break; 6792 default: 6793 putnext(q, mp); 6794 return; 6795 } 6796 /* FALLTHRU */ 6797 case M_ERROR: 6798 case M_HANGUP: 6799 mutex_enter(&ill->ill_lock); 6800 if (ill->ill_state_flags & ILL_CONDEMNED) { 6801 mutex_exit(&ill->ill_lock); 6802 freemsg(mp); 6803 return; 6804 } 6805 ill_refhold_locked(ill); 6806 mutex_exit(&ill->ill_lock); 6807 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6808 return; 6809 case M_CTL: 6810 if ((MBLKL(mp) > sizeof (int)) && 6811 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6812 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6813 mctl_present = B_TRUE; 6814 break; 6815 } 6816 putnext(q, mp); 6817 return; 6818 case M_IOCNAK: 6819 iocp = (struct iocblk *)mp->b_rptr; 6820 switch (iocp->ioc_cmd) { 6821 case DL_IOC_HDR_INFO: 6822 case SIOCSTUNPARAM: 6823 case SIOCGTUNPARAM: 6824 case OSIOCSTUNPARAM: 6825 case OSIOCGTUNPARAM: 6826 mutex_enter(&ill->ill_lock); 6827 if (ill->ill_state_flags & ILL_CONDEMNED) { 6828 mutex_exit(&ill->ill_lock); 6829 freemsg(mp); 6830 return; 6831 } 6832 ill_refhold_locked(ill); 6833 mutex_exit(&ill->ill_lock); 6834 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6835 B_FALSE); 6836 return; 6837 default: 6838 break; 6839 } 6840 /* FALLTHRU */ 6841 
default: 6842 putnext(q, mp); 6843 return; 6844 } 6845 6846 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 6847 /* 6848 * if db_ref > 1 then copymsg and free original. Packet may be 6849 * changed and do not want other entity who has a reference to this 6850 * message to trip over the changes. This is a blind change because 6851 * trying to catch all places that might change packet is too 6852 * difficult (since it may be a module above this one). 6853 */ 6854 if (mp->b_datap->db_ref > 1) { 6855 mblk_t *mp1; 6856 6857 mp1 = copymsg(mp); 6858 freemsg(mp); 6859 if (mp1 == NULL) { 6860 first_mp = NULL; 6861 goto discard; 6862 } 6863 mp = mp1; 6864 } 6865 first_mp = mp; 6866 if (mctl_present) { 6867 hada_mp = first_mp; 6868 mp = first_mp->b_cont; 6869 } 6870 6871 ip6h = (ip6_t *)mp->b_rptr; 6872 6873 /* check for alignment and full IPv6 header */ 6874 if (!OK_32PTR((uchar_t *)ip6h) || 6875 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6876 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6877 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6878 goto discard; 6879 } 6880 ip6h = (ip6_t *)mp->b_rptr; 6881 } 6882 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6883 IPV6_DEFAULT_VERS_AND_FLOW) { 6884 /* 6885 * It may be a bit too expensive to do this mapped address 6886 * check here, but in the interest of robustness, it seems 6887 * like the correct place. 6888 * TODO: Avoid this check for e.g. connected TCP sockets 6889 */ 6890 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6891 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6892 goto discard; 6893 } 6894 6895 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6896 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6897 goto discard; 6898 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6899 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6900 goto discard; 6901 } 6902 6903 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6904 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6905 } else { 6906 BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); 6907 goto discard; 6908 } 6909 freemsg(dl_mp); 6910 return; 6911 6912 discard: 6913 if (dl_mp != NULL) 6914 freeb(dl_mp); 6915 freemsg(first_mp); 6916 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6917 } 6918 6919 /* 6920 * Walk through the IPv6 packet in mp and see if there's an AH header 6921 * in it. See if the AH header needs to get done before other headers in 6922 * the packet. (Worker function for ipsec_early_ah_v6().) 6923 */ 6924 #define IPSEC_HDR_DONT_PROCESS 0 6925 #define IPSEC_HDR_PROCESS 1 6926 #define IPSEC_MEMORY_ERROR 2 6927 static int 6928 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6929 { 6930 uint_t length; 6931 uint_t ehdrlen; 6932 uint8_t *whereptr; 6933 uint8_t *endptr; 6934 uint8_t *nexthdrp; 6935 ip6_dest_t *desthdr; 6936 ip6_rthdr_t *rthdr; 6937 ip6_t *ip6h; 6938 6939 /* 6940 * For now just pullup everything. In general, the less pullups, 6941 * the better, but there's so much squirrelling through anyway, 6942 * it's just easier this way. 6943 */ 6944 if (!pullupmsg(mp, -1)) { 6945 return (IPSEC_MEMORY_ERROR); 6946 } 6947 6948 ip6h = (ip6_t *)mp->b_rptr; 6949 length = IPV6_HDR_LEN; 6950 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6951 endptr = mp->b_wptr; 6952 6953 /* 6954 * We can't just use the argument nexthdr in the place 6955 * of nexthdrp becaue we don't dereference nexthdrp 6956 * till we confirm whether it is a valid address. 6957 */ 6958 nexthdrp = &ip6h->ip6_nxt; 6959 while (whereptr < endptr) { 6960 /* Is there enough left for len + nexthdr? 
*/ 6961 if (whereptr + MIN_EHDR_LEN > endptr) 6962 return (IPSEC_MEMORY_ERROR); 6963 6964 switch (*nexthdrp) { 6965 case IPPROTO_HOPOPTS: 6966 case IPPROTO_DSTOPTS: 6967 /* Assumes the headers are identical for hbh and dst */ 6968 desthdr = (ip6_dest_t *)whereptr; 6969 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6970 if ((uchar_t *)desthdr + ehdrlen > endptr) 6971 return (IPSEC_MEMORY_ERROR); 6972 /* 6973 * Return DONT_PROCESS because of potential Mobile IPv6 6974 * cruft for destination options. 6975 */ 6976 if (*nexthdrp == IPPROTO_DSTOPTS) 6977 return (IPSEC_HDR_DONT_PROCESS); 6978 nexthdrp = &desthdr->ip6d_nxt; 6979 break; 6980 case IPPROTO_ROUTING: 6981 rthdr = (ip6_rthdr_t *)whereptr; 6982 6983 /* 6984 * If there's more hops left on the routing header, 6985 * return now with DON'T PROCESS. 6986 */ 6987 if (rthdr->ip6r_segleft > 0) 6988 return (IPSEC_HDR_DONT_PROCESS); 6989 6990 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6991 if ((uchar_t *)rthdr + ehdrlen > endptr) 6992 return (IPSEC_MEMORY_ERROR); 6993 nexthdrp = &rthdr->ip6r_nxt; 6994 break; 6995 case IPPROTO_FRAGMENT: 6996 /* Wait for reassembly */ 6997 return (IPSEC_HDR_DONT_PROCESS); 6998 case IPPROTO_AH: 6999 *nexthdr = IPPROTO_AH; 7000 return (IPSEC_HDR_PROCESS); 7001 case IPPROTO_NONE: 7002 /* No next header means we're finished */ 7003 default: 7004 return (IPSEC_HDR_DONT_PROCESS); 7005 } 7006 length += ehdrlen; 7007 whereptr += ehdrlen; 7008 } 7009 panic("ipsec_needs_processing_v6"); 7010 /*NOTREACHED*/ 7011 } 7012 7013 /* 7014 * Path for AH if options are present. If this is the first time we are 7015 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7016 * Otherwise, just fanout. Return value answers the boolean question: 7017 * "Did I consume the mblk you sent me?" 7018 * 7019 * Sometimes AH needs to be done before other IPv6 headers for security 7020 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 7021 * indicates if that is so, and fans out to the appropriate IPsec protocol 7022 * for the datagram passed in. 7023 */ 7024 static boolean_t 7025 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7026 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 7027 { 7028 mblk_t *mp; 7029 uint8_t nexthdr; 7030 ipsec_in_t *ii = NULL; 7031 ah_t *ah; 7032 ipsec_status_t ipsec_rc; 7033 7034 ASSERT((hada_mp == NULL) || (!mctl_present)); 7035 7036 switch (ipsec_needs_processing_v6( 7037 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 7038 case IPSEC_MEMORY_ERROR: 7039 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7040 freemsg(hada_mp); 7041 freemsg(first_mp); 7042 return (B_TRUE); 7043 case IPSEC_HDR_DONT_PROCESS: 7044 return (B_FALSE); 7045 } 7046 7047 /* Default means send it to AH! */ 7048 ASSERT(nexthdr == IPPROTO_AH); 7049 if (!mctl_present) { 7050 mp = first_mp; 7051 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 7052 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7053 "allocation failure.\n")); 7054 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7055 freemsg(hada_mp); 7056 freemsg(mp); 7057 return (B_TRUE); 7058 } 7059 /* 7060 * Store the ill_index so that when we come back 7061 * from IPSEC we ride on the same queue. 7062 */ 7063 ii = (ipsec_in_t *)first_mp->b_rptr; 7064 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7065 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7066 first_mp->b_cont = mp; 7067 } 7068 /* 7069 * Cache hardware acceleration info. 
7070 */ 7071 if (hada_mp != NULL) { 7072 ASSERT(ii != NULL); 7073 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7074 "caching data attr.\n")); 7075 ii->ipsec_in_accelerated = B_TRUE; 7076 ii->ipsec_in_da = hada_mp; 7077 } 7078 7079 if (!ipsec_loaded()) { 7080 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 7081 return (B_TRUE); 7082 } 7083 7084 ah = ipsec_inbound_ah_sa(first_mp); 7085 if (ah == NULL) 7086 return (B_TRUE); 7087 ASSERT(ii->ipsec_in_ah_sa != NULL); 7088 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7089 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7090 7091 switch (ipsec_rc) { 7092 case IPSEC_STATUS_SUCCESS: 7093 /* we're done with IPsec processing, send it up */ 7094 ip_fanout_proto_again(first_mp, ill, ill, ire); 7095 break; 7096 case IPSEC_STATUS_FAILED: 7097 BUMP_MIB(&ip6_mib, ipv6InDiscards); 7098 break; 7099 case IPSEC_STATUS_PENDING: 7100 /* no action needed */ 7101 break; 7102 } 7103 return (B_TRUE); 7104 } 7105 7106 /* 7107 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7108 * ip_rput_v6 has already verified alignment, the min length, the version, 7109 * and db_ref = 1. 7110 * 7111 * The ill passed in (the arg named inill) is the ill that the packet 7112 * actually arrived on. We need to remember this when saving the 7113 * input interface index into potential IPV6_PKTINFO data in 7114 * ip_add_info_v6(). 7115 * 7116 * This routine doesn't free dl_mp; that's the caller's responsibility on 7117 * return. (Note that the callers are complex enough that there's no tail 7118 * recursion here anyway.) 
7119 */ 7120 void 7121 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7122 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7123 { 7124 ire_t *ire = NULL; 7125 queue_t *rq; 7126 ill_t *ill = inill; 7127 ipif_t *ipif; 7128 uint8_t *whereptr; 7129 uint8_t nexthdr; 7130 uint16_t remlen; 7131 uint_t prev_nexthdr_offset; 7132 uint_t used; 7133 size_t pkt_len; 7134 uint16_t ip6_len; 7135 uint_t hdr_len; 7136 boolean_t mctl_present; 7137 mblk_t *first_mp; 7138 mblk_t *first_mp1; 7139 boolean_t no_forward; 7140 ip6_hbh_t *hbhhdr; 7141 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7142 conn_t *connp; 7143 ilm_t *ilm; 7144 uint32_t ports; 7145 uint_t ipif_id = 0; 7146 zoneid_t zoneid = GLOBAL_ZONEID; 7147 uint16_t hck_flags, reass_hck_flags; 7148 uint32_t reass_sum; 7149 boolean_t cksum_err; 7150 mblk_t *mp1; 7151 7152 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7153 7154 if (hada_mp != NULL) { 7155 /* 7156 * It's an IPsec accelerated packet. 7157 * Keep a pointer to the data attributes around until 7158 * we allocate the ipsecinfo structure. 7159 */ 7160 IPSECHW_DEBUG(IPSECHW_PKT, 7161 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7162 hada_mp->b_cont = NULL; 7163 /* 7164 * Since it is accelerated, it came directly from 7165 * the ill. 7166 */ 7167 ASSERT(mctl_present == B_FALSE); 7168 ASSERT(mp->b_datap->db_type != M_CTL); 7169 } 7170 7171 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7172 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7173 7174 if (mp->b_cont == NULL) 7175 pkt_len = mp->b_wptr - mp->b_rptr; 7176 else 7177 pkt_len = msgdsize(mp); 7178 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7179 7180 /* 7181 * Check for bogus (too short packet) and packet which 7182 * was padded by the link layer. 
7183 */ 7184 if (ip6_len != pkt_len) { 7185 ssize_t diff; 7186 7187 if (ip6_len > pkt_len) { 7188 ip1dbg(("ip_rput_data_v6: packet too short %d %lu\n", 7189 ip6_len, pkt_len)); 7190 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 7191 freemsg(hada_mp); 7192 freemsg(first_mp); 7193 return; 7194 } 7195 diff = (ssize_t)(pkt_len - ip6_len); 7196 7197 if (!adjmsg(mp, -diff)) { 7198 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7199 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7200 freemsg(hada_mp); 7201 freemsg(first_mp); 7202 return; 7203 } 7204 pkt_len -= diff; 7205 } 7206 7207 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7208 hck_flags = DB_CKSUMFLAGS(mp); 7209 else 7210 hck_flags = 0; 7211 7212 /* Clear checksum flags in case we need to forward */ 7213 DB_CKSUMFLAGS(mp) = 0; 7214 reass_sum = reass_hck_flags = 0; 7215 7216 nexthdr = ip6h->ip6_nxt; 7217 7218 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7219 (uchar_t *)ip6h); 7220 whereptr = (uint8_t *)&ip6h[1]; 7221 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7222 7223 /* Process hop by hop header options */ 7224 if (nexthdr == IPPROTO_HOPOPTS) { 7225 uint_t ehdrlen; 7226 uint8_t *optptr; 7227 7228 if (remlen < MIN_EHDR_LEN) 7229 goto pkt_too_short; 7230 if (mp->b_cont != NULL && 7231 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7232 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7233 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7234 freemsg(hada_mp); 7235 freemsg(first_mp); 7236 return; 7237 } 7238 ip6h = (ip6_t *)mp->b_rptr; 7239 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7240 } 7241 hbhhdr = (ip6_hbh_t *)whereptr; 7242 nexthdr = hbhhdr->ip6h_nxt; 7243 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7244 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7245 7246 if (remlen < ehdrlen) 7247 goto pkt_too_short; 7248 if (mp->b_cont != NULL && 7249 whereptr + ehdrlen > mp->b_wptr) { 7250 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7251 BUMP_MIB(ill->ill_ip6_mib, 
ipv6InDiscards); 7252 freemsg(hada_mp); 7253 freemsg(first_mp); 7254 return; 7255 } 7256 ip6h = (ip6_t *)mp->b_rptr; 7257 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7258 hbhhdr = (ip6_hbh_t *)whereptr; 7259 } 7260 7261 optptr = whereptr + 2; 7262 whereptr += ehdrlen; 7263 remlen -= ehdrlen; 7264 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7265 ehdrlen - 2, IPPROTO_HOPOPTS)) { 7266 case -1: 7267 /* 7268 * Packet has been consumed and any 7269 * needed ICMP messages sent. 7270 */ 7271 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7272 freemsg(hada_mp); 7273 return; 7274 case 0: 7275 /* no action needed */ 7276 break; 7277 case 1: 7278 /* Known router alert */ 7279 goto ipv6forus; 7280 } 7281 } 7282 7283 /* 7284 * Attach any necessary label information to this packet. 7285 */ 7286 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7287 if (ip6opt_ls != 0) 7288 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7289 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7290 freemsg(hada_mp); 7291 freemsg(first_mp); 7292 return; 7293 } 7294 7295 /* 7296 * On incoming v6 multicast packets we will bypass the ire table, 7297 * and assume that the read queue corresponds to the targetted 7298 * interface. 7299 * 7300 * The effect of this is the same as the IPv4 original code, but is 7301 * much cleaner I think. See ip_rput for how that was done. 7302 */ 7303 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7304 BUMP_MIB(ill->ill_ip6_mib, ipv6InMcastPkts); 7305 /* 7306 * XXX TODO Give to mrouted to for multicast forwarding. 
7307 */ 7308 ILM_WALKER_HOLD(ill); 7309 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7310 ILM_WALKER_RELE(ill); 7311 if (ilm == NULL) { 7312 if (ip_debug > 3) { 7313 /* ip2dbg */ 7314 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7315 " which is not for us: %s\n", AF_INET6, 7316 &ip6h->ip6_dst); 7317 } 7318 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7319 freemsg(hada_mp); 7320 freemsg(first_mp); 7321 return; 7322 } 7323 if (ip_debug > 3) { 7324 /* ip2dbg */ 7325 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7326 AF_INET6, &ip6h->ip6_dst); 7327 } 7328 rq = ill->ill_rq; 7329 zoneid = GLOBAL_ZONEID; 7330 goto ipv6forus; 7331 } 7332 7333 ipif = ill->ill_ipif; 7334 7335 /* 7336 * If a packet was received on an interface that is a 6to4 tunnel, 7337 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7338 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7339 * the 6to4 prefix of the address configured on the receiving interface. 7340 * Otherwise, the packet was delivered to this interface in error and 7341 * the packet must be dropped. 7342 */ 7343 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7344 7345 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7346 &ip6h->ip6_dst)) { 7347 if (ip_debug > 2) { 7348 /* ip1dbg */ 7349 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7350 "addressed packet which is not for us: " 7351 "%s\n", AF_INET6, &ip6h->ip6_dst); 7352 } 7353 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7354 freemsg(first_mp); 7355 return; 7356 } 7357 } 7358 7359 /* 7360 * Find an ire that matches destination. For link-local addresses 7361 * we have to match the ill. 7362 * TBD for site local addresses. 
7363 */ 7364 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7365 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7366 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7367 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7368 } else { 7369 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7370 MBLK_GETLABEL(mp)); 7371 } 7372 if (ire == NULL) { 7373 /* 7374 * No matching IRE found. Mark this packet as having 7375 * originated externally. 7376 */ 7377 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7378 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7379 if (!(ill->ill_flags & ILLF_ROUTER)) 7380 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7381 freemsg(hada_mp); 7382 freemsg(first_mp); 7383 return; 7384 } 7385 if (ip6h->ip6_hops <= 1) { 7386 if (hada_mp != NULL) 7387 goto hada_drop; 7388 icmp_time_exceeded_v6(WR(q), first_mp, 7389 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7390 return; 7391 } 7392 /* 7393 * Per RFC 3513 section 2.5.2, we must not forward packets with 7394 * an unspecified source address. 7395 */ 7396 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7397 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7398 freemsg(hada_mp); 7399 freemsg(first_mp); 7400 return; 7401 } 7402 mp->b_prev = (mblk_t *)(uintptr_t) 7403 ill->ill_phyint->phyint_ifindex; 7404 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7405 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7406 ALL_ZONES); 7407 return; 7408 } 7409 ipif_id = ire->ire_ipif->ipif_seqid; 7410 /* we have a matching IRE */ 7411 if (ire->ire_stq != NULL) { 7412 ill_group_t *ill_group; 7413 ill_group_t *ire_group; 7414 7415 /* 7416 * To be quicker, we may wish not to chase pointers 7417 * (ire->ire_ipif->ipif_ill...) and instead store the 7418 * forwarding policy in the ire. An unfortunate side- 7419 * effect of this would be requiring an ire flush whenever 7420 * the ILLF_ROUTER flag changes. For now, chase pointers 7421 * once and store in the boolean no_forward. 
7422 * 7423 * This appears twice to keep it out of the non-forwarding, 7424 * yes-it's-for-us-on-the-right-interface case. 7425 */ 7426 no_forward = ((ill->ill_flags & 7427 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7428 7429 7430 ASSERT(first_mp == mp); 7431 /* 7432 * This ire has a send-to queue - forward the packet. 7433 */ 7434 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7435 freemsg(hada_mp); 7436 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7437 if (no_forward) 7438 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7439 freemsg(mp); 7440 ire_refrele(ire); 7441 return; 7442 } 7443 if (ip6h->ip6_hops <= 1) { 7444 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7445 icmp_time_exceeded_v6(WR(q), mp, 7446 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7447 ire_refrele(ire); 7448 return; 7449 } 7450 /* 7451 * Per RFC 3513 section 2.5.2, we must not forward packets with 7452 * an unspecified source address. 7453 */ 7454 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7455 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7456 freemsg(mp); 7457 ire_refrele(ire); 7458 return; 7459 } 7460 7461 if (is_system_labeled()) { 7462 mblk_t *mp1; 7463 7464 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7465 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7466 freemsg(mp); 7467 ire_refrele(ire); 7468 return; 7469 } 7470 /* Size may have changed */ 7471 mp = mp1; 7472 ip6h = (ip6_t *)mp->b_rptr; 7473 pkt_len = msgdsize(mp); 7474 } 7475 7476 if (pkt_len > ire->ire_max_frag) { 7477 BUMP_MIB(ill->ill_ip6_mib, ipv6InTooBigErrors); 7478 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7479 ll_multicast, B_TRUE); 7480 ire_refrele(ire); 7481 return; 7482 } 7483 7484 /* 7485 * Check to see if we're forwarding the packet to a 7486 * different link from which it came. If so, check the 7487 * source and destination addresses since routers must not 7488 * forward any packets with link-local source or 7489 * destination addresses to other links. 
Otherwise (if 7490 * we're forwarding onto the same link), conditionally send 7491 * a redirect message. 7492 */ 7493 ill_group = ill->ill_group; 7494 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7495 if (ire->ire_rfq != q && (ill_group == NULL || 7496 ill_group != ire_group)) { 7497 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7498 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7499 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7500 freemsg(mp); 7501 ire_refrele(ire); 7502 return; 7503 } 7504 /* TBD add site-local check at site boundary? */ 7505 } else if (ipv6_send_redirects) { 7506 in6_addr_t *v6targ; 7507 in6_addr_t gw_addr_v6; 7508 ire_t *src_ire_v6 = NULL; 7509 7510 /* 7511 * Don't send a redirect when forwarding a source 7512 * routed packet. 7513 */ 7514 if (ip_source_routed_v6(ip6h, mp)) 7515 goto forward; 7516 7517 mutex_enter(&ire->ire_lock); 7518 gw_addr_v6 = ire->ire_gateway_addr_v6; 7519 mutex_exit(&ire->ire_lock); 7520 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7521 v6targ = &gw_addr_v6; 7522 /* 7523 * We won't send redirects to a router 7524 * that doesn't have a link local 7525 * address, but will forward. 7526 */ 7527 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7528 BUMP_MIB(ill->ill_ip6_mib, 7529 ipv6InAddrErrors); 7530 goto forward; 7531 } 7532 } else { 7533 v6targ = &ip6h->ip6_dst; 7534 } 7535 7536 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7537 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7538 ALL_ZONES, 0, NULL, 7539 MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7540 7541 if (src_ire_v6 != NULL) { 7542 /* 7543 * The source is directly connected. 
7544 */ 7545 mp1 = copymsg(mp); 7546 if (mp1 != NULL) { 7547 icmp_send_redirect_v6(WR(q), 7548 mp1, v6targ, &ip6h->ip6_dst, 7549 ill, B_FALSE); 7550 } 7551 ire_refrele(src_ire_v6); 7552 } 7553 } 7554 7555 forward: 7556 /* Hoplimit verified above */ 7557 ip6h->ip6_hops--; 7558 UPDATE_IB_PKT_COUNT(ire); 7559 ire->ire_last_used_time = lbolt; 7560 BUMP_MIB(ill->ill_ip6_mib, ipv6OutForwDatagrams); 7561 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7562 IRE_REFRELE(ire); 7563 return; 7564 } 7565 rq = ire->ire_rfq; 7566 7567 /* 7568 * Need to put on correct queue for reassembly to find it. 7569 * No need to use put() since reassembly has its own locks. 7570 * Note: multicast packets and packets destined to addresses 7571 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7572 * the arriving ill. 7573 */ 7574 if (rq != q) { 7575 boolean_t check_multi = B_TRUE; 7576 ill_group_t *ill_group = NULL; 7577 ill_group_t *ire_group = NULL; 7578 ill_t *ire_ill = NULL; 7579 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7580 7581 /* 7582 * To be quicker, we may wish not to chase pointers 7583 * (ire->ire_ipif->ipif_ill...) and instead store the 7584 * forwarding policy in the ire. An unfortunate side- 7585 * effect of this would be requiring an ire flush whenever 7586 * the ILLF_ROUTER flag changes. For now, chase pointers 7587 * once and store in the boolean no_forward. 7588 */ 7589 no_forward = ((ill->ill_flags & 7590 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7591 7592 ill_group = ill->ill_group; 7593 if (rq != NULL) { 7594 ire_ill = (ill_t *)(rq->q_ptr); 7595 ire_group = ire_ill->ill_group; 7596 } 7597 7598 /* 7599 * If it's part of the same IPMP group, or if it's a legal 7600 * address on the 'usesrc' interface, then bypass strict 7601 * checks. 
7602 */ 7603 if (ill_group != NULL && ill_group == ire_group) { 7604 check_multi = B_FALSE; 7605 } else if (ill_ifindex != 0 && ire_ill != NULL && 7606 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7607 check_multi = B_FALSE; 7608 } 7609 7610 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7611 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7612 /* 7613 * This packet came in on an interface other than the 7614 * one associated with the destination address 7615 * and we are strict about matches. 7616 * 7617 * As long as the ills belong to the same group, 7618 * we don't consider them to arriving on the wrong 7619 * interface. Thus, when the switch is doing inbound 7620 * load spreading, we won't drop packets when we 7621 * are doing strict multihoming checks. 7622 */ 7623 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7624 freemsg(hada_mp); 7625 freemsg(first_mp); 7626 ire_refrele(ire); 7627 return; 7628 } 7629 7630 if (rq != NULL) 7631 q = rq; 7632 7633 ill = (ill_t *)q->q_ptr; 7634 ASSERT(ill); 7635 } 7636 7637 zoneid = ire->ire_zoneid; 7638 UPDATE_IB_PKT_COUNT(ire); 7639 ire->ire_last_used_time = lbolt; 7640 /* Don't use the ire after this point. */ 7641 ire_refrele(ire); 7642 ipv6forus: 7643 /* 7644 * Looks like this packet is for us one way or another. 7645 * This is where we'll process destination headers etc. 
7646 */ 7647 for (; ; ) { 7648 switch (nexthdr) { 7649 case IPPROTO_TCP: { 7650 uint16_t *up; 7651 uint32_t sum; 7652 int offset; 7653 7654 hdr_len = pkt_len - remlen; 7655 7656 if (hada_mp != NULL) { 7657 ip0dbg(("tcp hada drop\n")); 7658 goto hada_drop; 7659 } 7660 7661 7662 /* TCP needs all of the TCP header */ 7663 if (remlen < TCP_MIN_HEADER_LENGTH) 7664 goto pkt_too_short; 7665 if (mp->b_cont != NULL && 7666 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7667 if (!pullupmsg(mp, 7668 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7669 BUMP_MIB(ill->ill_ip6_mib, 7670 ipv6InDiscards); 7671 freemsg(first_mp); 7672 return; 7673 } 7674 hck_flags = 0; 7675 ip6h = (ip6_t *)mp->b_rptr; 7676 whereptr = (uint8_t *)ip6h + hdr_len; 7677 } 7678 /* 7679 * Extract the offset field from the TCP header. 7680 */ 7681 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7682 if (offset != 5) { 7683 if (offset < 5) { 7684 ip1dbg(("ip_rput_data_v6: short " 7685 "TCP data offset")); 7686 BUMP_MIB(ill->ill_ip6_mib, 7687 ipv6InDiscards); 7688 freemsg(first_mp); 7689 return; 7690 } 7691 /* 7692 * There must be TCP options. 7693 * Make sure we can grab them. 7694 */ 7695 offset <<= 2; 7696 if (remlen < offset) 7697 goto pkt_too_short; 7698 if (mp->b_cont != NULL && 7699 whereptr + offset > mp->b_wptr) { 7700 if (!pullupmsg(mp, 7701 hdr_len + offset)) { 7702 BUMP_MIB(ill->ill_ip6_mib, 7703 ipv6InDiscards); 7704 freemsg(first_mp); 7705 return; 7706 } 7707 hck_flags = 0; 7708 ip6h = (ip6_t *)mp->b_rptr; 7709 whereptr = (uint8_t *)ip6h + hdr_len; 7710 } 7711 } 7712 7713 up = (uint16_t *)&ip6h->ip6_src; 7714 /* 7715 * TCP checksum calculation. 
First sum up the 7716 * pseudo-header fields: 7717 * - Source IPv6 address 7718 * - Destination IPv6 address 7719 * - TCP payload length 7720 * - TCP protocol ID 7721 */ 7722 sum = htons(IPPROTO_TCP + remlen) + 7723 up[0] + up[1] + up[2] + up[3] + 7724 up[4] + up[5] + up[6] + up[7] + 7725 up[8] + up[9] + up[10] + up[11] + 7726 up[12] + up[13] + up[14] + up[15]; 7727 7728 /* Fold initial sum */ 7729 sum = (sum & 0xffff) + (sum >> 16); 7730 7731 mp1 = mp->b_cont; 7732 7733 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7734 IP6_STAT(ip6_in_sw_cksum); 7735 7736 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7737 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7738 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7739 mp, mp1, cksum_err); 7740 7741 if (cksum_err) { 7742 BUMP_MIB(&ip_mib, tcpInErrs); 7743 7744 if (hck_flags & HCK_FULLCKSUM) 7745 IP6_STAT(ip6_tcp_in_full_hw_cksum_err); 7746 else if (hck_flags & HCK_PARTIALCKSUM) 7747 IP6_STAT(ip6_tcp_in_part_hw_cksum_err); 7748 else 7749 IP6_STAT(ip6_tcp_in_sw_cksum_err); 7750 7751 freemsg(first_mp); 7752 return; 7753 } 7754 tcp_fanout: 7755 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7756 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7757 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7758 return; 7759 } 7760 case IPPROTO_SCTP: 7761 { 7762 sctp_hdr_t *sctph; 7763 uint32_t calcsum, pktsum; 7764 uint_t hdr_len = pkt_len - remlen; 7765 7766 /* SCTP needs all of the SCTP header */ 7767 if (remlen < sizeof (*sctph)) { 7768 goto pkt_too_short; 7769 } 7770 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7771 ASSERT(mp->b_cont != NULL); 7772 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7773 BUMP_MIB(ill->ill_ip6_mib, 7774 ipv6InDiscards); 7775 freemsg(mp); 7776 return; 7777 } 7778 ip6h = (ip6_t *)mp->b_rptr; 7779 whereptr = (uint8_t *)ip6h + hdr_len; 7780 } 7781 7782 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7783 /* checksum */ 7784 pktsum = sctph->sh_chksum; 7785 sctph->sh_chksum = 0; 7786 calcsum = sctp_cksum(mp, 
hdr_len); 7787 if (calcsum != pktsum) { 7788 BUMP_MIB(&sctp_mib, sctpChecksumError); 7789 freemsg(mp); 7790 return; 7791 } 7792 sctph->sh_chksum = pktsum; 7793 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7794 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7795 ports, ipif_id, zoneid, mp)) == NULL) { 7796 ip_fanout_sctp_raw(first_mp, ill, 7797 (ipha_t *)ip6h, B_FALSE, ports, 7798 mctl_present, 7799 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7800 B_TRUE, ipif_id, zoneid); 7801 return; 7802 } 7803 BUMP_MIB(&ip_mib, ipInDelivers); 7804 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7805 B_FALSE, mctl_present); 7806 return; 7807 } 7808 case IPPROTO_UDP: { 7809 uint16_t *up; 7810 uint32_t sum; 7811 7812 hdr_len = pkt_len - remlen; 7813 7814 if (hada_mp != NULL) { 7815 ip0dbg(("udp hada drop\n")); 7816 goto hada_drop; 7817 } 7818 7819 /* Verify that at least the ports are present */ 7820 if (remlen < UDPH_SIZE) 7821 goto pkt_too_short; 7822 if (mp->b_cont != NULL && 7823 whereptr + UDPH_SIZE > mp->b_wptr) { 7824 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7825 BUMP_MIB(ill->ill_ip6_mib, 7826 ipv6InDiscards); 7827 freemsg(first_mp); 7828 return; 7829 } 7830 hck_flags = 0; 7831 ip6h = (ip6_t *)mp->b_rptr; 7832 whereptr = (uint8_t *)ip6h + hdr_len; 7833 } 7834 7835 /* 7836 * Before going through the regular checksum 7837 * calculation, make sure the received checksum 7838 * is non-zero. RFC 2460 says, a 0x0000 checksum 7839 * in a UDP packet (within IPv6 packet) is invalid 7840 * and should be replaced by 0xffff. This makes 7841 * sense as regular checksum calculation will 7842 * pass for both the cases i.e. 0x0000 and 0xffff. 7843 * Removing one of the case makes error detection 7844 * stronger. 
7845 */ 7846 7847 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7848 /* 0x0000 checksum is invalid */ 7849 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7850 "checksum value 0x0000\n")); 7851 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7852 freemsg(first_mp); 7853 return; 7854 } 7855 7856 up = (uint16_t *)&ip6h->ip6_src; 7857 7858 /* 7859 * UDP checksum calculation. First sum up the 7860 * pseudo-header fields: 7861 * - Source IPv6 address 7862 * - Destination IPv6 address 7863 * - UDP payload length 7864 * - UDP protocol ID 7865 */ 7866 7867 sum = htons(IPPROTO_UDP + remlen) + 7868 up[0] + up[1] + up[2] + up[3] + 7869 up[4] + up[5] + up[6] + up[7] + 7870 up[8] + up[9] + up[10] + up[11] + 7871 up[12] + up[13] + up[14] + up[15]; 7872 7873 /* Fold initial sum */ 7874 sum = (sum & 0xffff) + (sum >> 16); 7875 7876 if (reass_hck_flags != 0) { 7877 hck_flags = reass_hck_flags; 7878 7879 IP_CKSUM_RECV_REASS(hck_flags, 7880 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7881 sum, reass_sum, cksum_err); 7882 } else { 7883 mp1 = mp->b_cont; 7884 7885 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7886 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7887 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7888 mp, mp1, cksum_err); 7889 } 7890 7891 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7892 IP6_STAT(ip6_in_sw_cksum); 7893 7894 if (cksum_err) { 7895 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7896 7897 if (hck_flags & HCK_FULLCKSUM) 7898 IP6_STAT(ip6_udp_in_full_hw_cksum_err); 7899 else if (hck_flags & HCK_PARTIALCKSUM) 7900 IP6_STAT(ip6_udp_in_part_hw_cksum_err); 7901 else 7902 IP6_STAT(ip6_udp_in_sw_cksum_err); 7903 7904 freemsg(first_mp); 7905 return; 7906 } 7907 goto udp_fanout; 7908 } 7909 case IPPROTO_ICMPV6: { 7910 uint16_t *up; 7911 uint32_t sum; 7912 uint_t hdr_len = pkt_len - remlen; 7913 7914 if (hada_mp != NULL) { 7915 ip0dbg(("icmp hada drop\n")); 7916 goto hada_drop; 7917 } 7918 7919 up = (uint16_t *)&ip6h->ip6_src; 7920 sum = htons(IPPROTO_ICMPV6 + remlen) + 7921 
up[0] + up[1] + up[2] + up[3] + 7922 up[4] + up[5] + up[6] + up[7] + 7923 up[8] + up[9] + up[10] + up[11] + 7924 up[12] + up[13] + up[14] + up[15]; 7925 sum = (sum & 0xffff) + (sum >> 16); 7926 sum = IP_CSUM(mp, hdr_len, sum); 7927 if (sum != 0) { 7928 /* IPv6 ICMP checksum failed */ 7929 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7930 "failed %x\n", 7931 sum)); 7932 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7933 BUMP_MIB(ill->ill_icmp6_mib, 7934 ipv6IfIcmpInErrors); 7935 freemsg(first_mp); 7936 return; 7937 } 7938 7939 icmp_fanout: 7940 /* Check variable for testing applications */ 7941 if (ipv6_drop_inbound_icmpv6) { 7942 freemsg(first_mp); 7943 return; 7944 } 7945 /* 7946 * Assume that there is always at least one conn for 7947 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7948 * where there is no conn. 7949 */ 7950 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7951 ASSERT(!(ill->ill_phyint->phyint_flags & 7952 PHYI_LOOPBACK)); 7953 /* 7954 * In the multicast case, applications may have 7955 * joined the group from different zones, so we 7956 * need to deliver the packet to each of them. 7957 * Loop through the multicast memberships 7958 * structures (ilm) on the receive ill and send 7959 * a copy of the packet up each matching one. 
7960 */ 7961 ILM_WALKER_HOLD(ill); 7962 for (ilm = ill->ill_ilm; ilm != NULL; 7963 ilm = ilm->ilm_next) { 7964 if (ilm->ilm_flags & ILM_DELETED) 7965 continue; 7966 if (!IN6_ARE_ADDR_EQUAL( 7967 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7968 continue; 7969 if (!ipif_lookup_zoneid(ill, 7970 ilm->ilm_zoneid, IPIF_UP, NULL)) 7971 continue; 7972 7973 first_mp1 = ip_copymsg(first_mp); 7974 if (first_mp1 == NULL) 7975 continue; 7976 icmp_inbound_v6(q, first_mp1, ill, 7977 hdr_len, mctl_present, 0, 7978 ilm->ilm_zoneid, dl_mp); 7979 } 7980 ILM_WALKER_RELE(ill); 7981 } else { 7982 first_mp1 = ip_copymsg(first_mp); 7983 if (first_mp1 != NULL) 7984 icmp_inbound_v6(q, first_mp1, ill, 7985 hdr_len, mctl_present, 0, zoneid, 7986 dl_mp); 7987 } 7988 } 7989 /* FALLTHRU */ 7990 default: { 7991 /* 7992 * Handle protocols with which IPv6 is less intimate. 7993 */ 7994 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 7995 7996 if (hada_mp != NULL) { 7997 ip0dbg(("default hada drop\n")); 7998 goto hada_drop; 7999 } 8000 8001 /* 8002 * Enable sending ICMP for "Unknown" nexthdr 8003 * case. i.e. where we did not FALLTHRU from 8004 * IPPROTO_ICMPV6 processing case above. 8005 * If we did FALLTHRU, then the packet has already been 8006 * processed for IPPF, don't process it again in 8007 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8008 * flags 8009 */ 8010 if (nexthdr != IPPROTO_ICMPV6) 8011 proto_flags |= IP_FF_SEND_ICMP; 8012 else 8013 proto_flags |= IP6_NO_IPPOLICY; 8014 8015 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8016 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8017 mctl_present, zoneid); 8018 return; 8019 } 8020 8021 case IPPROTO_DSTOPTS: { 8022 uint_t ehdrlen; 8023 uint8_t *optptr; 8024 ip6_dest_t *desthdr; 8025 8026 /* Check if AH is present. 
*/ 8027 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8028 ire, hada_mp, zoneid)) { 8029 ip0dbg(("dst early hada drop\n")); 8030 return; 8031 } 8032 8033 /* 8034 * Reinitialize pointers, as ipsec_early_ah_v6() does 8035 * complete pullups. We don't have to do more pullups 8036 * as a result. 8037 */ 8038 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8039 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8040 ip6h = (ip6_t *)mp->b_rptr; 8041 8042 if (remlen < MIN_EHDR_LEN) 8043 goto pkt_too_short; 8044 8045 desthdr = (ip6_dest_t *)whereptr; 8046 nexthdr = desthdr->ip6d_nxt; 8047 prev_nexthdr_offset = (uint_t)(whereptr - 8048 (uint8_t *)ip6h); 8049 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8050 if (remlen < ehdrlen) 8051 goto pkt_too_short; 8052 optptr = whereptr + 2; 8053 /* 8054 * Note: XXX This code does not seem to make 8055 * distinction between Destination Options Header 8056 * being before/after Routing Header which can 8057 * happen if we are at the end of source route. 8058 * This may become significant in future. 8059 * (No real significant Destination Options are 8060 * defined/implemented yet ). 8061 */ 8062 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8063 ehdrlen - 2, IPPROTO_DSTOPTS)) { 8064 case -1: 8065 /* 8066 * Packet has been consumed and any needed 8067 * ICMP errors sent. 
8068 */ 8069 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8070 freemsg(hada_mp); 8071 return; 8072 case 0: 8073 /* No action needed continue */ 8074 break; 8075 case 1: 8076 /* 8077 * Unnexpected return value 8078 * (Router alert is a Hop-by-Hop option) 8079 */ 8080 #ifdef DEBUG 8081 panic("ip_rput_data_v6: router " 8082 "alert hbh opt indication in dest opt"); 8083 /*NOTREACHED*/ 8084 #else 8085 freemsg(hada_mp); 8086 freemsg(first_mp); 8087 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8088 return; 8089 #endif 8090 } 8091 used = ehdrlen; 8092 break; 8093 } 8094 case IPPROTO_FRAGMENT: { 8095 ip6_frag_t *fraghdr; 8096 size_t no_frag_hdr_len; 8097 8098 if (hada_mp != NULL) { 8099 ip0dbg(("frag hada drop\n")); 8100 goto hada_drop; 8101 } 8102 8103 ASSERT(first_mp == mp); 8104 if (remlen < sizeof (ip6_frag_t)) 8105 goto pkt_too_short; 8106 8107 if (mp->b_cont != NULL && 8108 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8109 if (!pullupmsg(mp, 8110 pkt_len - remlen + sizeof (ip6_frag_t))) { 8111 BUMP_MIB(ill->ill_ip6_mib, 8112 ipv6InDiscards); 8113 freemsg(mp); 8114 return; 8115 } 8116 hck_flags = 0; 8117 ip6h = (ip6_t *)mp->b_rptr; 8118 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8119 } 8120 8121 fraghdr = (ip6_frag_t *)whereptr; 8122 used = (uint_t)sizeof (ip6_frag_t); 8123 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmReqds); 8124 8125 /* 8126 * Invoke the CGTP (multirouting) filtering module to 8127 * process the incoming packet. Packets identified as 8128 * duplicates must be discarded. Filtering is active 8129 * only if the the ip_cgtp_filter ndd variable is 8130 * non-zero. 
8131 */ 8132 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 8133 int cgtp_flt_pkt = 8134 ip_cgtp_filter_ops->cfo_filter_v6( 8135 inill->ill_rq, ip6h, fraghdr); 8136 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8137 freemsg(mp); 8138 return; 8139 } 8140 } 8141 8142 /* Restore the flags */ 8143 DB_CKSUMFLAGS(mp) = hck_flags; 8144 8145 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8146 remlen - used, &prev_nexthdr_offset, 8147 &reass_sum, &reass_hck_flags); 8148 if (mp == NULL) { 8149 /* Reassembly is still pending */ 8150 return; 8151 } 8152 /* The first mblk are the headers before the frag hdr */ 8153 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmOKs); 8154 8155 first_mp = mp; /* mp has most likely changed! */ 8156 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8157 ip6h = (ip6_t *)mp->b_rptr; 8158 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8159 whereptr = mp->b_rptr + no_frag_hdr_len; 8160 remlen = ntohs(ip6h->ip6_plen) + 8161 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8162 pkt_len = msgdsize(mp); 8163 used = 0; 8164 break; 8165 } 8166 case IPPROTO_HOPOPTS: 8167 if (hada_mp != NULL) { 8168 ip0dbg(("hop hada drop\n")); 8169 goto hada_drop; 8170 } 8171 /* 8172 * Illegal header sequence. 8173 * (Hop-by-hop headers are processed above 8174 * and required to immediately follow IPv6 header) 8175 */ 8176 icmp_param_problem_v6(WR(q), first_mp, 8177 ICMP6_PARAMPROB_NEXTHEADER, 8178 prev_nexthdr_offset, 8179 B_FALSE, B_FALSE); 8180 return; 8181 8182 case IPPROTO_ROUTING: { 8183 uint_t ehdrlen; 8184 ip6_rthdr_t *rthdr; 8185 8186 /* Check if AH is present. */ 8187 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8188 ire, hada_mp, zoneid)) { 8189 ip0dbg(("routing hada drop\n")); 8190 return; 8191 } 8192 8193 /* 8194 * Reinitialize pointers, as ipsec_early_ah_v6() does 8195 * complete pullups. We don't have to do more pullups 8196 * as a result. 
8197 */ 8198 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8199 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8200 ip6h = (ip6_t *)mp->b_rptr; 8201 8202 if (remlen < MIN_EHDR_LEN) 8203 goto pkt_too_short; 8204 rthdr = (ip6_rthdr_t *)whereptr; 8205 nexthdr = rthdr->ip6r_nxt; 8206 prev_nexthdr_offset = (uint_t)(whereptr - 8207 (uint8_t *)ip6h); 8208 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8209 if (remlen < ehdrlen) 8210 goto pkt_too_short; 8211 if (rthdr->ip6r_segleft != 0) { 8212 /* Not end of source route */ 8213 if (ll_multicast) { 8214 BUMP_MIB(ill->ill_ip6_mib, 8215 ipv6ForwProhibits); 8216 freemsg(hada_mp); 8217 freemsg(mp); 8218 return; 8219 } 8220 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8221 flags, hada_mp, dl_mp); 8222 return; 8223 } 8224 used = ehdrlen; 8225 break; 8226 } 8227 case IPPROTO_AH: 8228 case IPPROTO_ESP: { 8229 /* 8230 * Fast path for AH/ESP. If this is the first time 8231 * we are sending a datagram to AH/ESP, allocate 8232 * a IPSEC_IN message and prepend it. Otherwise, 8233 * just fanout. 8234 */ 8235 8236 ipsec_in_t *ii; 8237 int ipsec_rc; 8238 8239 if (!mctl_present) { 8240 ASSERT(first_mp == mp); 8241 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 8242 NULL) { 8243 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8244 "allocation failure.\n")); 8245 BUMP_MIB(ill->ill_ip6_mib, 8246 ipv6InDiscards); 8247 freemsg(mp); 8248 return; 8249 } 8250 /* 8251 * Store the ill_index so that when we come back 8252 * from IPSEC we ride on the same queue. 8253 */ 8254 ii = (ipsec_in_t *)first_mp->b_rptr; 8255 ii->ipsec_in_ill_index = 8256 ill->ill_phyint->phyint_ifindex; 8257 ii->ipsec_in_rill_index = 8258 ii->ipsec_in_ill_index; 8259 first_mp->b_cont = mp; 8260 /* 8261 * Cache hardware acceleration info. 
8262 */ 8263 if (hada_mp != NULL) { 8264 IPSECHW_DEBUG(IPSECHW_PKT, 8265 ("ip_rput_data_v6: " 8266 "caching data attr.\n")); 8267 ii->ipsec_in_accelerated = B_TRUE; 8268 ii->ipsec_in_da = hada_mp; 8269 hada_mp = NULL; 8270 } 8271 } else { 8272 ii = (ipsec_in_t *)first_mp->b_rptr; 8273 } 8274 8275 if (!ipsec_loaded()) { 8276 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8277 ire->ire_zoneid); 8278 return; 8279 } 8280 8281 /* select inbound SA and have IPsec process the pkt */ 8282 if (nexthdr == IPPROTO_ESP) { 8283 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 8284 if (esph == NULL) 8285 return; 8286 ASSERT(ii->ipsec_in_esp_sa != NULL); 8287 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8288 NULL); 8289 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8290 first_mp, esph); 8291 } else { 8292 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 8293 if (ah == NULL) 8294 return; 8295 ASSERT(ii->ipsec_in_ah_sa != NULL); 8296 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8297 NULL); 8298 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8299 first_mp, ah); 8300 } 8301 8302 switch (ipsec_rc) { 8303 case IPSEC_STATUS_SUCCESS: 8304 break; 8305 case IPSEC_STATUS_FAILED: 8306 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8307 /* FALLTHRU */ 8308 case IPSEC_STATUS_PENDING: 8309 return; 8310 } 8311 /* we're done with IPsec processing, send it up */ 8312 ip_fanout_proto_again(first_mp, ill, inill, ire); 8313 return; 8314 } 8315 case IPPROTO_NONE: 8316 /* All processing is done. Count as "delivered". 
*/ 8317 freemsg(hada_mp); 8318 freemsg(first_mp); 8319 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8320 return; 8321 } 8322 whereptr += used; 8323 ASSERT(remlen >= used); 8324 remlen -= used; 8325 } 8326 /* NOTREACHED */ 8327 8328 pkt_too_short: 8329 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8330 ip6_len, pkt_len, remlen)); 8331 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 8332 freemsg(hada_mp); 8333 freemsg(first_mp); 8334 return; 8335 udp_fanout: 8336 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8337 connp = NULL; 8338 } else { 8339 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8340 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8341 CONN_DEC_REF(connp); 8342 connp = NULL; 8343 } 8344 } 8345 8346 if (connp == NULL) { 8347 uint32_t ports; 8348 8349 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8350 UDP_PORTS_OFFSET); 8351 IP6_STAT(ip6_udp_slow_path); 8352 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8353 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8354 zoneid); 8355 return; 8356 } 8357 8358 if (CONN_UDP_FLOWCTLD(connp)) { 8359 freemsg(first_mp); 8360 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 8361 CONN_DEC_REF(connp); 8362 return; 8363 } 8364 8365 /* Initiate IPPF processing */ 8366 if (IP6_IN_IPP(flags)) { 8367 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8368 if (mp == NULL) { 8369 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8370 CONN_DEC_REF(connp); 8371 return; 8372 } 8373 } 8374 8375 if (connp->conn_ipv6_recvpktinfo || 8376 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8377 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8378 if (mp == NULL) { 8379 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8380 CONN_DEC_REF(connp); 8381 return; 8382 } 8383 } 8384 8385 IP6_STAT(ip6_udp_fast_path); 8386 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 8387 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8388 8389 /* Send it upstream */ 8390 CONN_UDP_RECV(connp, mp); 8391 8392 CONN_DEC_REF(connp); 
8393 freemsg(hada_mp); 8394 return; 8395 8396 hada_drop: 8397 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8398 /* IPsec kstats: bump counter here */ 8399 freemsg(hada_mp); 8400 freemsg(first_mp); 8401 } 8402 8403 /* 8404 * Reassemble fragment. 8405 * When it returns a completed message the first mblk will only contain 8406 * the headers prior to the fragment header. 8407 * 8408 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8409 * of the preceding header. This is needed to patch the previous header's 8410 * nexthdr field when reassembly completes. 8411 */ 8412 static mblk_t * 8413 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8414 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8415 uint32_t *cksum_val, uint16_t *cksum_flags) 8416 { 8417 ill_t *ill = (ill_t *)q->q_ptr; 8418 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8419 uint16_t offset; 8420 boolean_t more_frags; 8421 uint8_t nexthdr = fraghdr->ip6f_nxt; 8422 in6_addr_t *v6dst_ptr; 8423 in6_addr_t *v6src_ptr; 8424 uint_t end; 8425 uint_t hdr_length; 8426 size_t count; 8427 ipf_t *ipf; 8428 ipf_t **ipfp; 8429 ipfb_t *ipfb; 8430 mblk_t *mp1; 8431 uint8_t ecn_info = 0; 8432 size_t msg_len; 8433 mblk_t *tail_mp; 8434 mblk_t *t_mp; 8435 boolean_t pruned = B_FALSE; 8436 uint32_t sum_val; 8437 uint16_t sum_flags; 8438 8439 8440 if (cksum_val != NULL) 8441 *cksum_val = 0; 8442 if (cksum_flags != NULL) 8443 *cksum_flags = 0; 8444 8445 /* 8446 * We utilize hardware computed checksum info only for UDP since 8447 * IP fragmentation is a normal occurence for the protocol. In 8448 * addition, checksum offload support for IP fragments carrying 8449 * UDP payload is commonly implemented across network adapters. 
8450 */ 8451 ASSERT(ill != NULL); 8452 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8453 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8454 mblk_t *mp1 = mp->b_cont; 8455 int32_t len; 8456 8457 /* Record checksum information from the packet */ 8458 sum_val = (uint32_t)DB_CKSUM16(mp); 8459 sum_flags = DB_CKSUMFLAGS(mp); 8460 8461 /* fragmented payload offset from beginning of mblk */ 8462 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8463 8464 if ((sum_flags & HCK_PARTIALCKSUM) && 8465 (mp1 == NULL || mp1->b_cont == NULL) && 8466 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8467 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8468 uint32_t adj; 8469 /* 8470 * Partial checksum has been calculated by hardware 8471 * and attached to the packet; in addition, any 8472 * prepended extraneous data is even byte aligned. 8473 * If any such data exists, we adjust the checksum; 8474 * this would also handle any postpended data. 8475 */ 8476 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8477 mp, mp1, len, adj); 8478 8479 /* One's complement subtract extraneous checksum */ 8480 if (adj >= sum_val) 8481 sum_val = ~(adj - sum_val) & 0xFFFF; 8482 else 8483 sum_val -= adj; 8484 } 8485 } else { 8486 sum_val = 0; 8487 sum_flags = 0; 8488 } 8489 8490 /* Clear hardware checksumming flag */ 8491 DB_CKSUMFLAGS(mp) = 0; 8492 8493 /* 8494 * Note: Fragment offset in header is in 8-octet units. 8495 * Clearing least significant 3 bits not only extracts 8496 * it but also gets it in units of octets. 8497 */ 8498 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8499 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8500 8501 /* 8502 * Is the more frags flag on and the payload length not a multiple 8503 * of eight? 
8504 */ 8505 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8506 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8507 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8508 (uint32_t)((char *)&ip6h->ip6_plen - 8509 (char *)ip6h), B_FALSE, B_FALSE); 8510 return (NULL); 8511 } 8512 8513 v6src_ptr = &ip6h->ip6_src; 8514 v6dst_ptr = &ip6h->ip6_dst; 8515 end = remlen; 8516 8517 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8518 end += offset; 8519 8520 /* 8521 * Would fragment cause reassembled packet to have a payload length 8522 * greater than IP_MAXPACKET - the max payload size? 8523 */ 8524 if (end > IP_MAXPACKET) { 8525 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8526 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8527 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8528 (char *)ip6h), B_FALSE, B_FALSE); 8529 return (NULL); 8530 } 8531 8532 /* 8533 * This packet just has one fragment. Reassembly not 8534 * needed. 8535 */ 8536 if (!more_frags && offset == 0) { 8537 goto reass_done; 8538 } 8539 8540 /* 8541 * Drop the fragmented as early as possible, if 8542 * we don't have resource(s) to re-assemble. 8543 */ 8544 if (ip_reass_queue_bytes == 0) { 8545 freemsg(mp); 8546 return (NULL); 8547 } 8548 8549 /* Record the ECN field info. */ 8550 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8551 /* 8552 * If this is not the first fragment, dump the unfragmentable 8553 * portion of the packet. 8554 */ 8555 if (offset) 8556 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8557 8558 /* 8559 * Fragmentation reassembly. Each ILL has a hash table for 8560 * queueing packets undergoing reassembly for all IPIFs 8561 * associated with the ILL. The hash is based on the packet 8562 * IP ident field. The ILL frag hash table was allocated 8563 * as a timer block at the time the ILL was created. Whenever 8564 * there is anything on the reassembly queue, the timer will 8565 * be running. 
8566 */ 8567 msg_len = MBLKSIZE(mp); 8568 tail_mp = mp; 8569 while (tail_mp->b_cont != NULL) { 8570 tail_mp = tail_mp->b_cont; 8571 msg_len += MBLKSIZE(tail_mp); 8572 } 8573 /* 8574 * If the reassembly list for this ILL will get too big 8575 * prune it. 8576 */ 8577 8578 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8579 ip_reass_queue_bytes) { 8580 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8581 : (ip_reass_queue_bytes - msg_len)); 8582 pruned = B_TRUE; 8583 } 8584 8585 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8586 mutex_enter(&ipfb->ipfb_lock); 8587 8588 ipfp = &ipfb->ipfb_ipf; 8589 /* Try to find an existing fragment queue for this packet. */ 8590 for (;;) { 8591 ipf = ipfp[0]; 8592 if (ipf) { 8593 /* 8594 * It has to match on ident, source address, and 8595 * dest address. 8596 */ 8597 if (ipf->ipf_ident == ident && 8598 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8599 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8600 8601 /* 8602 * If we have received too many 8603 * duplicate fragments for this packet 8604 * free it. 8605 */ 8606 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8607 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8608 freemsg(mp); 8609 mutex_exit(&ipfb->ipfb_lock); 8610 return (NULL); 8611 } 8612 8613 break; 8614 } 8615 ipfp = &ipf->ipf_hash_next; 8616 continue; 8617 } 8618 8619 8620 /* 8621 * If we pruned the list, do we want to store this new 8622 * fragment?. We apply an optimization here based on the 8623 * fact that most fragments will be received in order. 8624 * So if the offset of this incoming fragment is zero, 8625 * it is the first fragment of a new packet. We will 8626 * keep it. Otherwise drop the fragment, as we have 8627 * probably pruned the packet already (since the 8628 * packet cannot be found). 8629 */ 8630 8631 if (pruned && offset != 0) { 8632 mutex_exit(&ipfb->ipfb_lock); 8633 freemsg(mp); 8634 return (NULL); 8635 } 8636 8637 /* New guy. Allocate a frag message. 
*/ 8638 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8639 if (!mp1) { 8640 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8641 freemsg(mp); 8642 partial_reass_done: 8643 mutex_exit(&ipfb->ipfb_lock); 8644 return (NULL); 8645 } 8646 8647 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8648 /* 8649 * Too many fragmented packets in this hash bucket. 8650 * Free the oldest. 8651 */ 8652 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8653 } 8654 8655 mp1->b_cont = mp; 8656 8657 /* Initialize the fragment header. */ 8658 ipf = (ipf_t *)mp1->b_rptr; 8659 ipf->ipf_mp = mp1; 8660 ipf->ipf_ptphn = ipfp; 8661 ipfp[0] = ipf; 8662 ipf->ipf_hash_next = NULL; 8663 ipf->ipf_ident = ident; 8664 ipf->ipf_v6src = *v6src_ptr; 8665 ipf->ipf_v6dst = *v6dst_ptr; 8666 /* Record reassembly start time. */ 8667 ipf->ipf_timestamp = gethrestime_sec(); 8668 /* Record ipf generation and account for frag header */ 8669 ipf->ipf_gen = ill->ill_ipf_gen++; 8670 ipf->ipf_count = MBLKSIZE(mp1); 8671 ipf->ipf_protocol = nexthdr; 8672 ipf->ipf_nf_hdr_len = 0; 8673 ipf->ipf_prev_nexthdr_offset = 0; 8674 ipf->ipf_last_frag_seen = B_FALSE; 8675 ipf->ipf_ecn = ecn_info; 8676 ipf->ipf_num_dups = 0; 8677 ipfb->ipfb_frag_pkts++; 8678 ipf->ipf_checksum = 0; 8679 ipf->ipf_checksum_flags = 0; 8680 8681 /* Store checksum value in fragment header */ 8682 if (sum_flags != 0) { 8683 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8684 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8685 ipf->ipf_checksum = sum_val; 8686 ipf->ipf_checksum_flags = sum_flags; 8687 } 8688 8689 /* 8690 * We handle reassembly two ways. In the easy case, 8691 * where all the fragments show up in order, we do 8692 * minimal bookkeeping, and just clip new pieces on 8693 * the end. If we ever see a hole, then we go off 8694 * to ip_reassemble which has to mark the pieces and 8695 * keep track of the number of holes, etc. Obviously, 8696 * the point of having both mechanisms is so we can 8697 * handle the easy case as efficiently as possible. 
8698 */ 8699 if (offset == 0) { 8700 /* Easy case, in-order reassembly so far. */ 8701 /* Update the byte count */ 8702 ipf->ipf_count += msg_len; 8703 ipf->ipf_tail_mp = tail_mp; 8704 /* 8705 * Keep track of next expected offset in 8706 * ipf_end. 8707 */ 8708 ipf->ipf_end = end; 8709 ipf->ipf_nf_hdr_len = hdr_length; 8710 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8711 } else { 8712 /* Hard case, hole at the beginning. */ 8713 ipf->ipf_tail_mp = NULL; 8714 /* 8715 * ipf_end == 0 means that we have given up 8716 * on easy reassembly. 8717 */ 8718 ipf->ipf_end = 0; 8719 8720 /* Forget checksum offload from now on */ 8721 ipf->ipf_checksum_flags = 0; 8722 8723 /* 8724 * ipf_hole_cnt is set by ip_reassemble. 8725 * ipf_count is updated by ip_reassemble. 8726 * No need to check for return value here 8727 * as we don't expect reassembly to complete or 8728 * fail for the first fragment itself. 8729 */ 8730 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8731 msg_len); 8732 } 8733 /* Update per ipfb and ill byte counts */ 8734 ipfb->ipfb_count += ipf->ipf_count; 8735 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8736 ill->ill_frag_count += ipf->ipf_count; 8737 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8738 /* If the frag timer wasn't already going, start it. */ 8739 mutex_enter(&ill->ill_lock); 8740 ill_frag_timer_start(ill); 8741 mutex_exit(&ill->ill_lock); 8742 goto partial_reass_done; 8743 } 8744 8745 /* 8746 * If the packet's flag has changed (it could be coming up 8747 * from an interface different than the previous, therefore 8748 * possibly different checksum capability), then forget about 8749 * any stored checksum states. Otherwise add the value to 8750 * the existing one stored in the fragment header. 
8751 */ 8752 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8753 sum_val += ipf->ipf_checksum; 8754 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8755 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8756 ipf->ipf_checksum = sum_val; 8757 } else if (ipf->ipf_checksum_flags != 0) { 8758 /* Forget checksum offload from now on */ 8759 ipf->ipf_checksum_flags = 0; 8760 } 8761 8762 /* 8763 * We have a new piece of a datagram which is already being 8764 * reassembled. Update the ECN info if all IP fragments 8765 * are ECN capable. If there is one which is not, clear 8766 * all the info. If there is at least one which has CE 8767 * code point, IP needs to report that up to transport. 8768 */ 8769 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8770 if (ecn_info == IPH_ECN_CE) 8771 ipf->ipf_ecn = IPH_ECN_CE; 8772 } else { 8773 ipf->ipf_ecn = IPH_ECN_NECT; 8774 } 8775 8776 if (offset && ipf->ipf_end == offset) { 8777 /* The new fragment fits at the end */ 8778 ipf->ipf_tail_mp->b_cont = mp; 8779 /* Update the byte count */ 8780 ipf->ipf_count += msg_len; 8781 /* Update per ipfb and ill byte counts */ 8782 ipfb->ipfb_count += msg_len; 8783 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8784 ill->ill_frag_count += msg_len; 8785 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8786 if (more_frags) { 8787 /* More to come. */ 8788 ipf->ipf_end = end; 8789 ipf->ipf_tail_mp = tail_mp; 8790 goto partial_reass_done; 8791 } 8792 } else { 8793 /* 8794 * Go do the hard cases. 8795 * Call ip_reassemble(). 
8796 */ 8797 int ret; 8798 8799 if (offset == 0) { 8800 if (ipf->ipf_prev_nexthdr_offset == 0) { 8801 ipf->ipf_nf_hdr_len = hdr_length; 8802 ipf->ipf_prev_nexthdr_offset = 8803 *prev_nexthdr_offset; 8804 } 8805 } 8806 /* Save current byte count */ 8807 count = ipf->ipf_count; 8808 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8809 8810 /* Count of bytes added and subtracted (freeb()ed) */ 8811 count = ipf->ipf_count - count; 8812 if (count) { 8813 /* Update per ipfb and ill byte counts */ 8814 ipfb->ipfb_count += count; 8815 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8816 ill->ill_frag_count += count; 8817 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8818 } 8819 if (ret == IP_REASS_PARTIAL) { 8820 goto partial_reass_done; 8821 } else if (ret == IP_REASS_FAILED) { 8822 /* Reassembly failed. Free up all resources */ 8823 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8824 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8825 IP_REASS_SET_START(t_mp, 0); 8826 IP_REASS_SET_END(t_mp, 0); 8827 } 8828 freemsg(mp); 8829 goto partial_reass_done; 8830 } 8831 8832 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8833 } 8834 /* 8835 * We have completed reassembly. Unhook the frag header from 8836 * the reassembly list. 8837 * 8838 * Grab the unfragmentable header length next header value out 8839 * of the first fragment 8840 */ 8841 ASSERT(ipf->ipf_nf_hdr_len != 0); 8842 hdr_length = ipf->ipf_nf_hdr_len; 8843 8844 /* 8845 * Before we free the frag header, record the ECN info 8846 * to report back to the transport. 
8847 */ 8848 ecn_info = ipf->ipf_ecn; 8849 8850 /* 8851 * Store the nextheader field in the header preceding the fragment 8852 * header 8853 */ 8854 nexthdr = ipf->ipf_protocol; 8855 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8856 ipfp = ipf->ipf_ptphn; 8857 8858 /* We need to supply these to caller */ 8859 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8860 sum_val = ipf->ipf_checksum; 8861 else 8862 sum_val = 0; 8863 8864 mp1 = ipf->ipf_mp; 8865 count = ipf->ipf_count; 8866 ipf = ipf->ipf_hash_next; 8867 if (ipf) 8868 ipf->ipf_ptphn = ipfp; 8869 ipfp[0] = ipf; 8870 ill->ill_frag_count -= count; 8871 ASSERT(ipfb->ipfb_count >= count); 8872 ipfb->ipfb_count -= count; 8873 ipfb->ipfb_frag_pkts--; 8874 mutex_exit(&ipfb->ipfb_lock); 8875 /* Ditch the frag header. */ 8876 mp = mp1->b_cont; 8877 freeb(mp1); 8878 8879 /* 8880 * Make sure the packet is good by doing some sanity 8881 * check. If bad we can silentely drop the packet. 8882 */ 8883 reass_done: 8884 if (hdr_length < sizeof (ip6_frag_t)) { 8885 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8886 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8887 freemsg(mp); 8888 return (NULL); 8889 } 8890 8891 /* 8892 * Remove the fragment header from the initial header by 8893 * splitting the mblk into the non-fragmentable header and 8894 * everthing after the fragment extension header. This has the 8895 * side effect of putting all the headers that need destination 8896 * processing into the b_cont block-- on return this fact is 8897 * used in order to avoid having to look at the extensions 8898 * already processed. 8899 * 8900 * Note that this code assumes that the unfragmentable portion 8901 * of the header is in the first mblk and increments 8902 * the read pointer past it. If this assumption is broken 8903 * this code fails badly. 
8904 */ 8905 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8906 mblk_t *nmp; 8907 8908 if (!(nmp = dupb(mp))) { 8909 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8910 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8911 freemsg(mp); 8912 return (NULL); 8913 } 8914 nmp->b_cont = mp->b_cont; 8915 mp->b_cont = nmp; 8916 nmp->b_rptr += hdr_length; 8917 } 8918 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8919 8920 ip6h = (ip6_t *)mp->b_rptr; 8921 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8922 8923 /* Restore original IP length in header. */ 8924 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8925 /* Record the ECN info. */ 8926 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8927 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8928 8929 /* Reassembly is successful; return checksum information if needed */ 8930 if (cksum_val != NULL) 8931 *cksum_val = sum_val; 8932 if (cksum_flags != NULL) 8933 *cksum_flags = sum_flags; 8934 8935 return (mp); 8936 } 8937 8938 /* 8939 * Walk through the options to see if there is a routing header. 8940 * If present get the destination which is the last address of 8941 * the option. 
8942 */ 8943 in6_addr_t 8944 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8945 { 8946 uint8_t nexthdr; 8947 uint8_t *whereptr; 8948 ip6_hbh_t *hbhhdr; 8949 ip6_dest_t *dsthdr; 8950 ip6_rthdr0_t *rthdr; 8951 ip6_frag_t *fraghdr; 8952 int ehdrlen; 8953 int left; 8954 in6_addr_t *ap, rv; 8955 8956 if (is_fragment != NULL) 8957 *is_fragment = B_FALSE; 8958 8959 rv = ip6h->ip6_dst; 8960 8961 nexthdr = ip6h->ip6_nxt; 8962 whereptr = (uint8_t *)&ip6h[1]; 8963 for (;;) { 8964 8965 ASSERT(nexthdr != IPPROTO_RAW); 8966 switch (nexthdr) { 8967 case IPPROTO_HOPOPTS: 8968 hbhhdr = (ip6_hbh_t *)whereptr; 8969 nexthdr = hbhhdr->ip6h_nxt; 8970 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8971 break; 8972 case IPPROTO_DSTOPTS: 8973 dsthdr = (ip6_dest_t *)whereptr; 8974 nexthdr = dsthdr->ip6d_nxt; 8975 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8976 break; 8977 case IPPROTO_ROUTING: 8978 rthdr = (ip6_rthdr0_t *)whereptr; 8979 nexthdr = rthdr->ip6r0_nxt; 8980 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 8981 8982 left = rthdr->ip6r0_segleft; 8983 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 8984 rv = *(ap + left - 1); 8985 /* 8986 * If the caller doesn't care whether the packet 8987 * is a fragment or not, we can stop here since 8988 * we have our destination. 8989 */ 8990 if (is_fragment == NULL) 8991 goto done; 8992 break; 8993 case IPPROTO_FRAGMENT: 8994 fraghdr = (ip6_frag_t *)whereptr; 8995 nexthdr = fraghdr->ip6f_nxt; 8996 ehdrlen = sizeof (ip6_frag_t); 8997 if (is_fragment != NULL) 8998 *is_fragment = B_TRUE; 8999 goto done; 9000 default : 9001 goto done; 9002 } 9003 whereptr += ehdrlen; 9004 } 9005 9006 done: 9007 return (rv); 9008 } 9009 9010 /* 9011 * ip_source_routed_v6: 9012 * This function is called by redirect code in ip_rput_data_v6 to 9013 * know whether this packet is source routed through this node i.e 9014 * whether this node (router) is part of the journey. 
This 9015 * function is called under two cases : 9016 * 9017 * case 1 : Routing header was processed by this node and 9018 * ip_process_rthdr replaced ip6_dst with the next hop 9019 * and we are forwarding the packet to the next hop. 9020 * 9021 * case 2 : Routing header was not processed by this node and we 9022 * are just forwarding the packet. 9023 * 9024 * For case (1) we don't want to send redirects. For case(2) we 9025 * want to send redirects. 9026 */ 9027 static boolean_t 9028 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 9029 { 9030 uint8_t nexthdr; 9031 in6_addr_t *addrptr; 9032 ip6_rthdr0_t *rthdr; 9033 uint8_t numaddr; 9034 ip6_hbh_t *hbhhdr; 9035 uint_t ehdrlen; 9036 uint8_t *byteptr; 9037 9038 ip2dbg(("ip_source_routed_v6\n")); 9039 nexthdr = ip6h->ip6_nxt; 9040 ehdrlen = IPV6_HDR_LEN; 9041 9042 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9043 while (nexthdr == IPPROTO_HOPOPTS || 9044 nexthdr == IPPROTO_DSTOPTS) { 9045 byteptr = (uint8_t *)ip6h + ehdrlen; 9046 /* 9047 * Check if we have already processed 9048 * packets or we are just a forwarding 9049 * router which only pulled up msgs up 9050 * to IPV6HDR and one HBH ext header 9051 */ 9052 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9053 ip2dbg(("ip_source_routed_v6: Extension" 9054 " headers not processed\n")); 9055 return (B_FALSE); 9056 } 9057 hbhhdr = (ip6_hbh_t *)byteptr; 9058 nexthdr = hbhhdr->ip6h_nxt; 9059 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9060 } 9061 switch (nexthdr) { 9062 case IPPROTO_ROUTING: 9063 byteptr = (uint8_t *)ip6h + ehdrlen; 9064 /* 9065 * If for some reason, we haven't pulled up 9066 * the routing hdr data mblk, then we must 9067 * not have processed it at all. So for sure 9068 * we are not part of the source routed journey. 
9069 */ 9070 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9071 ip2dbg(("ip_source_routed_v6: Routing" 9072 " header not processed\n")); 9073 return (B_FALSE); 9074 } 9075 rthdr = (ip6_rthdr0_t *)byteptr; 9076 /* 9077 * Either we are an intermediate router or the 9078 * last hop before destination and we have 9079 * already processed the routing header. 9080 * If segment_left is greater than or equal to zero, 9081 * then we must be the (numaddr - segleft) entry 9082 * of the routing header. Although ip6r0_segleft 9083 * is a unit8_t variable, we still check for zero 9084 * or greater value, if in case the data type 9085 * is changed someday in future. 9086 */ 9087 if (rthdr->ip6r0_segleft > 0 || 9088 rthdr->ip6r0_segleft == 0) { 9089 ire_t *ire = NULL; 9090 9091 numaddr = rthdr->ip6r0_len / 2; 9092 addrptr = (in6_addr_t *)((char *)rthdr + 9093 sizeof (*rthdr)); 9094 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9095 if (addrptr != NULL) { 9096 ire = ire_ctable_lookup_v6(addrptr, NULL, 9097 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9098 MATCH_IRE_TYPE); 9099 if (ire != NULL) { 9100 ire_refrele(ire); 9101 return (B_TRUE); 9102 } 9103 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9104 } 9105 } 9106 /* FALLTHRU */ 9107 default: 9108 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9109 return (B_FALSE); 9110 } 9111 } 9112 9113 /* 9114 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9115 * Assumes that the following set of headers appear in the first 9116 * mblk: 9117 * ip6i_t (if present) CAN also appear as a separate mblk. 9118 * ip6_t 9119 * Any extension headers 9120 * TCP/UDP/SCTP header (if present) 9121 * The routine can handle an ICMPv6 header that is not in the first mblk. 9122 * 9123 * The order to determine the outgoing interface is as follows: 9124 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9125 * 2. If conn_nofailover_ill is set then use that ill. 9126 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9127 * 4. If q is an ill queue and (link local or multicast destination) then 9128 * use that ill. 9129 * 5. If IPV6_BOUND_IF has been set use that ill. 9130 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9131 * look for the best IRE match for the unspecified group to determine 9132 * the ill. 9133 * 7. For unicast: Just do an IRE lookup for the best match. 9134 */ 9135 void 9136 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9137 { 9138 conn_t *connp = NULL; 9139 queue_t *q = (queue_t *)arg2; 9140 ire_t *ire = NULL; 9141 ire_t *sctp_ire = NULL; 9142 ip6_t *ip6h; 9143 in6_addr_t *v6dstp; 9144 ill_t *ill = NULL; 9145 ipif_t *ipif; 9146 ip6i_t *ip6i; 9147 int cksum_request; /* -1 => normal. */ 9148 /* 1 => Skip TCP/UDP/SCTP checksum */ 9149 /* Otherwise contains insert offset for checksum */ 9150 int unspec_src; 9151 boolean_t do_outrequests; /* Increment OutRequests? */ 9152 mib2_ipv6IfStatsEntry_t *mibptr; 9153 int match_flags = MATCH_IRE_ILL_GROUP; 9154 boolean_t attach_if = B_FALSE; 9155 mblk_t *first_mp; 9156 boolean_t mctl_present; 9157 ipsec_out_t *io; 9158 boolean_t drop_if_delayed = B_FALSE; 9159 boolean_t multirt_need_resolve = B_FALSE; 9160 mblk_t *copy_mp = NULL; 9161 int err; 9162 int ip6i_flags = 0; 9163 zoneid_t zoneid; 9164 ill_t *saved_ill = NULL; 9165 boolean_t conn_lock_held; 9166 boolean_t need_decref = B_FALSE; 9167 9168 /* 9169 * Highest bit in version field is Reachability Confirmation bit 9170 * used by NUD in ip_xmit_v6(). 9171 */ 9172 #ifdef _BIG_ENDIAN 9173 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9174 #else 9175 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9176 #endif 9177 9178 /* 9179 * M_CTL comes from 5 places 9180 * 9181 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9182 * both V4 and V6 datagrams. 9183 * 9184 * 2) AH/ESP sends down M_CTL after doing their job with both 9185 * V4 and V6 datagrams. 
9186 * 9187 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9188 * attached. 9189 * 9190 * 4) Notifications from an external resolver (for XRESOLV ifs) 9191 * 9192 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9193 * IPsec hardware acceleration support. 9194 * 9195 * We need to handle (1)'s IPv6 case and (3) here. For the 9196 * IPv4 case in (1), and (2), IPSEC processing has already 9197 * started. The code in ip_wput() already knows how to handle 9198 * continuing IPSEC processing (for IPv4 and IPv6). All other 9199 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9200 * for handling. 9201 */ 9202 first_mp = mp; 9203 mctl_present = B_FALSE; 9204 io = NULL; 9205 9206 /* Multidata transmit? */ 9207 if (DB_TYPE(mp) == M_MULTIDATA) { 9208 /* 9209 * We should never get here, since all Multidata messages 9210 * originating from tcp should have been directed over to 9211 * tcp_multisend() in the first place. 9212 */ 9213 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 9214 freemsg(mp); 9215 return; 9216 } else if (DB_TYPE(mp) == M_CTL) { 9217 uint32_t mctltype = 0; 9218 uint32_t mlen = MBLKL(first_mp); 9219 9220 mp = mp->b_cont; 9221 mctl_present = B_TRUE; 9222 io = (ipsec_out_t *)first_mp->b_rptr; 9223 9224 /* 9225 * Validate this M_CTL message. The only three types of 9226 * M_CTL messages we expect to see in this code path are 9227 * ipsec_out_t or ipsec_in_t structures (allocated as 9228 * ipsec_info_t unions), or ipsec_ctl_t structures. 9229 * The ipsec_out_type and ipsec_in_type overlap in the two 9230 * data structures, and they are either set to IPSEC_OUT 9231 * or IPSEC_IN depending on which data structure it is. 9232 * ipsec_ctl_t is an IPSEC_CTL. 9233 * 9234 * All other M_CTL messages are sent to ip_wput_nondata() 9235 * for handling. 
9236 */ 9237 if (mlen >= sizeof (io->ipsec_out_type)) 9238 mctltype = io->ipsec_out_type; 9239 9240 if ((mlen == sizeof (ipsec_ctl_t)) && 9241 (mctltype == IPSEC_CTL)) { 9242 ip_output(Q_TO_CONN(q), first_mp, q, caller); 9243 return; 9244 } 9245 9246 if ((mlen < sizeof (ipsec_info_t)) || 9247 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9248 mp == NULL) { 9249 ip_wput_nondata(NULL, q, first_mp, NULL); 9250 return; 9251 } 9252 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9253 if (q->q_next == NULL) { 9254 ip6h = (ip6_t *)mp->b_rptr; 9255 /* 9256 * For a freshly-generated TCP dgram that needs IPV6 9257 * processing, don't call ip_wput immediately. We can 9258 * tell this by the ipsec_out_proc_begin. In-progress 9259 * IPSEC_OUT messages have proc_begin set to TRUE, 9260 * and we want to send all IPSEC_IN messages to 9261 * ip_wput() for IPsec processing or finishing. 9262 */ 9263 if (mctltype == IPSEC_IN || 9264 IPVER(ip6h) != IPV6_VERSION || 9265 io->ipsec_out_proc_begin) { 9266 mibptr = &ip6_mib; 9267 goto notv6; 9268 } 9269 } 9270 } else if (DB_TYPE(mp) != M_DATA) { 9271 ip_wput_nondata(NULL, q, mp, NULL); 9272 return; 9273 } 9274 9275 ip6h = (ip6_t *)mp->b_rptr; 9276 9277 if (IPVER(ip6h) != IPV6_VERSION) { 9278 mibptr = &ip6_mib; 9279 goto notv6; 9280 } 9281 9282 if (q->q_next != NULL) { 9283 ill = (ill_t *)q->q_ptr; 9284 /* 9285 * We don't know if this ill will be used for IPv6 9286 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9287 * ipif_set_values() sets the ill_isv6 flag to true if 9288 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9289 * just drop the packet. 
9290 */ 9291 if (!ill->ill_isv6) { 9292 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9293 "ILLF_IPV6 was set\n")); 9294 freemsg(first_mp); 9295 return; 9296 } 9297 /* For uniformity do a refhold */ 9298 mutex_enter(&ill->ill_lock); 9299 if (!ILL_CAN_LOOKUP(ill)) { 9300 mutex_exit(&ill->ill_lock); 9301 freemsg(first_mp); 9302 return; 9303 } 9304 ill_refhold_locked(ill); 9305 mutex_exit(&ill->ill_lock); 9306 mibptr = ill->ill_ip6_mib; 9307 /* 9308 * ill_ip6_mib is allocated by ipif_set_values() when 9309 * ill_isv6 is set. Thus if ill_isv6 is true, 9310 * ill_ip6_mib had better not be NULL. 9311 */ 9312 ASSERT(mibptr != NULL); 9313 unspec_src = 0; 9314 BUMP_MIB(mibptr, ipv6OutRequests); 9315 do_outrequests = B_FALSE; 9316 } else { 9317 connp = (conn_t *)arg; 9318 ASSERT(connp != NULL); 9319 9320 /* is queue flow controlled? */ 9321 if ((q->q_first || connp->conn_draining) && 9322 (caller == IP_WPUT)) { 9323 /* 9324 * 1) TCP sends down M_CTL for detached connections. 9325 * 2) AH/ESP sends down M_CTL. 9326 * 9327 * We don't flow control either of the above. Only 9328 * UDP and others are flow controlled for which we 9329 * can't have a M_CTL. 9330 */ 9331 ASSERT(first_mp == mp); 9332 (void) putq(q, mp); 9333 return; 9334 } 9335 mibptr = &ip6_mib; 9336 unspec_src = connp->conn_unspec_src; 9337 do_outrequests = B_TRUE; 9338 if (mp->b_flag & MSGHASREF) { 9339 mp->b_flag &= ~MSGHASREF; 9340 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9341 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9342 need_decref = B_TRUE; 9343 } 9344 9345 /* 9346 * If there is a policy, try to attach an ipsec_out in 9347 * the front. At the end, first_mp either points to a 9348 * M_DATA message or IPSEC_OUT message linked to a 9349 * M_DATA message. We have to do it now as we might 9350 * lose the "conn" if we go through ip_newroute. 
9351 */ 9352 if (!mctl_present && 9353 (connp->conn_out_enforce_policy || 9354 connp->conn_latch != NULL)) { 9355 ASSERT(first_mp == mp); 9356 /* XXX Any better way to get the protocol fast ? */ 9357 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9358 connp->conn_ulp)) == NULL)) { 9359 if (need_decref) 9360 CONN_DEC_REF(connp); 9361 return; 9362 } else { 9363 ASSERT(mp->b_datap->db_type == M_CTL); 9364 first_mp = mp; 9365 mp = mp->b_cont; 9366 mctl_present = B_TRUE; 9367 io = (ipsec_out_t *)first_mp->b_rptr; 9368 } 9369 } 9370 } 9371 9372 /* check for alignment and full IPv6 header */ 9373 if (!OK_32PTR((uchar_t *)ip6h) || 9374 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9375 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9376 if (do_outrequests) 9377 BUMP_MIB(mibptr, ipv6OutRequests); 9378 BUMP_MIB(mibptr, ipv6OutDiscards); 9379 freemsg(first_mp); 9380 if (ill != NULL) 9381 ill_refrele(ill); 9382 if (need_decref) 9383 CONN_DEC_REF(connp); 9384 return; 9385 } 9386 v6dstp = &ip6h->ip6_dst; 9387 cksum_request = -1; 9388 ip6i = NULL; 9389 9390 /* 9391 * Once neighbor discovery has completed, ndp_process() will provide 9392 * locally generated packets for which processing can be reattempted. 9393 * In these cases, connp is NULL and the original zone is part of a 9394 * prepended ipsec_out_t. 9395 */ 9396 if (io != NULL) { 9397 zoneid = io->ipsec_out_zoneid; 9398 ASSERT(zoneid != ALL_ZONES); 9399 } else { 9400 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 9401 } 9402 9403 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9404 /* 9405 * This is an ip6i_t header followed by an ip6_hdr. 9406 * Check which fields are set. 9407 * 9408 * When the packet comes from a transport we should have 9409 * all needed headers in the first mblk. However, when 9410 * going through ip_newroute*_v6 the ip6i might be in 9411 * a separate mblk when we return here. 
In that case 9412 * we pullup everything to ensure that extension and transport 9413 * headers "stay" in the first mblk. 9414 */ 9415 ip6i = (ip6i_t *)ip6h; 9416 ip6i_flags = ip6i->ip6i_flags; 9417 9418 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9419 ((mp->b_wptr - (uchar_t *)ip6i) >= 9420 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9421 9422 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9423 if (!pullupmsg(mp, -1)) { 9424 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9425 if (do_outrequests) 9426 BUMP_MIB(mibptr, ipv6OutRequests); 9427 BUMP_MIB(mibptr, ipv6OutDiscards); 9428 freemsg(first_mp); 9429 if (ill != NULL) 9430 ill_refrele(ill); 9431 if (need_decref) 9432 CONN_DEC_REF(connp); 9433 return; 9434 } 9435 ip6h = (ip6_t *)mp->b_rptr; 9436 v6dstp = &ip6h->ip6_dst; 9437 ip6i = (ip6i_t *)ip6h; 9438 } 9439 ip6h = (ip6_t *)&ip6i[1]; 9440 9441 /* 9442 * Advance rptr past the ip6i_t to get ready for 9443 * transmitting the packet. However, if the packet gets 9444 * passed to ip_newroute*_v6 then rptr is moved back so 9445 * that the ip6i_t header can be inspected when the 9446 * packet comes back here after passing through 9447 * ire_add_then_send. 9448 */ 9449 mp->b_rptr = (uchar_t *)ip6h; 9450 9451 /* 9452 * IP6I_ATTACH_IF is set in this function when we had a 9453 * conn and it was either bound to the IPFF_NOFAILOVER address 9454 * or IPV6_BOUND_PIF was set. These options override other 9455 * options that set the ifindex. We come here with 9456 * IP6I_ATTACH_IF set when we can't find the ire and 9457 * ip_newroute_v6 is feeding the packet for second time. 
9458 */ 9459 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9460 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9461 ASSERT(ip6i->ip6i_ifindex != 0); 9462 if (ill != NULL) 9463 ill_refrele(ill); 9464 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9465 NULL, NULL, NULL, NULL); 9466 if (ill == NULL) { 9467 if (do_outrequests) 9468 BUMP_MIB(mibptr, ipv6OutRequests); 9469 BUMP_MIB(mibptr, ipv6OutDiscards); 9470 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9471 ip6i->ip6i_ifindex)); 9472 if (need_decref) 9473 CONN_DEC_REF(connp); 9474 freemsg(first_mp); 9475 return; 9476 } 9477 mibptr = ill->ill_ip6_mib; 9478 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9479 /* 9480 * Preserve the index so that when we return 9481 * from IPSEC processing, we know where to 9482 * send the packet. 9483 */ 9484 if (mctl_present) { 9485 ASSERT(io != NULL); 9486 io->ipsec_out_ill_index = 9487 ip6i->ip6i_ifindex; 9488 } 9489 } 9490 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9491 /* 9492 * This is a multipathing probe packet that has 9493 * been delayed in ND resolution. 
Drop the 9494 * packet for the reasons mentioned in 9495 * nce_queue_mp() 9496 */ 9497 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9498 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9499 freemsg(first_mp); 9500 ill_refrele(ill); 9501 if (need_decref) 9502 CONN_DEC_REF(connp); 9503 return; 9504 } 9505 } 9506 } 9507 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9508 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9509 9510 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9511 if (secpolicy_net_rawaccess(cr) != 0) { 9512 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9513 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9514 NULL, zoneid, NULL, 9515 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9516 if (ire == NULL) { 9517 if (do_outrequests) 9518 BUMP_MIB(mibptr, 9519 ipv6OutRequests); 9520 BUMP_MIB(mibptr, ipv6OutDiscards); 9521 ip1dbg(("ip_wput_v6: bad source " 9522 "addr\n")); 9523 freemsg(first_mp); 9524 if (ill != NULL) 9525 ill_refrele(ill); 9526 if (need_decref) 9527 CONN_DEC_REF(connp); 9528 return; 9529 } 9530 ire_refrele(ire); 9531 } 9532 /* No need to verify again when using ip_newroute */ 9533 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9534 } 9535 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9536 /* 9537 * Make sure they match since ip_newroute*_v6 etc might 9538 * (unknown to them) inspect ip6i_nexthop when 9539 * they think they access ip6_dst. 9540 */ 9541 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9542 } 9543 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9544 cksum_request = 1; 9545 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9546 cksum_request = ip6i->ip6i_checksum_off; 9547 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9548 unspec_src = 1; 9549 9550 if (do_outrequests && ill != NULL) { 9551 BUMP_MIB(mibptr, ipv6OutRequests); 9552 do_outrequests = B_FALSE; 9553 } 9554 /* 9555 * Store ip6i_t info that we need after we come back 9556 * from IPSEC processing. 
9557 */ 9558 if (mctl_present) { 9559 ASSERT(io != NULL); 9560 io->ipsec_out_unspec_src = unspec_src; 9561 } 9562 } 9563 if (connp != NULL && connp->conn_dontroute) 9564 ip6h->ip6_hops = 1; 9565 9566 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9567 goto ipv6multicast; 9568 9569 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9570 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9571 ill_t *conn_outgoing_pill; 9572 9573 conn_outgoing_pill = conn_get_held_ill(connp, 9574 &connp->conn_outgoing_pill, &err); 9575 if (err == ILL_LOOKUP_FAILED) { 9576 if (ill != NULL) 9577 ill_refrele(ill); 9578 if (need_decref) 9579 CONN_DEC_REF(connp); 9580 freemsg(first_mp); 9581 return; 9582 } 9583 if (conn_outgoing_pill != NULL) { 9584 if (ill != NULL) 9585 ill_refrele(ill); 9586 ill = conn_outgoing_pill; 9587 attach_if = B_TRUE; 9588 match_flags = MATCH_IRE_ILL; 9589 mibptr = ill->ill_ip6_mib; 9590 9591 /* 9592 * Check if we need an ire that will not be 9593 * looked up by anybody else i.e. HIDDEN. 9594 */ 9595 if (ill_is_probeonly(ill)) 9596 match_flags |= MATCH_IRE_MARK_HIDDEN; 9597 goto send_from_ill; 9598 } 9599 } 9600 9601 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9602 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9603 ill_t *conn_nofailover_ill; 9604 9605 conn_nofailover_ill = conn_get_held_ill(connp, 9606 &connp->conn_nofailover_ill, &err); 9607 if (err == ILL_LOOKUP_FAILED) { 9608 if (ill != NULL) 9609 ill_refrele(ill); 9610 if (need_decref) 9611 CONN_DEC_REF(connp); 9612 freemsg(first_mp); 9613 return; 9614 } 9615 if (conn_nofailover_ill != NULL) { 9616 if (ill != NULL) 9617 ill_refrele(ill); 9618 ill = conn_nofailover_ill; 9619 attach_if = B_TRUE; 9620 /* 9621 * Assumes that ipc_nofailover_ill is used only for 9622 * multipathing probe packets. These packets are better 9623 * dropped, if they are delayed in ND resolution, for 9624 * the reasons described in nce_queue_mp(). 
9625 * IP6I_DROP_IFDELAYED will be set later on in this 9626 * function for this packet. 9627 */ 9628 drop_if_delayed = B_TRUE; 9629 match_flags = MATCH_IRE_ILL; 9630 mibptr = ill->ill_ip6_mib; 9631 9632 /* 9633 * Check if we need an ire that will not be 9634 * looked up by anybody else i.e. HIDDEN. 9635 */ 9636 if (ill_is_probeonly(ill)) 9637 match_flags |= MATCH_IRE_MARK_HIDDEN; 9638 goto send_from_ill; 9639 } 9640 } 9641 9642 /* 9643 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9644 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9645 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9646 */ 9647 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9648 ASSERT(ip6i->ip6i_ifindex != 0); 9649 attach_if = B_TRUE; 9650 ASSERT(ill != NULL); 9651 match_flags = MATCH_IRE_ILL; 9652 9653 /* 9654 * Check if we need an ire that will not be 9655 * looked up by anybody else i.e. HIDDEN. 9656 */ 9657 if (ill_is_probeonly(ill)) 9658 match_flags |= MATCH_IRE_MARK_HIDDEN; 9659 goto send_from_ill; 9660 } 9661 9662 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9663 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9664 ASSERT(ill != NULL); 9665 goto send_from_ill; 9666 } 9667 9668 /* 9669 * 4. If q is an ill queue and (link local or multicast destination) 9670 * then use that ill. 9671 */ 9672 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9673 goto send_from_ill; 9674 } 9675 9676 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9677 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9678 ill_t *conn_outgoing_ill; 9679 9680 conn_outgoing_ill = conn_get_held_ill(connp, 9681 &connp->conn_outgoing_ill, &err); 9682 if (err == ILL_LOOKUP_FAILED) { 9683 if (ill != NULL) 9684 ill_refrele(ill); 9685 if (need_decref) 9686 CONN_DEC_REF(connp); 9687 freemsg(first_mp); 9688 return; 9689 } 9690 if (ill != NULL) 9691 ill_refrele(ill); 9692 ill = conn_outgoing_ill; 9693 mibptr = ill->ill_ip6_mib; 9694 goto send_from_ill; 9695 } 9696 9697 /* 9698 * 6. For unicast: Just do an IRE lookup for the best match. 9699 * If we get here for a link-local address it is rather random 9700 * what interface we pick on a multihomed host. 9701 * *If* there is an IRE_CACHE (and the link-local address 9702 * isn't duplicated on multi links) this will find the IRE_CACHE. 9703 * Otherwise it will use one of the matching IRE_INTERFACE routes 9704 * for the link-local prefix. Hence, applications 9705 * *should* be encouraged to specify an outgoing interface when sending 9706 * to a link local address. 9707 */ 9708 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9709 !connp->conn_fully_bound)) { 9710 /* 9711 * We cache IRE_CACHEs to avoid lookups. We don't do 9712 * this for the tcp global queue and listen end point 9713 * as it does not really have a real destination to 9714 * talk to. 9715 */ 9716 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp)); 9717 } else { 9718 /* 9719 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9720 * grab a lock here to check for CONDEMNED as it is okay 9721 * to send a packet or two with the IRE_CACHE that is going 9722 * away. 9723 */ 9724 mutex_enter(&connp->conn_lock); 9725 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9726 if (ire != NULL && 9727 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9728 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9729 9730 IRE_REFHOLD(ire); 9731 mutex_exit(&connp->conn_lock); 9732 9733 } else { 9734 boolean_t cached = B_FALSE; 9735 9736 connp->conn_ire_cache = NULL; 9737 mutex_exit(&connp->conn_lock); 9738 /* Release the old ire */ 9739 if (ire != NULL && sctp_ire == NULL) 9740 IRE_REFRELE_NOTR(ire); 9741 9742 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9743 MBLK_GETLABEL(mp)); 9744 if (ire != NULL) { 9745 IRE_REFHOLD_NOTR(ire); 9746 9747 mutex_enter(&connp->conn_lock); 9748 if (!(connp->conn_state_flags & CONN_CLOSING) && 9749 (connp->conn_ire_cache == NULL)) { 9750 rw_enter(&ire->ire_bucket->irb_lock, 9751 RW_READER); 9752 if (!(ire->ire_marks & 9753 IRE_MARK_CONDEMNED)) { 9754 connp->conn_ire_cache = ire; 9755 cached = B_TRUE; 9756 } 9757 rw_exit(&ire->ire_bucket->irb_lock); 9758 } 9759 mutex_exit(&connp->conn_lock); 9760 9761 /* 9762 * We can continue to use the ire but since it 9763 * was not cached, we should drop the extra 9764 * reference. 9765 */ 9766 if (!cached) 9767 IRE_REFRELE_NOTR(ire); 9768 } 9769 } 9770 } 9771 9772 if (ire != NULL) { 9773 if (do_outrequests) { 9774 /* Handle IRE_LOCAL's that might appear here */ 9775 if (ire->ire_type == IRE_CACHE) { 9776 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9777 ill_ip6_mib; 9778 } else { 9779 mibptr = ire->ire_ipif->ipif_ill->ill_ip6_mib; 9780 } 9781 BUMP_MIB(mibptr, ipv6OutRequests); 9782 } 9783 ASSERT(!attach_if); 9784 9785 /* 9786 * Check if the ire has the RTF_MULTIRT flag, inherited 9787 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9788 */ 9789 if (ire->ire_flags & RTF_MULTIRT) { 9790 /* 9791 * Force hop limit of multirouted packets if required. 9792 * The hop limit of such packets is bounded by the 9793 * ip_multirt_ttl ndd variable. 9794 * NDP packets must have a hop limit of 255; don't 9795 * change the hop limit in that case. 
9796 */ 9797 if ((ip_multirt_ttl > 0) && 9798 (ip6h->ip6_hops > ip_multirt_ttl) && 9799 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9800 if (ip_debug > 3) { 9801 ip2dbg(("ip_wput_v6: forcing multirt " 9802 "hop limit to %d (was %d) ", 9803 ip_multirt_ttl, ip6h->ip6_hops)); 9804 pr_addr_dbg("v6dst %s\n", AF_INET6, 9805 &ire->ire_addr_v6); 9806 } 9807 ip6h->ip6_hops = ip_multirt_ttl; 9808 } 9809 9810 /* 9811 * We look at this point if there are pending 9812 * unresolved routes. ire_multirt_need_resolve_v6() 9813 * checks in O(n) that all IRE_OFFSUBNET ire 9814 * entries for the packet's destination and 9815 * flagged RTF_MULTIRT are currently resolved. 9816 * If some remain unresolved, we do a copy 9817 * of the current message. It will be used 9818 * to initiate additional route resolutions. 9819 */ 9820 multirt_need_resolve = 9821 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9822 MBLK_GETLABEL(first_mp)); 9823 ip2dbg(("ip_wput_v6: ire %p, " 9824 "multirt_need_resolve %d, first_mp %p\n", 9825 (void *)ire, multirt_need_resolve, 9826 (void *)first_mp)); 9827 if (multirt_need_resolve) { 9828 copy_mp = copymsg(first_mp); 9829 if (copy_mp != NULL) { 9830 MULTIRT_DEBUG_TAG(copy_mp); 9831 } 9832 } 9833 } 9834 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9835 connp, caller, 0, ip6i_flags); 9836 if (need_decref) { 9837 CONN_DEC_REF(connp); 9838 connp = NULL; 9839 } 9840 IRE_REFRELE(ire); 9841 9842 /* 9843 * Try to resolve another multiroute if 9844 * ire_multirt_need_resolve_v6() deemed it necessary. 9845 * copy_mp will be consumed (sent or freed) by 9846 * ip_newroute_v6(). 9847 */ 9848 if (copy_mp != NULL) { 9849 if (mctl_present) { 9850 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9851 } else { 9852 ip6h = (ip6_t *)copy_mp->b_rptr; 9853 } 9854 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9855 &ip6h->ip6_src, NULL, zoneid); 9856 } 9857 if (ill != NULL) 9858 ill_refrele(ill); 9859 return; 9860 } 9861 9862 /* 9863 * No full IRE for this destination. 
Send it to 9864 * ip_newroute_v6 to see if anything else matches. 9865 * Mark this packet as having originated on this 9866 * machine. 9867 * Update rptr if there was an ip6i_t header. 9868 */ 9869 mp->b_prev = NULL; 9870 mp->b_next = NULL; 9871 if (ip6i != NULL) 9872 mp->b_rptr -= sizeof (ip6i_t); 9873 9874 if (unspec_src) { 9875 if (ip6i == NULL) { 9876 /* 9877 * Add ip6i_t header to carry unspec_src 9878 * until the packet comes back in ip_wput_v6. 9879 */ 9880 mp = ip_add_info_v6(mp, NULL, v6dstp); 9881 if (mp == NULL) { 9882 if (do_outrequests) 9883 BUMP_MIB(mibptr, ipv6OutRequests); 9884 BUMP_MIB(mibptr, ipv6OutDiscards); 9885 if (mctl_present) 9886 freeb(first_mp); 9887 if (ill != NULL) 9888 ill_refrele(ill); 9889 if (need_decref) 9890 CONN_DEC_REF(connp); 9891 return; 9892 } 9893 ip6i = (ip6i_t *)mp->b_rptr; 9894 9895 if (mctl_present) { 9896 ASSERT(first_mp != mp); 9897 first_mp->b_cont = mp; 9898 } else { 9899 first_mp = mp; 9900 } 9901 9902 if ((mp->b_wptr - (uchar_t *)ip6i) == 9903 sizeof (ip6i_t)) { 9904 /* 9905 * ndp_resolver called from ip_newroute_v6 9906 * expects pulled up message. 
9907 */ 9908 if (!pullupmsg(mp, -1)) { 9909 ip1dbg(("ip_wput_v6: pullupmsg" 9910 " failed\n")); 9911 if (do_outrequests) { 9912 BUMP_MIB(mibptr, 9913 ipv6OutRequests); 9914 } 9915 BUMP_MIB(mibptr, ipv6OutDiscards); 9916 freemsg(first_mp); 9917 if (ill != NULL) 9918 ill_refrele(ill); 9919 if (need_decref) 9920 CONN_DEC_REF(connp); 9921 return; 9922 } 9923 ip6i = (ip6i_t *)mp->b_rptr; 9924 } 9925 ip6h = (ip6_t *)&ip6i[1]; 9926 v6dstp = &ip6h->ip6_dst; 9927 } 9928 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9929 if (mctl_present) { 9930 ASSERT(io != NULL); 9931 io->ipsec_out_unspec_src = unspec_src; 9932 } 9933 } 9934 if (do_outrequests) 9935 BUMP_MIB(mibptr, ipv6OutRequests); 9936 if (need_decref) 9937 CONN_DEC_REF(connp); 9938 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 9939 if (ill != NULL) 9940 ill_refrele(ill); 9941 return; 9942 9943 9944 /* 9945 * Handle multicast packets with or without an conn. 9946 * Assumes that the transports set ip6_hops taking 9947 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9948 * into account. 9949 */ 9950 ipv6multicast: 9951 ip2dbg(("ip_wput_v6: multicast\n")); 9952 9953 /* 9954 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 9955 * 2. If conn_nofailover_ill is set then use that ill. 9956 * 9957 * Hold the conn_lock till we refhold the ill of interest that is 9958 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9959 * while holding any locks, postpone the refrele until after the 9960 * conn_lock is dropped. 
9961 */ 9962 if (connp != NULL) { 9963 mutex_enter(&connp->conn_lock); 9964 conn_lock_held = B_TRUE; 9965 } else { 9966 conn_lock_held = B_FALSE; 9967 } 9968 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9969 err = ill_check_and_refhold(connp->conn_outgoing_pill); 9970 if (err == ILL_LOOKUP_FAILED) { 9971 ip1dbg(("ip_output_v6: multicast" 9972 " conn_outgoing_pill no ipif\n")); 9973 multicast_discard: 9974 ASSERT(saved_ill == NULL); 9975 if (conn_lock_held) 9976 mutex_exit(&connp->conn_lock); 9977 if (ill != NULL) 9978 ill_refrele(ill); 9979 freemsg(first_mp); 9980 if (do_outrequests) 9981 BUMP_MIB(mibptr, ipv6OutDiscards); 9982 if (need_decref) 9983 CONN_DEC_REF(connp); 9984 return; 9985 } 9986 saved_ill = ill; 9987 ill = connp->conn_outgoing_pill; 9988 attach_if = B_TRUE; 9989 match_flags = MATCH_IRE_ILL; 9990 mibptr = ill->ill_ip6_mib; 9991 9992 /* 9993 * Check if we need an ire that will not be 9994 * looked up by anybody else i.e. HIDDEN. 9995 */ 9996 if (ill_is_probeonly(ill)) 9997 match_flags |= MATCH_IRE_MARK_HIDDEN; 9998 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9999 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10000 if (err == ILL_LOOKUP_FAILED) { 10001 ip1dbg(("ip_output_v6: multicast" 10002 " conn_nofailover_ill no ipif\n")); 10003 goto multicast_discard; 10004 } 10005 saved_ill = ill; 10006 ill = connp->conn_nofailover_ill; 10007 attach_if = B_TRUE; 10008 match_flags = MATCH_IRE_ILL; 10009 10010 /* 10011 * Check if we need an ire that will not be 10012 * looked up by anybody else i.e. HIDDEN. 10013 */ 10014 if (ill_is_probeonly(ill)) 10015 match_flags |= MATCH_IRE_MARK_HIDDEN; 10016 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10017 /* 10018 * Redo 1. If we did not find an IRE_CACHE the first time, 10019 * we should have an ip6i_t with IP6I_ATTACH_IF if 10020 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10021 * used on this endpoint. 
10022 */ 10023 ASSERT(ip6i->ip6i_ifindex != 0); 10024 attach_if = B_TRUE; 10025 ASSERT(ill != NULL); 10026 match_flags = MATCH_IRE_ILL; 10027 10028 /* 10029 * Check if we need an ire that will not be 10030 * looked up by anybody else i.e. HIDDEN. 10031 */ 10032 if (ill_is_probeonly(ill)) 10033 match_flags |= MATCH_IRE_MARK_HIDDEN; 10034 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10035 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10036 10037 ASSERT(ill != NULL); 10038 } else if (ill != NULL) { 10039 /* 10040 * 4. If q is an ill queue and (link local or multicast 10041 * destination) then use that ill. 10042 * We don't need the ipif initialization here. 10043 * This useless assert below is just to prevent lint from 10044 * reporting a null body if statement. 10045 */ 10046 ASSERT(ill != NULL); 10047 } else if (connp != NULL) { 10048 /* 10049 * 5. If IPV6_BOUND_IF has been set use that ill. 10050 * 10051 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10052 * Otherwise look for the best IRE match for the unspecified 10053 * group to determine the ill. 10054 * 10055 * conn_multicast_ill is used for only IPv6 packets. 10056 * conn_multicast_ipif is used for only IPv4 packets. 10057 * Thus a PF_INET6 socket send both IPv4 and IPv6 10058 * multicast packets using different IP*_MULTICAST_IF 10059 * interfaces. 
10060 */ 10061 if (connp->conn_outgoing_ill != NULL) { 10062 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10063 if (err == ILL_LOOKUP_FAILED) { 10064 ip1dbg(("ip_output_v6: multicast" 10065 " conn_outgoing_ill no ipif\n")); 10066 goto multicast_discard; 10067 } 10068 ill = connp->conn_outgoing_ill; 10069 } else if (connp->conn_multicast_ill != NULL) { 10070 err = ill_check_and_refhold(connp->conn_multicast_ill); 10071 if (err == ILL_LOOKUP_FAILED) { 10072 ip1dbg(("ip_output_v6: multicast" 10073 " conn_multicast_ill no ipif\n")); 10074 goto multicast_discard; 10075 } 10076 ill = connp->conn_multicast_ill; 10077 } else { 10078 mutex_exit(&connp->conn_lock); 10079 conn_lock_held = B_FALSE; 10080 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 10081 if (ipif == NULL) { 10082 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10083 goto multicast_discard; 10084 } 10085 /* 10086 * We have a ref to this ipif, so we can safely 10087 * access ipif_ill. 10088 */ 10089 ill = ipif->ipif_ill; 10090 mutex_enter(&ill->ill_lock); 10091 if (!ILL_CAN_LOOKUP(ill)) { 10092 mutex_exit(&ill->ill_lock); 10093 ipif_refrele(ipif); 10094 ill = NULL; 10095 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10096 goto multicast_discard; 10097 } 10098 ill_refhold_locked(ill); 10099 mutex_exit(&ill->ill_lock); 10100 ipif_refrele(ipif); 10101 /* 10102 * Save binding until IPV6_MULTICAST_IF 10103 * changes it 10104 */ 10105 mutex_enter(&connp->conn_lock); 10106 connp->conn_multicast_ill = ill; 10107 connp->conn_orig_multicast_ifindex = 10108 ill->ill_phyint->phyint_ifindex; 10109 mutex_exit(&connp->conn_lock); 10110 } 10111 } 10112 if (conn_lock_held) 10113 mutex_exit(&connp->conn_lock); 10114 10115 if (saved_ill != NULL) 10116 ill_refrele(saved_ill); 10117 10118 ASSERT(ill != NULL); 10119 /* 10120 * For multicast loopback interfaces replace the multicast address 10121 * with a unicast address for the ire lookup. 
10122 */ 10123 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 10124 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10125 10126 mibptr = ill->ill_ip6_mib; 10127 if (do_outrequests) { 10128 BUMP_MIB(mibptr, ipv6OutRequests); 10129 do_outrequests = B_FALSE; 10130 } 10131 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10132 10133 /* 10134 * As we may lose the conn by the time we reach ip_wput_ire_v6 10135 * we copy conn_multicast_loop and conn_dontroute on to an 10136 * ipsec_out. In case if this datagram goes out secure, 10137 * we need the ill_index also. Copy that also into the 10138 * ipsec_out. 10139 */ 10140 if (mctl_present) { 10141 io = (ipsec_out_t *)first_mp->b_rptr; 10142 ASSERT(first_mp->b_datap->db_type == M_CTL); 10143 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10144 } else { 10145 ASSERT(mp == first_mp); 10146 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 10147 BUMP_MIB(mibptr, ipv6OutDiscards); 10148 freemsg(mp); 10149 if (ill != NULL) 10150 ill_refrele(ill); 10151 if (need_decref) 10152 CONN_DEC_REF(connp); 10153 return; 10154 } 10155 io = (ipsec_out_t *)first_mp->b_rptr; 10156 /* This is not a secure packet */ 10157 io->ipsec_out_secure = B_FALSE; 10158 io->ipsec_out_use_global_policy = B_TRUE; 10159 io->ipsec_out_zoneid = 10160 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10161 first_mp->b_cont = mp; 10162 mctl_present = B_TRUE; 10163 } 10164 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10165 io->ipsec_out_unspec_src = unspec_src; 10166 if (connp != NULL) 10167 io->ipsec_out_dontroute = connp->conn_dontroute; 10168 10169 send_from_ill: 10170 ASSERT(ill != NULL); 10171 ASSERT(mibptr == ill->ill_ip6_mib); 10172 if (do_outrequests) { 10173 BUMP_MIB(mibptr, ipv6OutRequests); 10174 do_outrequests = B_FALSE; 10175 } 10176 10177 if (io != NULL) 10178 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10179 10180 /* 10181 * When a specific ill is specified (using IPV6_PKTINFO, 10182 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10183 * on routing entries (ftable and ctable) that have a matching 10184 * ire->ire_ipif->ipif_ill. Thus this can only be used 10185 * for destinations that are on-link for the specific ill 10186 * and that can appear on multiple links. Thus it is useful 10187 * for multicast destinations, link-local destinations, and 10188 * at some point perhaps for site-local destinations (if the 10189 * node sits at a site boundary). 10190 * We create the cache entries in the regular ctable since 10191 * it can not "confuse" things for other destinations. 10192 * table. 10193 * 10194 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10195 * It is used only when ire_cache_lookup is used above. 10196 */ 10197 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10198 zoneid, MBLK_GETLABEL(mp), match_flags); 10199 if (ire != NULL) { 10200 /* 10201 * Check if the ire has the RTF_MULTIRT flag, inherited 10202 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10203 */ 10204 if (ire->ire_flags & RTF_MULTIRT) { 10205 /* 10206 * Force hop limit of multirouted packets if required. 10207 * The hop limit of such packets is bounded by the 10208 * ip_multirt_ttl ndd variable. 
10209 * NDP packets must have a hop limit of 255; don't 10210 * change the hop limit in that case. 10211 */ 10212 if ((ip_multirt_ttl > 0) && 10213 (ip6h->ip6_hops > ip_multirt_ttl) && 10214 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10215 if (ip_debug > 3) { 10216 ip2dbg(("ip_wput_v6: forcing multirt " 10217 "hop limit to %d (was %d) ", 10218 ip_multirt_ttl, ip6h->ip6_hops)); 10219 pr_addr_dbg("v6dst %s\n", AF_INET6, 10220 &ire->ire_addr_v6); 10221 } 10222 ip6h->ip6_hops = ip_multirt_ttl; 10223 } 10224 10225 /* 10226 * We look at this point if there are pending 10227 * unresolved routes. ire_multirt_need_resolve_v6() 10228 * checks in O(n) that all IRE_OFFSUBNET ire 10229 * entries for the packet's destination and 10230 * flagged RTF_MULTIRT are currently resolved. 10231 * If some remain unresolved, we make a copy 10232 * of the current message. It will be used 10233 * to initiate additional route resolutions. 10234 */ 10235 multirt_need_resolve = 10236 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10237 MBLK_GETLABEL(first_mp)); 10238 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10239 "multirt_need_resolve %d, first_mp %p\n", 10240 (void *)ire, multirt_need_resolve, 10241 (void *)first_mp)); 10242 if (multirt_need_resolve) { 10243 copy_mp = copymsg(first_mp); 10244 if (copy_mp != NULL) { 10245 MULTIRT_DEBUG_TAG(copy_mp); 10246 } 10247 } 10248 } 10249 10250 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10251 ill->ill_name, (void *)ire, 10252 ill->ill_phyint->phyint_ifindex)); 10253 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10254 connp, caller, 10255 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10256 ip6i_flags); 10257 ire_refrele(ire); 10258 if (need_decref) { 10259 CONN_DEC_REF(connp); 10260 connp = NULL; 10261 } 10262 10263 /* 10264 * Try to resolve another multiroute if 10265 * ire_multirt_need_resolve_v6() deemed it necessary. 10266 * copy_mp will be consumed (sent or freed) by 10267 * ip_newroute_[ipif_]v6(). 
10268 */ 10269 if (copy_mp != NULL) { 10270 if (mctl_present) { 10271 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10272 } else { 10273 ip6h = (ip6_t *)copy_mp->b_rptr; 10274 } 10275 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10276 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10277 zoneid); 10278 if (ipif == NULL) { 10279 ip1dbg(("ip_wput_v6: No ipif for " 10280 "multicast\n")); 10281 MULTIRT_DEBUG_UNTAG(copy_mp); 10282 freemsg(copy_mp); 10283 return; 10284 } 10285 ip_newroute_ipif_v6(q, copy_mp, ipif, 10286 ip6h->ip6_dst, unspec_src, zoneid); 10287 ipif_refrele(ipif); 10288 } else { 10289 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10290 &ip6h->ip6_src, ill, zoneid); 10291 } 10292 } 10293 ill_refrele(ill); 10294 return; 10295 } 10296 if (need_decref) { 10297 CONN_DEC_REF(connp); 10298 connp = NULL; 10299 } 10300 10301 /* Update rptr if there was an ip6i_t header. */ 10302 if (ip6i != NULL) 10303 mp->b_rptr -= sizeof (ip6i_t); 10304 if (unspec_src || attach_if) { 10305 if (ip6i == NULL) { 10306 /* 10307 * Add ip6i_t header to carry unspec_src 10308 * or attach_if until the packet comes back in 10309 * ip_wput_v6. 10310 */ 10311 if (mctl_present) { 10312 first_mp->b_cont = 10313 ip_add_info_v6(mp, NULL, v6dstp); 10314 mp = first_mp->b_cont; 10315 if (mp == NULL) 10316 freeb(first_mp); 10317 } else { 10318 first_mp = mp = ip_add_info_v6(mp, NULL, 10319 v6dstp); 10320 } 10321 if (mp == NULL) { 10322 BUMP_MIB(mibptr, ipv6OutDiscards); 10323 ill_refrele(ill); 10324 return; 10325 } 10326 ip6i = (ip6i_t *)mp->b_rptr; 10327 if ((mp->b_wptr - (uchar_t *)ip6i) == 10328 sizeof (ip6i_t)) { 10329 /* 10330 * ndp_resolver called from ip_newroute_v6 10331 * expects a pulled up message. 
10332 */ 10333 if (!pullupmsg(mp, -1)) { 10334 ip1dbg(("ip_wput_v6: pullupmsg" 10335 " failed\n")); 10336 BUMP_MIB(mibptr, ipv6OutDiscards); 10337 freemsg(first_mp); 10338 return; 10339 } 10340 ip6i = (ip6i_t *)mp->b_rptr; 10341 } 10342 ip6h = (ip6_t *)&ip6i[1]; 10343 v6dstp = &ip6h->ip6_dst; 10344 } 10345 if (unspec_src) 10346 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10347 if (attach_if) { 10348 /* 10349 * Bind to nofailover/BOUND_PIF overrides ifindex. 10350 */ 10351 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10352 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10353 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10354 if (drop_if_delayed) { 10355 /* This is a multipathing probe packet */ 10356 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10357 } 10358 } 10359 if (mctl_present) { 10360 ASSERT(io != NULL); 10361 io->ipsec_out_unspec_src = unspec_src; 10362 } 10363 } 10364 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10365 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10366 unspec_src, zoneid); 10367 } else { 10368 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10369 zoneid); 10370 } 10371 ill_refrele(ill); 10372 return; 10373 10374 notv6: 10375 /* 10376 * XXX implement a IPv4 and IPv6 packet counter per conn and 10377 * switch when ratio exceeds e.g. 
10:1 10378 */ 10379 if (q->q_next == NULL) { 10380 connp = Q_TO_CONN(q); 10381 10382 if (IPCL_IS_TCP(connp)) { 10383 /* change conn_send for the tcp_v4_connections */ 10384 connp->conn_send = ip_output; 10385 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10386 /* The 'q' is the default SCTP queue */ 10387 connp = (conn_t *)arg; 10388 } else { 10389 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 10390 } 10391 } 10392 BUMP_MIB(mibptr, ipv6OutIPv4); 10393 (void) ip_output(connp, first_mp, q, caller); 10394 if (ill != NULL) 10395 ill_refrele(ill); 10396 } 10397 10398 static void 10399 ip_wput_v6(queue_t *q, mblk_t *mp) 10400 { 10401 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10402 } 10403 10404 static void 10405 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10406 { 10407 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10408 io->ipsec_out_attach_if = B_TRUE; 10409 io->ipsec_out_ill_index = attach_index; 10410 } 10411 10412 /* 10413 * NULL send-to queue - packet is to be delivered locally. 10414 */ 10415 void 10416 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10417 ire_t *ire, int fanout_flags) 10418 { 10419 uint32_t ports; 10420 mblk_t *mp = first_mp, *first_mp1; 10421 boolean_t mctl_present; 10422 uint8_t nexthdr; 10423 uint16_t hdr_length; 10424 ipsec_out_t *io; 10425 mib2_ipv6IfStatsEntry_t *mibptr; 10426 ilm_t *ilm; 10427 uint_t nexthdr_offset; 10428 10429 if (DB_TYPE(mp) == M_CTL) { 10430 io = (ipsec_out_t *)mp->b_rptr; 10431 if (!io->ipsec_out_secure) { 10432 mp = mp->b_cont; 10433 freeb(first_mp); 10434 first_mp = mp; 10435 mctl_present = B_FALSE; 10436 } else { 10437 mctl_present = B_TRUE; 10438 mp = first_mp->b_cont; 10439 ipsec_out_to_in(first_mp); 10440 } 10441 } else { 10442 mctl_present = B_FALSE; 10443 } 10444 10445 nexthdr = ip6h->ip6_nxt; 10446 mibptr = ill->ill_ip6_mib; 10447 10448 /* Fastpath */ 10449 switch (nexthdr) { 10450 case IPPROTO_TCP: 10451 case IPPROTO_UDP: 10452 case IPPROTO_ICMPV6: 10453 case IPPROTO_SCTP: 10454 
hdr_length = IPV6_HDR_LEN; 10455 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10456 (uchar_t *)ip6h); 10457 break; 10458 default: { 10459 uint8_t *nexthdrp; 10460 10461 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10462 &hdr_length, &nexthdrp)) { 10463 /* Malformed packet */ 10464 BUMP_MIB(mibptr, ipv6OutDiscards); 10465 freemsg(first_mp); 10466 return; 10467 } 10468 nexthdr = *nexthdrp; 10469 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10470 break; 10471 } 10472 } 10473 10474 10475 UPDATE_OB_PKT_COUNT(ire); 10476 ire->ire_last_used_time = lbolt; 10477 10478 /* 10479 * Remove reacability confirmation bit from version field 10480 * before looping back the packet. 10481 */ 10482 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10483 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10484 } 10485 10486 switch (nexthdr) { 10487 case IPPROTO_TCP: 10488 if (DB_TYPE(mp) == M_DATA) { 10489 /* 10490 * M_DATA mblk, so init mblk (chain) for 10491 * no struio(). 10492 */ 10493 mblk_t *mp1 = mp; 10494 10495 do { 10496 mp1->b_datap->db_struioflag = 0; 10497 } while ((mp1 = mp1->b_cont) != NULL); 10498 } 10499 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10500 TCP_PORTS_OFFSET); 10501 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10502 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10503 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10504 hdr_length, mctl_present, ire->ire_zoneid); 10505 return; 10506 10507 case IPPROTO_UDP: 10508 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10509 UDP_PORTS_OFFSET); 10510 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10511 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10512 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10513 return; 10514 10515 case IPPROTO_SCTP: 10516 { 10517 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10518 10519 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10520 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10521 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10522 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10523 
ire->ire_zoneid); 10524 return; 10525 } 10526 case IPPROTO_ICMPV6: { 10527 icmp6_t *icmp6; 10528 10529 /* check for full IPv6+ICMPv6 header */ 10530 if ((mp->b_wptr - mp->b_rptr) < 10531 (hdr_length + ICMP6_MINLEN)) { 10532 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10533 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10534 " failed\n")); 10535 BUMP_MIB(mibptr, ipv6OutDiscards); 10536 freemsg(first_mp); 10537 return; 10538 } 10539 ip6h = (ip6_t *)mp->b_rptr; 10540 } 10541 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10542 10543 /* Update output mib stats */ 10544 icmp_update_out_mib_v6(ill, icmp6); 10545 10546 /* Check variable for testing applications */ 10547 if (ipv6_drop_inbound_icmpv6) { 10548 freemsg(first_mp); 10549 return; 10550 } 10551 /* 10552 * Assume that there is always at least one conn for 10553 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10554 * where there is no conn. 10555 */ 10556 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10557 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10558 /* 10559 * In the multicast case, applications may have 10560 * joined the group from different zones, so we 10561 * need to deliver the packet to each of them. 10562 * Loop through the multicast memberships 10563 * structures (ilm) on the receive ill and send 10564 * a copy of the packet up each matching one. 10565 * However, we don't do this for multicasts sent 10566 * on the loopback interface (PHYI_LOOPBACK flag 10567 * set) as they must stay in the sender's zone. 
10568 */ 10569 ILM_WALKER_HOLD(ill); 10570 for (ilm = ill->ill_ilm; ilm != NULL; 10571 ilm = ilm->ilm_next) { 10572 if (ilm->ilm_flags & ILM_DELETED) 10573 continue; 10574 if (!IN6_ARE_ADDR_EQUAL( 10575 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10576 continue; 10577 if ((fanout_flags & 10578 IP_FF_NO_MCAST_LOOP) && 10579 ilm->ilm_zoneid == ire->ire_zoneid) 10580 continue; 10581 if (!ipif_lookup_zoneid(ill, 10582 ilm->ilm_zoneid, IPIF_UP, NULL)) 10583 continue; 10584 10585 first_mp1 = ip_copymsg(first_mp); 10586 if (first_mp1 == NULL) 10587 continue; 10588 icmp_inbound_v6(q, first_mp1, ill, 10589 hdr_length, mctl_present, 10590 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10591 NULL); 10592 } 10593 ILM_WALKER_RELE(ill); 10594 } else { 10595 first_mp1 = ip_copymsg(first_mp); 10596 if (first_mp1 != NULL) 10597 icmp_inbound_v6(q, first_mp1, ill, 10598 hdr_length, mctl_present, 10599 IP6_NO_IPPOLICY, ire->ire_zoneid, 10600 NULL); 10601 } 10602 } 10603 /* FALLTHRU */ 10604 default: { 10605 /* 10606 * Handle protocols with which IPv6 is less intimate. 10607 */ 10608 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10609 10610 /* 10611 * Enable sending ICMP for "Unknown" nexthdr 10612 * case. i.e. where we did not FALLTHRU from 10613 * IPPROTO_ICMPV6 processing case above. 10614 */ 10615 if (nexthdr != IPPROTO_ICMPV6) 10616 fanout_flags |= IP_FF_SEND_ICMP; 10617 /* 10618 * Note: There can be more than one stream bound 10619 * to a particular protocol. When this is the case, 10620 * each one gets a copy of any incoming packets. 10621 */ 10622 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10623 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10624 mctl_present, ire->ire_zoneid); 10625 return; 10626 } 10627 } 10628 } 10629 10630 /* 10631 * Send packet using IRE. 10632 * Checksumming is controlled by cksum_request: 10633 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 
10634 * 1 => Skip TCP/UDP/SCTP checksum 10635 * Otherwise => checksum_request contains insert offset for checksum 10636 * 10637 * Assumes that the following set of headers appear in the first 10638 * mblk: 10639 * ip6_t 10640 * Any extension headers 10641 * TCP/UDP/SCTP header (if present) 10642 * The routine can handle an ICMPv6 header that is not in the first mblk. 10643 * 10644 * NOTE : This function does not ire_refrele the ire passed in as the 10645 * argument unlike ip_wput_ire where the REFRELE is done. 10646 * Refer to ip_wput_ire for more on this. 10647 */ 10648 static void 10649 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10650 int cksum_request, conn_t *connp, int caller, int attach_index, int flags) 10651 { 10652 ip6_t *ip6h; 10653 uint8_t nexthdr; 10654 uint16_t hdr_length; 10655 uint_t reachable = 0x0; 10656 ill_t *ill; 10657 mib2_ipv6IfStatsEntry_t *mibptr; 10658 mblk_t *first_mp; 10659 boolean_t mctl_present; 10660 ipsec_out_t *io; 10661 boolean_t conn_dontroute; /* conn value for multicast */ 10662 boolean_t conn_multicast_loop; /* conn value for multicast */ 10663 boolean_t multicast_forward; /* Should we forward ? */ 10664 int max_frag; 10665 zoneid_t zoneid; 10666 10667 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 10668 ill = ire_to_ill(ire); 10669 first_mp = mp; 10670 multicast_forward = B_FALSE; 10671 10672 if (mp->b_datap->db_type != M_CTL) { 10673 ip6h = (ip6_t *)first_mp->b_rptr; 10674 } else { 10675 io = (ipsec_out_t *)first_mp->b_rptr; 10676 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10677 /* 10678 * Grab the zone id now because the M_CTL can be discarded by 10679 * ip_wput_ire_parse_ipsec_out() below. 10680 */ 10681 zoneid = io->ipsec_out_zoneid; 10682 ASSERT(zoneid != ALL_ZONES); 10683 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10684 /* 10685 * For the multicast case, ipsec_out carries conn_dontroute and 10686 * conn_multicast_loop as conn may not be available here. 
We 10687 * need this for multicast loopback and forwarding which is done 10688 * later in the code. 10689 */ 10690 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10691 conn_dontroute = io->ipsec_out_dontroute; 10692 conn_multicast_loop = io->ipsec_out_multicast_loop; 10693 /* 10694 * If conn_dontroute is not set or conn_multicast_loop 10695 * is set, we need to do forwarding/loopback. For 10696 * datagrams from ip_wput_multicast, conn_dontroute is 10697 * set to B_TRUE and conn_multicast_loop is set to 10698 * B_FALSE so that we neither do forwarding nor 10699 * loopback. 10700 */ 10701 if (!conn_dontroute || conn_multicast_loop) 10702 multicast_forward = B_TRUE; 10703 } 10704 } 10705 10706 /* 10707 * If the sender didn't supply the hop limit and there is a default 10708 * unicast hop limit associated with the output interface, we use 10709 * that if the packet is unicast. Interface specific unicast hop 10710 * limits as set via the SIOCSLIFLNKINFO ioctl. 10711 */ 10712 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10713 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10714 ip6h->ip6_hops = ill->ill_max_hops; 10715 } 10716 10717 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10718 ire->ire_zoneid != ALL_ZONES) { 10719 /* 10720 * When a zone sends a packet to another zone, we try to deliver 10721 * the packet under the same conditions as if the destination 10722 * was a real node on the network. To do so, we look for a 10723 * matching route in the forwarding table. 10724 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10725 * ip_newroute_v6() does. 
10726 */ 10727 ire_t *src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10728 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10729 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10730 if (src_ire != NULL && 10731 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 10732 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10733 !unspec_src) { 10734 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10735 } 10736 ire_refrele(src_ire); 10737 } else { 10738 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 10739 if (src_ire != NULL) { 10740 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10741 ire_refrele(src_ire); 10742 freemsg(first_mp); 10743 return; 10744 } 10745 ire_refrele(src_ire); 10746 } 10747 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10748 /* Failed */ 10749 freemsg(first_mp); 10750 return; 10751 } 10752 icmp_unreachable_v6(q, first_mp, 10753 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE); 10754 return; 10755 } 10756 } 10757 10758 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10759 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10760 connp, unspec_src); 10761 if (mp == NULL) { 10762 return; 10763 } 10764 } 10765 10766 first_mp = mp; 10767 if (mp->b_datap->db_type == M_CTL) { 10768 io = (ipsec_out_t *)mp->b_rptr; 10769 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10770 mp = mp->b_cont; 10771 mctl_present = B_TRUE; 10772 } else { 10773 mctl_present = B_FALSE; 10774 } 10775 10776 ip6h = (ip6_t *)mp->b_rptr; 10777 nexthdr = ip6h->ip6_nxt; 10778 mibptr = ill->ill_ip6_mib; 10779 10780 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10781 ipif_t *ipif; 10782 10783 /* 10784 * Select the source address using ipif_select_source_v6. 
10785 */ 10786 if (attach_index != 0) { 10787 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10788 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10789 } else { 10790 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10791 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10792 } 10793 if (ipif == NULL) { 10794 if (ip_debug > 2) { 10795 /* ip1dbg */ 10796 pr_addr_dbg("ip_wput_ire_v6: no src for " 10797 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10798 printf("ip_wput_ire_v6: interface name %s\n", 10799 ill->ill_name); 10800 } 10801 freemsg(first_mp); 10802 return; 10803 } 10804 ip6h->ip6_src = ipif->ipif_v6src_addr; 10805 ipif_refrele(ipif); 10806 } 10807 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10808 if ((connp != NULL && connp->conn_multicast_loop) || 10809 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10810 ilm_t *ilm; 10811 10812 ILM_WALKER_HOLD(ill); 10813 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10814 ILM_WALKER_RELE(ill); 10815 if (ilm != NULL) { 10816 mblk_t *nmp; 10817 int fanout_flags = 0; 10818 10819 if (connp != NULL && 10820 !connp->conn_multicast_loop) { 10821 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10822 } 10823 ip1dbg(("ip_wput_ire_v6: " 10824 "Loopback multicast\n")); 10825 nmp = ip_copymsg(first_mp); 10826 if (nmp != NULL) { 10827 ip6_t *nip6h; 10828 10829 if (mctl_present) { 10830 nip6h = (ip6_t *) 10831 nmp->b_cont->b_rptr; 10832 } else { 10833 nip6h = (ip6_t *)nmp->b_rptr; 10834 } 10835 /* 10836 * Deliver locally and to every local 10837 * zone, except the sending zone when 10838 * IPV6_MULTICAST_LOOP is disabled. 
10839 */ 10840 ip_wput_local_v6(RD(q), ill, nip6h, nmp, 10841 ire, fanout_flags); 10842 } else { 10843 BUMP_MIB(mibptr, ipv6OutDiscards); 10844 ip1dbg(("ip_wput_ire_v6: " 10845 "copymsg failed\n")); 10846 } 10847 } 10848 } 10849 if (ip6h->ip6_hops == 0 || 10850 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10851 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10852 /* 10853 * Local multicast or just loopback on loopback 10854 * interface. 10855 */ 10856 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10857 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10858 freemsg(first_mp); 10859 return; 10860 } 10861 } 10862 10863 if (ire->ire_stq != NULL) { 10864 uint32_t sum; 10865 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10866 ill_phyint->phyint_ifindex; 10867 queue_t *dev_q = ire->ire_stq->q_next; 10868 10869 /* 10870 * non-NULL send-to queue - packet is to be sent 10871 * out an interface. 10872 */ 10873 10874 /* Driver is flow-controlling? */ 10875 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10876 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 10877 /* 10878 * Queue packet if we have an conn to give back 10879 * pressure. We can't queue packets intended for 10880 * hardware acceleration since we've tossed that 10881 * state already. If the packet is being fed back 10882 * from ire_send_v6, we don't know the position in 10883 * the queue to enqueue the packet and we discard 10884 * the packet. 10885 */ 10886 if (ip_output_queue && connp != NULL && 10887 !mctl_present && caller != IRE_SEND) { 10888 if (caller == IP_WSRV) { 10889 connp->conn_did_putbq = 1; 10890 (void) putbq(connp->conn_wq, mp); 10891 conn_drain_insert(connp); 10892 /* 10893 * caller == IP_WSRV implies we are 10894 * the service thread, and the 10895 * queue is already noenabled. 10896 * The check for canput and 10897 * the putbq is not atomic. 10898 * So we need to check again. 
10899 */ 10900 if (canput(dev_q)) 10901 connp->conn_did_putbq = 0; 10902 } else { 10903 (void) putq(connp->conn_wq, mp); 10904 } 10905 return; 10906 } 10907 BUMP_MIB(mibptr, ipv6OutDiscards); 10908 freemsg(first_mp); 10909 return; 10910 } 10911 10912 /* 10913 * Look for reachability confirmations from the transport. 10914 */ 10915 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10916 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10917 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10918 if (mctl_present) 10919 io->ipsec_out_reachable = B_TRUE; 10920 } 10921 /* Fastpath */ 10922 switch (nexthdr) { 10923 case IPPROTO_TCP: 10924 case IPPROTO_UDP: 10925 case IPPROTO_ICMPV6: 10926 case IPPROTO_SCTP: 10927 hdr_length = IPV6_HDR_LEN; 10928 break; 10929 default: { 10930 uint8_t *nexthdrp; 10931 10932 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10933 &hdr_length, &nexthdrp)) { 10934 /* Malformed packet */ 10935 BUMP_MIB(mibptr, ipv6OutDiscards); 10936 freemsg(first_mp); 10937 return; 10938 } 10939 nexthdr = *nexthdrp; 10940 break; 10941 } 10942 } 10943 10944 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10945 uint16_t *up; 10946 uint16_t *insp; 10947 10948 /* 10949 * The packet header is processed once for all, even 10950 * in the multirouting case. We disable hardware 10951 * checksum if the packet is multirouted, as it will be 10952 * replicated via several interfaces, and not all of 10953 * them may have this capability. 10954 */ 10955 if (cksum_request == 1 && 10956 !(ire->ire_flags & RTF_MULTIRT)) { 10957 /* Skip the transport checksum */ 10958 goto cksum_done; 10959 } 10960 /* 10961 * Do user-configured raw checksum. 
10962 * Compute checksum and insert at offset "cksum_request" 10963 */ 10964 10965 /* check for enough headers for checksum */ 10966 cksum_request += hdr_length; /* offset from rptr */ 10967 if ((mp->b_wptr - mp->b_rptr) < 10968 (cksum_request + sizeof (int16_t))) { 10969 if (!pullupmsg(mp, 10970 cksum_request + sizeof (int16_t))) { 10971 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10972 " failed\n")); 10973 BUMP_MIB(mibptr, ipv6OutDiscards); 10974 freemsg(first_mp); 10975 return; 10976 } 10977 ip6h = (ip6_t *)mp->b_rptr; 10978 } 10979 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10980 ASSERT(((uintptr_t)insp & 0x1) == 0); 10981 up = (uint16_t *)&ip6h->ip6_src; 10982 /* 10983 * icmp has placed length and routing 10984 * header adjustment in *insp. 10985 */ 10986 sum = htons(nexthdr) + 10987 up[0] + up[1] + up[2] + up[3] + 10988 up[4] + up[5] + up[6] + up[7] + 10989 up[8] + up[9] + up[10] + up[11] + 10990 up[12] + up[13] + up[14] + up[15]; 10991 sum = (sum & 0xffff) + (sum >> 16); 10992 *insp = IP_CSUM(mp, hdr_length, sum); 10993 if (*insp == 0) 10994 *insp = 0xFFFF; 10995 } else if (nexthdr == IPPROTO_TCP) { 10996 uint16_t *up; 10997 10998 /* 10999 * Check for full IPv6 header + enough TCP header 11000 * to get at the checksum field. 11001 */ 11002 if ((mp->b_wptr - mp->b_rptr) < 11003 (hdr_length + TCP_CHECKSUM_OFFSET + 11004 TCP_CHECKSUM_SIZE)) { 11005 if (!pullupmsg(mp, hdr_length + 11006 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11007 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11008 " failed\n")); 11009 BUMP_MIB(mibptr, ipv6OutDiscards); 11010 freemsg(first_mp); 11011 return; 11012 } 11013 ip6h = (ip6_t *)mp->b_rptr; 11014 } 11015 11016 up = (uint16_t *)&ip6h->ip6_src; 11017 /* 11018 * Note: The TCP module has stored the length value 11019 * into the tcp checksum field, so we don't 11020 * need to explicitly sum it in here. 
11021 */ 11022 sum = up[0] + up[1] + up[2] + up[3] + 11023 up[4] + up[5] + up[6] + up[7] + 11024 up[8] + up[9] + up[10] + up[11] + 11025 up[12] + up[13] + up[14] + up[15]; 11026 11027 /* Fold the initial sum */ 11028 sum = (sum & 0xffff) + (sum >> 16); 11029 11030 up = (uint16_t *)(((uchar_t *)ip6h) + 11031 hdr_length + TCP_CHECKSUM_OFFSET); 11032 11033 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11034 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11035 ire->ire_max_frag, mctl_present, sum); 11036 11037 /* Software checksum? */ 11038 if (DB_CKSUMFLAGS(mp) == 0) { 11039 IP6_STAT(ip6_out_sw_cksum); 11040 IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, 11041 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11042 hdr_length); 11043 } 11044 } else if (nexthdr == IPPROTO_UDP) { 11045 uint16_t *up; 11046 11047 /* 11048 * check for full IPv6 header + enough UDP header 11049 * to get at the UDP checksum field 11050 */ 11051 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11052 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11053 if (!pullupmsg(mp, hdr_length + 11054 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11055 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11056 " failed\n")); 11057 BUMP_MIB(mibptr, ipv6OutDiscards); 11058 freemsg(first_mp); 11059 return; 11060 } 11061 ip6h = (ip6_t *)mp->b_rptr; 11062 } 11063 up = (uint16_t *)&ip6h->ip6_src; 11064 /* 11065 * Note: The UDP module has stored the length value 11066 * into the udp checksum field, so we don't 11067 * need to explicitly sum it in here. 
11068 */ 11069 sum = up[0] + up[1] + up[2] + up[3] + 11070 up[4] + up[5] + up[6] + up[7] + 11071 up[8] + up[9] + up[10] + up[11] + 11072 up[12] + up[13] + up[14] + up[15]; 11073 11074 /* Fold the initial sum */ 11075 sum = (sum & 0xffff) + (sum >> 16); 11076 11077 up = (uint16_t *)(((uchar_t *)ip6h) + 11078 hdr_length + UDP_CHECKSUM_OFFSET); 11079 11080 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11081 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11082 ire->ire_max_frag, mctl_present, sum); 11083 11084 /* Software checksum? */ 11085 if (DB_CKSUMFLAGS(mp) == 0) { 11086 IP6_STAT(ip6_out_sw_cksum); 11087 IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, 11088 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11089 hdr_length); 11090 } 11091 } else if (nexthdr == IPPROTO_ICMPV6) { 11092 uint16_t *up; 11093 icmp6_t *icmp6; 11094 11095 /* check for full IPv6+ICMPv6 header */ 11096 if ((mp->b_wptr - mp->b_rptr) < 11097 (hdr_length + ICMP6_MINLEN)) { 11098 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11099 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11100 " failed\n")); 11101 BUMP_MIB(mibptr, ipv6OutDiscards); 11102 freemsg(first_mp); 11103 return; 11104 } 11105 ip6h = (ip6_t *)mp->b_rptr; 11106 } 11107 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11108 up = (uint16_t *)&ip6h->ip6_src; 11109 /* 11110 * icmp has placed length and routing 11111 * header adjustment in icmp6_cksum. 
11112 */ 11113 sum = htons(IPPROTO_ICMPV6) + 11114 up[0] + up[1] + up[2] + up[3] + 11115 up[4] + up[5] + up[6] + up[7] + 11116 up[8] + up[9] + up[10] + up[11] + 11117 up[12] + up[13] + up[14] + up[15]; 11118 sum = (sum & 0xffff) + (sum >> 16); 11119 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11120 if (icmp6->icmp6_cksum == 0) 11121 icmp6->icmp6_cksum = 0xFFFF; 11122 11123 /* Update output mib stats */ 11124 icmp_update_out_mib_v6(ill, icmp6); 11125 } else if (nexthdr == IPPROTO_SCTP) { 11126 sctp_hdr_t *sctph; 11127 11128 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11129 if (!pullupmsg(mp, hdr_length + 11130 sizeof (*sctph))) { 11131 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11132 " failed\n")); 11133 BUMP_MIB(ill->ill_ip6_mib, 11134 ipv6OutDiscards); 11135 freemsg(mp); 11136 return; 11137 } 11138 ip6h = (ip6_t *)mp->b_rptr; 11139 } 11140 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11141 sctph->sh_chksum = 0; 11142 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11143 } 11144 11145 cksum_done: 11146 /* 11147 * We force the insertion of a fragment header using the 11148 * IPH_FRAG_HDR flag in two cases: 11149 * - after reception of an ICMPv6 "packet too big" message 11150 * with a MTU < 1280 (cf. RFC 2460 section 5) 11151 * - for multirouted IPv6 packets, so that the receiver can 11152 * discard duplicates according to their fragment identifier 11153 * 11154 * Two flags modifed from the API can modify this behavior. 11155 * The first is IPV6_USE_MIN_MTU. With this API the user 11156 * can specify how to manage PMTUD for unicast and multicast. 11157 * 11158 * IPV6_DONTFRAG disallows fragmentation. 
11159 */ 11160 max_frag = ire->ire_max_frag; 11161 switch (IP6I_USE_MIN_MTU_API(flags)) { 11162 case IPV6_USE_MIN_MTU_DEFAULT: 11163 case IPV6_USE_MIN_MTU_UNICAST: 11164 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11165 max_frag = IPV6_MIN_MTU; 11166 } 11167 break; 11168 11169 case IPV6_USE_MIN_MTU_NEVER: 11170 max_frag = IPV6_MIN_MTU; 11171 break; 11172 } 11173 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11174 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11175 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11176 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11177 max_frag, B_FALSE, B_TRUE); 11178 return; 11179 } 11180 11181 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11182 (mp->b_cont ? msgdsize(mp) : 11183 mp->b_wptr - (uchar_t *)ip6h)) { 11184 ip0dbg(("Packet length mismatch: %d, %ld\n", 11185 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11186 msgdsize(mp))); 11187 freemsg(first_mp); 11188 return; 11189 } 11190 /* Do IPSEC processing first */ 11191 if (mctl_present) { 11192 if (attach_index != 0) 11193 ipsec_out_attach_if(io, attach_index); 11194 ipsec_out_process(q, first_mp, ire, ill_index); 11195 return; 11196 } 11197 ASSERT(mp->b_prev == NULL); 11198 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11199 ntohs(ip6h->ip6_plen) + 11200 IPV6_HDR_LEN, max_frag)); 11201 ASSERT(mp == first_mp); 11202 /* Initiate IPPF processing */ 11203 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 11204 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11205 if (mp == NULL) { 11206 return; 11207 } 11208 } 11209 ip_wput_frag_v6(mp, ire, reachable, connp, 11210 caller, max_frag); 11211 return; 11212 } 11213 /* Do IPSEC processing first */ 11214 if (mctl_present) { 11215 int extra_len = ipsec_out_extra_length(first_mp); 11216 11217 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11218 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 11219 /* 11220 * IPsec headers will push the packet over the 11221 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11222 * message for this packet if the upper-layer 11223 * that issued this packet will be able to 11224 * react to the icmp_pkt2big_v6() that we'll 11225 * generate. 11226 */ 11227 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11228 max_frag, B_FALSE, B_TRUE); 11229 return; 11230 } 11231 if (attach_index != 0) 11232 ipsec_out_attach_if(io, attach_index); 11233 ipsec_out_process(q, first_mp, ire, ill_index); 11234 return; 11235 } 11236 /* 11237 * XXX multicast: add ip_mforward_v6() here. 11238 * Check conn_dontroute 11239 */ 11240 #ifdef lint 11241 /* 11242 * XXX The only purpose of this statement is to avoid lint 11243 * errors. See the above "XXX multicast". When that gets 11244 * fixed, remove this whole #ifdef lint section. 11245 */ 11246 ip3dbg(("multicast forward is %s.\n", 11247 (multicast_forward ? "TRUE" : "FALSE"))); 11248 #endif 11249 11250 UPDATE_OB_PKT_COUNT(ire); 11251 ire->ire_last_used_time = lbolt; 11252 ASSERT(mp == first_mp); 11253 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11254 } else { 11255 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11256 } 11257 } 11258 11259 /* 11260 * Outbound IPv6 fragmentation routine using MDT. 11261 */ 11262 static void 11263 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11264 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11265 { 11266 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11267 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11268 mblk_t *hdr_mp, *md_mp = NULL; 11269 int i1; 11270 multidata_t *mmd; 11271 unsigned char *hdr_ptr, *pld_ptr; 11272 ip_pdescinfo_t pdi; 11273 uint32_t ident; 11274 size_t len; 11275 uint16_t offset; 11276 queue_t *stq = ire->ire_stq; 11277 ill_t *ill = (ill_t *)stq->q_ptr; 11278 11279 ASSERT(DB_TYPE(mp) == M_DATA); 11280 ASSERT(MBLKL(mp) > unfragmentable_len); 11281 11282 /* 11283 * Move read ptr past unfragmentable portion, we don't want this part 11284 * of the data in our fragments. 
11285 */ 11286 mp->b_rptr += unfragmentable_len; 11287 11288 /* Calculate how many packets we will send out */ 11289 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11290 pkts = (i1 + max_chunk - 1) / max_chunk; 11291 ASSERT(pkts > 1); 11292 11293 /* Allocate a message block which will hold all the IP Headers. */ 11294 wroff = ip_wroff_extra; 11295 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11296 11297 i1 = pkts * hdr_chunk_len; 11298 /* 11299 * Create the header buffer, Multidata and destination address 11300 * and SAP attribute that should be associated with it. 11301 */ 11302 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11303 ((hdr_mp->b_wptr += i1), 11304 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11305 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11306 freemsg(mp); 11307 if (md_mp == NULL) { 11308 freemsg(hdr_mp); 11309 } else { 11310 free_mmd: IP6_STAT(ip6_frag_mdt_discarded); 11311 freemsg(md_mp); 11312 } 11313 IP6_STAT(ip6_frag_mdt_allocfail); 11314 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11315 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutDiscards, pkts); 11316 return; 11317 } 11318 IP6_STAT(ip6_frag_mdt_allocd); 11319 11320 /* 11321 * Add a payload buffer to the Multidata; this operation must not 11322 * fail, or otherwise our logic in this routine is broken. There 11323 * is no memory allocation done by the routine, so any returned 11324 * failure simply tells us that we've done something wrong. 11325 * 11326 * A failure tells us that either we're adding the same payload 11327 * buffer more than once, or we're trying to add more buffers than 11328 * allowed. None of the above cases should happen, and we panic 11329 * because either there's horrible heap corruption, and/or 11330 * programming mistake. 
11331 */ 11332 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11333 goto pbuf_panic; 11334 } 11335 11336 hdr_ptr = hdr_mp->b_rptr; 11337 pld_ptr = mp->b_rptr; 11338 11339 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11340 11341 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11342 11343 /* 11344 * len is the total length of the fragmentable data in this 11345 * datagram. For each fragment sent, we will decrement len 11346 * by the amount of fragmentable data sent in that fragment 11347 * until len reaches zero. 11348 */ 11349 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11350 11351 offset = 0; 11352 prev_nexthdr_offset += wroff; 11353 11354 while (len != 0) { 11355 size_t mlen; 11356 ip6_t *fip6h; 11357 ip6_frag_t *fraghdr; 11358 int error; 11359 11360 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11361 mlen = MIN(len, max_chunk); 11362 len -= mlen; 11363 11364 fip6h = (ip6_t *)(hdr_ptr + wroff); 11365 ASSERT(OK_32PTR(fip6h)); 11366 bcopy(ip6h, fip6h, unfragmentable_len); 11367 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11368 11369 fip6h->ip6_plen = htons((uint16_t)(mlen + 11370 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11371 11372 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11373 unfragmentable_len); 11374 fraghdr->ip6f_nxt = nexthdr; 11375 fraghdr->ip6f_reserved = 0; 11376 fraghdr->ip6f_offlg = htons(offset) | 11377 ((len != 0) ? IP6F_MORE_FRAG : 0); 11378 fraghdr->ip6f_ident = ident; 11379 11380 /* 11381 * Record offset and size of header and data of the next packet 11382 * in the multidata message. 
11383 */ 11384 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11385 unfragmentable_len + sizeof (ip6_frag_t), 0); 11386 PDESC_PLD_INIT(&pdi); 11387 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11388 ASSERT(i1 > 0); 11389 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11390 if (i1 == mlen) { 11391 pld_ptr += mlen; 11392 } else { 11393 i1 = mlen - i1; 11394 mp = mp->b_cont; 11395 ASSERT(mp != NULL); 11396 ASSERT(MBLKL(mp) >= i1); 11397 /* 11398 * Attach the next payload message block to the 11399 * multidata message. 11400 */ 11401 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11402 goto pbuf_panic; 11403 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11404 pld_ptr = mp->b_rptr + i1; 11405 } 11406 11407 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11408 KM_NOSLEEP)) == NULL) { 11409 /* 11410 * Any failure other than ENOMEM indicates that we 11411 * have passed in invalid pdesc info or parameters 11412 * to mmd_addpdesc, which must not happen. 11413 * 11414 * EINVAL is a result of failure on boundary checks 11415 * against the pdesc info contents. It should not 11416 * happen, and we panic because either there's 11417 * horrible heap corruption, and/or programming 11418 * mistake. 11419 */ 11420 if (error != ENOMEM) { 11421 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11422 "pdesc logic error detected for " 11423 "mmd %p pinfo %p (%d)\n", 11424 (void *)mmd, (void *)&pdi, error); 11425 /* NOTREACHED */ 11426 } 11427 IP6_STAT(ip6_frag_mdt_addpdescfail); 11428 /* Free unattached payload message blocks as well */ 11429 md_mp->b_cont = mp->b_cont; 11430 goto free_mmd; 11431 } 11432 11433 /* Advance fragment offset. */ 11434 offset += mlen; 11435 11436 /* Advance to location for next header in the buffer. */ 11437 hdr_ptr += hdr_chunk_len; 11438 11439 /* Did we reach the next payload message block? */ 11440 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11441 mp = mp->b_cont; 11442 /* 11443 * Attach the next message block with payload 11444 * data to the multidata message. 
11445 */ 11446 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11447 goto pbuf_panic; 11448 pld_ptr = mp->b_rptr; 11449 } 11450 } 11451 11452 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11453 ASSERT(mp->b_wptr == pld_ptr); 11454 11455 /* Update IP statistics */ 11456 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutFragCreates, pkts); 11457 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11458 IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); 11459 11460 ire->ire_ob_pkt_count += pkts; 11461 if (ire->ire_ipif != NULL) 11462 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11463 11464 ire->ire_last_used_time = lbolt; 11465 /* Send it down */ 11466 putnext(stq, md_mp); 11467 return; 11468 11469 pbuf_panic: 11470 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11471 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11472 pbuf_idx); 11473 /* NOTREACHED */ 11474 } 11475 11476 /* 11477 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11478 * We have not optimized this in terms of number of mblks 11479 * allocated. For instance, for each fragment sent we always allocate a 11480 * mblk to hold the IPv6 header and fragment header. 11481 * 11482 * Assumes that all the extension headers are contained in the first mblk. 11483 * 11484 * The fragment header is inserted after an hop-by-hop options header 11485 * and after [an optional destinations header followed by] a routing header. 11486 * 11487 * NOTE : This function does not ire_refrele the ire passed in as 11488 * the argument. 
11489 */ 11490 void 11491 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11492 int caller, int max_frag) 11493 { 11494 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11495 ip6_t *fip6h; 11496 mblk_t *hmp; 11497 mblk_t *hmp0; 11498 mblk_t *dmp; 11499 ip6_frag_t *fraghdr; 11500 size_t unfragmentable_len; 11501 size_t len; 11502 size_t mlen; 11503 size_t max_chunk; 11504 uint32_t ident; 11505 uint16_t off_flags; 11506 uint16_t offset = 0; 11507 ill_t *ill; 11508 uint8_t nexthdr; 11509 uint_t prev_nexthdr_offset; 11510 uint8_t *ptr; 11511 11512 ASSERT(ire->ire_type == IRE_CACHE); 11513 ill = (ill_t *)ire->ire_stq->q_ptr; 11514 11515 /* 11516 * Determine the length of the unfragmentable portion of this 11517 * datagram. This consists of the IPv6 header, a potential 11518 * hop-by-hop options header, a potential pre-routing-header 11519 * destination options header, and a potential routing header. 11520 */ 11521 nexthdr = ip6h->ip6_nxt; 11522 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11523 ptr = (uint8_t *)&ip6h[1]; 11524 11525 if (nexthdr == IPPROTO_HOPOPTS) { 11526 ip6_hbh_t *hbh_hdr; 11527 uint_t hdr_len; 11528 11529 hbh_hdr = (ip6_hbh_t *)ptr; 11530 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11531 nexthdr = hbh_hdr->ip6h_nxt; 11532 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11533 - (uint8_t *)ip6h; 11534 ptr += hdr_len; 11535 } 11536 if (nexthdr == IPPROTO_DSTOPTS) { 11537 ip6_dest_t *dest_hdr; 11538 uint_t hdr_len; 11539 11540 dest_hdr = (ip6_dest_t *)ptr; 11541 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11542 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11543 nexthdr = dest_hdr->ip6d_nxt; 11544 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11545 - (uint8_t *)ip6h; 11546 ptr += hdr_len; 11547 } 11548 } 11549 if (nexthdr == IPPROTO_ROUTING) { 11550 ip6_rthdr_t *rthdr; 11551 uint_t hdr_len; 11552 11553 rthdr = (ip6_rthdr_t *)ptr; 11554 nexthdr = rthdr->ip6r_nxt; 11555 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 
11556 - (uint8_t *)ip6h; 11557 hdr_len = 8 * (rthdr->ip6r_len + 1); 11558 ptr += hdr_len; 11559 } 11560 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11561 11562 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11563 sizeof (ip6_frag_t)) & ~7; 11564 11565 /* Check if we can use MDT to send out the frags. */ 11566 ASSERT(!IRE_IS_LOCAL(ire)); 11567 if (ip_multidata_outbound && reachable == 0 && 11568 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11569 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11570 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11571 nexthdr, prev_nexthdr_offset); 11572 return; 11573 } 11574 11575 /* 11576 * Allocate an mblk with enough room for the link-layer 11577 * header, the unfragmentable part of the datagram, and the 11578 * fragment header. This (or a copy) will be used as the 11579 * first mblk for each fragment we send. 11580 */ 11581 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 11582 BPRI_HI); 11583 if (hmp == NULL) { 11584 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11585 freemsg(mp); 11586 return; 11587 } 11588 hmp->b_rptr += ip_wroff_extra; 11589 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11590 11591 fip6h = (ip6_t *)hmp->b_rptr; 11592 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11593 11594 bcopy(ip6h, fip6h, unfragmentable_len); 11595 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11596 11597 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11598 11599 fraghdr->ip6f_nxt = nexthdr; 11600 fraghdr->ip6f_reserved = 0; 11601 fraghdr->ip6f_offlg = 0; 11602 fraghdr->ip6f_ident = htonl(ident); 11603 11604 /* 11605 * len is the total length of the fragmentable data in this 11606 * datagram. For each fragment sent, we will decrement len 11607 * by the amount of fragmentable data sent in that fragment 11608 * until len reaches zero. 
11609 */ 11610 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11611 11612 /* 11613 * Move read ptr past unfragmentable portion, we don't want this part 11614 * of the data in our fragments. 11615 */ 11616 mp->b_rptr += unfragmentable_len; 11617 11618 while (len != 0) { 11619 mlen = MIN(len, max_chunk); 11620 len -= mlen; 11621 if (len != 0) { 11622 /* Not last */ 11623 hmp0 = copyb(hmp); 11624 if (hmp0 == NULL) { 11625 freeb(hmp); 11626 freemsg(mp); 11627 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11628 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11629 return; 11630 } 11631 off_flags = IP6F_MORE_FRAG; 11632 } else { 11633 /* Last fragment */ 11634 hmp0 = hmp; 11635 hmp = NULL; 11636 off_flags = 0; 11637 } 11638 fip6h = (ip6_t *)(hmp0->b_rptr); 11639 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11640 11641 fip6h->ip6_plen = htons((uint16_t)(mlen + 11642 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11643 /* 11644 * Note: Optimization alert. 11645 * In IPv6 (and IPv4) protocol header, Fragment Offset 11646 * ("offset") is 13 bits wide and in 8-octet units. 11647 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11648 * it occupies the most significant 13 bits. 11649 * (least significant 13 bits in IPv4). 11650 * We do not do any shifts here. Not shifting is same effect 11651 * as taking offset value in octet units, dividing by 8 and 11652 * then shifting 3 bits left to line it up in place in proper 11653 * place protocol header. 
11654 */ 11655 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11656 11657 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11658 /* mp has already been freed by ip_carve_mp() */ 11659 if (hmp != NULL) 11660 freeb(hmp); 11661 freeb(hmp0); 11662 ip1dbg(("ip_carve_mp: failed\n")); 11663 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11664 return; 11665 } 11666 hmp0->b_cont = dmp; 11667 /* Get the priority marking, if any */ 11668 hmp0->b_band = dmp->b_band; 11669 UPDATE_OB_PKT_COUNT(ire); 11670 ire->ire_last_used_time = lbolt; 11671 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11672 caller, NULL); 11673 reachable = 0; /* No need to redo state machine in loop */ 11674 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates); 11675 offset += mlen; 11676 } 11677 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11678 } 11679 11680 /* 11681 * Determine if the ill and multicast aspects of that packets 11682 * "matches" the conn. 11683 */ 11684 boolean_t 11685 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11686 zoneid_t zoneid) 11687 { 11688 ill_t *in_ill; 11689 boolean_t wantpacket = B_TRUE; 11690 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11691 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11692 11693 /* 11694 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11695 * unicast and multicast reception to conn_incoming_ill. 11696 * conn_wantpacket_v6 is called both for unicast and 11697 * multicast. 11698 * 11699 * 1) The unicast copy of the packet can come anywhere in 11700 * the ill group if it is part of the group. Thus, we 11701 * need to check to see whether the ill group matches 11702 * if in_ill is part of a group. 11703 * 11704 * 2) ip_rput does not suppress duplicate multicast packets. 
11705 * If there are two interfaces in a ill group and we have 11706 * 2 applications (conns) joined a multicast group G on 11707 * both the interfaces, ilm_lookup_ill filter in ip_rput 11708 * will give us two packets because we join G on both the 11709 * interfaces rather than nominating just one interface 11710 * for receiving multicast like broadcast above. So, 11711 * we have to call ilg_lookup_ill to filter out duplicate 11712 * copies, if ill is part of a group, to supress duplicates. 11713 */ 11714 in_ill = connp->conn_incoming_ill; 11715 if (in_ill != NULL) { 11716 mutex_enter(&connp->conn_lock); 11717 in_ill = connp->conn_incoming_ill; 11718 mutex_enter(&ill->ill_lock); 11719 /* 11720 * No IPMP, and the packet did not arrive on conn_incoming_ill 11721 * OR, IPMP in use and the packet arrived on an IPMP group 11722 * different from the conn_incoming_ill's IPMP group. 11723 * Reject the packet. 11724 */ 11725 if ((in_ill->ill_group == NULL && in_ill != ill) || 11726 (in_ill->ill_group != NULL && 11727 in_ill->ill_group != ill->ill_group)) { 11728 wantpacket = B_FALSE; 11729 } 11730 mutex_exit(&ill->ill_lock); 11731 mutex_exit(&connp->conn_lock); 11732 if (!wantpacket) 11733 return (B_FALSE); 11734 } 11735 11736 if (connp->conn_multi_router) 11737 return (B_TRUE); 11738 11739 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11740 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11741 /* 11742 * Unicast case: we match the conn only if it's in the specified 11743 * zone. 11744 */ 11745 return (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES); 11746 } 11747 11748 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11749 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11750 /* 11751 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11752 * disabled, therefore we don't dispatch the multicast packet to 11753 * the sending zone. 
11754 */ 11755 return (B_FALSE); 11756 } 11757 11758 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11759 connp->conn_zoneid != zoneid && zoneid != ALL_ZONES) { 11760 /* 11761 * Multicast packet on the loopback interface: we only match 11762 * conns who joined the group in the specified zone. 11763 */ 11764 return (B_FALSE); 11765 } 11766 11767 mutex_enter(&connp->conn_lock); 11768 wantpacket = 11769 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11770 mutex_exit(&connp->conn_lock); 11771 11772 return (wantpacket); 11773 } 11774 11775 11776 /* 11777 * Transmit a packet and update any NUD state based on the flags 11778 * XXX need to "recover" any ip6i_t when doing putq! 11779 * 11780 * NOTE : This function does not ire_refrele the ire passed in as the 11781 * argument. 11782 */ 11783 void 11784 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11785 int caller, ipsec_out_t *io) 11786 { 11787 mblk_t *mp1; 11788 nce_t *nce = ire->ire_nce; 11789 ill_t *ill; 11790 uint64_t delta; 11791 ip6_t *ip6h; 11792 queue_t *stq = ire->ire_stq; 11793 ire_t *ire1 = NULL; 11794 ire_t *save_ire = ire; 11795 boolean_t multirt_send = B_FALSE; 11796 mblk_t *next_mp = NULL; 11797 11798 ip6h = (ip6_t *)mp->b_rptr; 11799 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11800 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11801 ASSERT(nce != NULL); 11802 ASSERT(mp->b_datap->db_type == M_DATA); 11803 ASSERT(stq != NULL); 11804 11805 ill = ire_to_ill(ire); 11806 if (!ill) { 11807 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11808 freemsg(mp); 11809 return; 11810 } 11811 11812 /* 11813 * If a packet is to be sent out an interface that is a 6to4 11814 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11815 * destination, must be checked to have a 6to4 prefix 11816 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11817 * address configured on the sending interface. 
Otherwise, 11818 * the packet was delivered to this interface in error and the 11819 * packet must be dropped. 11820 */ 11821 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11822 ipif_t *ipif = ill->ill_ipif; 11823 11824 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11825 &ip6h->ip6_dst)) { 11826 if (ip_debug > 2) { 11827 /* ip1dbg */ 11828 pr_addr_dbg("ip_xmit_v6: attempting to " 11829 "send 6to4 addressed IPv6 " 11830 "destination (%s) out the wrong " 11831 "interface.\n", AF_INET6, 11832 &ip6h->ip6_dst); 11833 } 11834 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 11835 freemsg(mp); 11836 return; 11837 } 11838 } 11839 11840 /* Flow-control check has been done in ip_wput_ire_v6 */ 11841 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11842 caller == IP_WSRV || canput(stq->q_next)) { 11843 uint32_t ill_index; 11844 11845 /* 11846 * In most cases, the emission loop below is entered only 11847 * once. Only in the case where the ire holds the 11848 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11849 * flagged ires in the bucket, and send the packet 11850 * through all crossed RTF_MULTIRT routes. 11851 */ 11852 if (ire->ire_flags & RTF_MULTIRT) { 11853 /* 11854 * Multirouting case. The bucket where ire is stored 11855 * probably holds other RTF_MULTIRT flagged ires 11856 * to the destination. In this call to ip_xmit_v6, 11857 * we attempt to send the packet through all 11858 * those ires. Thus, we first ensure that ire is the 11859 * first RTF_MULTIRT ire in the bucket, 11860 * before walking the ire list. 11861 */ 11862 ire_t *first_ire; 11863 irb_t *irb = ire->ire_bucket; 11864 ASSERT(irb != NULL); 11865 multirt_send = B_TRUE; 11866 11867 /* Make sure we do not omit any multiroute ire. 
*/ 11868 IRB_REFHOLD(irb); 11869 for (first_ire = irb->irb_ire; 11870 first_ire != NULL; 11871 first_ire = first_ire->ire_next) { 11872 if ((first_ire->ire_flags & RTF_MULTIRT) && 11873 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11874 &ire->ire_addr_v6)) && 11875 !(first_ire->ire_marks & 11876 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 11877 break; 11878 } 11879 11880 if ((first_ire != NULL) && (first_ire != ire)) { 11881 IRE_REFHOLD(first_ire); 11882 /* ire will be released by the caller */ 11883 ire = first_ire; 11884 nce = ire->ire_nce; 11885 stq = ire->ire_stq; 11886 ill = ire_to_ill(ire); 11887 } 11888 IRB_REFRELE(irb); 11889 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11890 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11891 ILL_MDT_USABLE(ill)) { 11892 /* 11893 * This tcp connection was marked as MDT-capable, but 11894 * it has been turned off due changes in the interface. 11895 * Now that the interface support is back, turn it on 11896 * by notifying tcp. We don't directly modify tcp_mdt, 11897 * since we leave all the details to the tcp code that 11898 * knows better. 11899 */ 11900 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11901 11902 if (mdimp == NULL) { 11903 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11904 "connp %p (ENOMEM)\n", (void *)connp)); 11905 } else { 11906 CONN_INC_REF(connp); 11907 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 11908 connp, SQTAG_TCP_INPUT_MCTL); 11909 } 11910 } 11911 11912 do { 11913 boolean_t qos_done = B_FALSE; 11914 11915 if (multirt_send) { 11916 irb_t *irb; 11917 /* 11918 * We are in a multiple send case, need to get 11919 * the next ire and make a duplicate of the 11920 * packet. ire1 holds here the next ire to 11921 * process in the bucket. If multirouting is 11922 * expected, any non-RTF_MULTIRT ire that has 11923 * the right destination address is ignored. 
11924 */ 11925 irb = ire->ire_bucket; 11926 ASSERT(irb != NULL); 11927 11928 IRB_REFHOLD(irb); 11929 for (ire1 = ire->ire_next; 11930 ire1 != NULL; 11931 ire1 = ire1->ire_next) { 11932 if (!(ire1->ire_flags & RTF_MULTIRT)) 11933 continue; 11934 if (!IN6_ARE_ADDR_EQUAL( 11935 &ire1->ire_addr_v6, 11936 &ire->ire_addr_v6)) 11937 continue; 11938 if (ire1->ire_marks & 11939 (IRE_MARK_CONDEMNED| 11940 IRE_MARK_HIDDEN)) 11941 continue; 11942 11943 /* Got one */ 11944 if (ire1 != save_ire) { 11945 IRE_REFHOLD(ire1); 11946 } 11947 break; 11948 } 11949 IRB_REFRELE(irb); 11950 11951 if (ire1 != NULL) { 11952 next_mp = copyb(mp); 11953 if ((next_mp == NULL) || 11954 ((mp->b_cont != NULL) && 11955 ((next_mp->b_cont = 11956 dupmsg(mp->b_cont)) == 11957 NULL))) { 11958 freemsg(next_mp); 11959 next_mp = NULL; 11960 ire_refrele(ire1); 11961 ire1 = NULL; 11962 } 11963 } 11964 11965 /* Last multiroute ire; don't loop anymore. */ 11966 if (ire1 == NULL) { 11967 multirt_send = B_FALSE; 11968 } 11969 } 11970 11971 ill_index = 11972 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11973 11974 /* 11975 * Check for fastpath, we need to hold nce_lock to 11976 * prevent fastpath update from chaining nce_fp_mp. 11977 */ 11978 11979 ASSERT(nce->nce_ipversion != IPV4_VERSION); 11980 mutex_enter(&nce->nce_lock); 11981 if ((mp1 = nce->nce_fp_mp) != NULL) { 11982 uint32_t hlen; 11983 uchar_t *rptr; 11984 11985 /* Initiate IPPF processing */ 11986 if (IP6_OUT_IPP(flags)) { 11987 /* 11988 * We have to release the nce lock since 11989 * IPPF components use 11990 * ill_lookup_on_ifindex(), 11991 * which takes the ill_g_lock and the 11992 * ill_lock locks. 
11993 */ 11994 mutex_exit(&nce->nce_lock); 11995 ip_process(IPP_LOCAL_OUT, &mp, 11996 ill_index); 11997 if (mp == NULL) { 11998 BUMP_MIB( 11999 ill->ill_ip6_mib, 12000 ipv6OutDiscards); 12001 if (next_mp != NULL) 12002 freemsg(next_mp); 12003 if (ire != save_ire) { 12004 ire_refrele(ire); 12005 } 12006 return; 12007 } 12008 mutex_enter(&nce->nce_lock); 12009 if ((mp1 = nce->nce_fp_mp) == NULL) { 12010 /* 12011 * Probably disappeared during 12012 * IPQoS processing. 12013 */ 12014 qos_done = B_TRUE; 12015 goto prepend_unitdata; 12016 } 12017 } 12018 hlen = MBLKL(mp1); 12019 rptr = mp->b_rptr - hlen; 12020 /* 12021 * make sure there is room for the fastpath 12022 * datalink header 12023 */ 12024 if (rptr < mp->b_datap->db_base) { 12025 mp1 = copyb(mp1); 12026 if (mp1 == NULL) { 12027 mutex_exit(&nce->nce_lock); 12028 BUMP_MIB(ill->ill_ip6_mib, 12029 ipv6OutDiscards); 12030 freemsg(mp); 12031 if (next_mp != NULL) 12032 freemsg(next_mp); 12033 if (ire != save_ire) { 12034 ire_refrele(ire); 12035 } 12036 return; 12037 } 12038 mp1->b_cont = mp; 12039 12040 /* Get the priority marking, if any */ 12041 mp1->b_band = mp->b_band; 12042 mp = mp1; 12043 } else { 12044 mp->b_rptr = rptr; 12045 /* 12046 * fastpath - pre-pend datalink 12047 * header 12048 */ 12049 bcopy(mp1->b_rptr, rptr, hlen); 12050 } 12051 12052 mutex_exit(&nce->nce_lock); 12053 12054 } else { 12055 prepend_unitdata: 12056 mutex_exit(&nce->nce_lock); 12057 mp1 = nce->nce_res_mp; 12058 if (mp1 == NULL) { 12059 ip1dbg(("ip_xmit_v6: No resolution " 12060 "block ire = %p\n", (void *)ire)); 12061 freemsg(mp); 12062 if (next_mp != NULL) 12063 freemsg(next_mp); 12064 if (ire != save_ire) { 12065 ire_refrele(ire); 12066 } 12067 return; 12068 } 12069 /* 12070 * Prepend the DL_UNITDATA_REQ. 
12071 */ 12072 mp1 = copyb(mp1); 12073 if (mp1 == NULL) { 12074 BUMP_MIB(ill->ill_ip6_mib, 12075 ipv6OutDiscards); 12076 freemsg(mp); 12077 if (next_mp != NULL) 12078 freemsg(next_mp); 12079 if (ire != save_ire) { 12080 ire_refrele(ire); 12081 } 12082 return; 12083 } 12084 mp1->b_cont = mp; 12085 mp = mp1; 12086 /* 12087 * Initiate IPPF processing, if it is 12088 * already done, bypass. 12089 */ 12090 if (!qos_done && IP6_OUT_IPP(flags)) { 12091 ip_process(IPP_LOCAL_OUT, &mp, 12092 ill_index); 12093 if (mp == NULL) { 12094 BUMP_MIB(ill->ill_ip6_mib, 12095 ipv6OutDiscards); 12096 if (next_mp != NULL) 12097 freemsg(next_mp); 12098 if (ire != save_ire) { 12099 ire_refrele(ire); 12100 } 12101 return; 12102 } 12103 } 12104 } 12105 12106 /* 12107 * Update ire counters; for save_ire, this has been 12108 * done by the caller. 12109 */ 12110 if (ire != save_ire) { 12111 UPDATE_OB_PKT_COUNT(ire); 12112 ire->ire_last_used_time = lbolt; 12113 } 12114 12115 /* 12116 * Send it down. XXX Do we want to flow control AH/ESP 12117 * packets that carry TCP payloads? We don't flow 12118 * control TCP packets, but we should also not 12119 * flow-control TCP packets that have been protected. 12120 * We don't have an easy way to find out if an AH/ESP 12121 * packet was originally TCP or not currently. 12122 */ 12123 if (io == NULL) { 12124 putnext(stq, mp); 12125 } else { 12126 /* 12127 * Safety Pup says: make sure this is 12128 * going to the right interface! 12129 */ 12130 if (io->ipsec_out_capab_ill_index != 12131 ill_index) { 12132 /* IPsec kstats: bump lose counter */ 12133 freemsg(mp1); 12134 } else { 12135 ipsec_hw_putnext(stq, mp); 12136 } 12137 } 12138 12139 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12140 if (ire != save_ire) { 12141 ire_refrele(ire); 12142 } 12143 if (multirt_send) { 12144 ASSERT(ire1 != NULL); 12145 /* 12146 * Proceed with the next RTF_MULTIRT 12147 * ire, also set up the send-to queue 12148 * accordingly. 
12149 */ 12150 ire = ire1; 12151 ire1 = NULL; 12152 stq = ire->ire_stq; 12153 nce = ire->ire_nce; 12154 ill = ire_to_ill(ire); 12155 mp = next_mp; 12156 next_mp = NULL; 12157 continue; 12158 } 12159 ASSERT(next_mp == NULL); 12160 ASSERT(ire1 == NULL); 12161 return; 12162 } 12163 12164 ASSERT(nce->nce_state != ND_INCOMPLETE); 12165 12166 /* 12167 * Check for upper layer advice 12168 */ 12169 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12170 /* 12171 * It should be o.k. to check the state without 12172 * a lock here, at most we lose an advice. 12173 */ 12174 nce->nce_last = TICK_TO_MSEC(lbolt64); 12175 if (nce->nce_state != ND_REACHABLE) { 12176 12177 mutex_enter(&nce->nce_lock); 12178 nce->nce_state = ND_REACHABLE; 12179 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12180 mutex_exit(&nce->nce_lock); 12181 (void) untimeout(nce->nce_timeout_id); 12182 if (ip_debug > 2) { 12183 /* ip1dbg */ 12184 pr_addr_dbg("ip_xmit_v6: state" 12185 " for %s changed to" 12186 " REACHABLE\n", AF_INET6, 12187 &ire->ire_addr_v6); 12188 } 12189 } 12190 if (ire != save_ire) { 12191 ire_refrele(ire); 12192 } 12193 if (multirt_send) { 12194 ASSERT(ire1 != NULL); 12195 /* 12196 * Proceed with the next RTF_MULTIRT 12197 * ire, also set up the send-to queue 12198 * accordingly. 12199 */ 12200 ire = ire1; 12201 ire1 = NULL; 12202 stq = ire->ire_stq; 12203 nce = ire->ire_nce; 12204 ill = ire_to_ill(ire); 12205 mp = next_mp; 12206 next_mp = NULL; 12207 continue; 12208 } 12209 ASSERT(next_mp == NULL); 12210 ASSERT(ire1 == NULL); 12211 return; 12212 } 12213 12214 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12215 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12216 " ill_reachable_time = %d \n", delta, 12217 ill->ill_reachable_time)); 12218 if (delta > (uint64_t)ill->ill_reachable_time) { 12219 nce = ire->ire_nce; 12220 mutex_enter(&nce->nce_lock); 12221 switch (nce->nce_state) { 12222 case ND_REACHABLE: 12223 case ND_STALE: 12224 /* 12225 * ND_REACHABLE is identical to 12226 * ND_STALE in this specific case. 
If 12227 * reachable time has expired for this 12228 * neighbor (delta is greater than 12229 * reachable time), conceptually, the 12230 * neighbor cache is no longer in 12231 * REACHABLE state, but already in 12232 * STALE state. So the correct 12233 * transition here is to ND_DELAY. 12234 */ 12235 nce->nce_state = ND_DELAY; 12236 mutex_exit(&nce->nce_lock); 12237 NDP_RESTART_TIMER(nce, 12238 delay_first_probe_time); 12239 if (ip_debug > 3) { 12240 /* ip2dbg */ 12241 pr_addr_dbg("ip_xmit_v6: state" 12242 " for %s changed to" 12243 " DELAY\n", AF_INET6, 12244 &ire->ire_addr_v6); 12245 } 12246 break; 12247 case ND_DELAY: 12248 case ND_PROBE: 12249 mutex_exit(&nce->nce_lock); 12250 /* Timers have already started */ 12251 break; 12252 case ND_UNREACHABLE: 12253 /* 12254 * ndp timer has detected that this nce 12255 * is unreachable and initiated deleting 12256 * this nce and all its associated IREs. 12257 * This is a race where we found the 12258 * ire before it was deleted and have 12259 * just sent out a packet using this 12260 * unreachable nce. 12261 */ 12262 mutex_exit(&nce->nce_lock); 12263 break; 12264 default: 12265 ASSERT(0); 12266 } 12267 } 12268 12269 if (multirt_send) { 12270 ASSERT(ire1 != NULL); 12271 /* 12272 * Proceed with the next RTF_MULTIRT ire, 12273 * Also set up the send-to queue accordingly. 12274 */ 12275 if (ire != save_ire) { 12276 ire_refrele(ire); 12277 } 12278 ire = ire1; 12279 ire1 = NULL; 12280 stq = ire->ire_stq; 12281 nce = ire->ire_nce; 12282 ill = ire_to_ill(ire); 12283 mp = next_mp; 12284 next_mp = NULL; 12285 } 12286 } while (multirt_send); 12287 /* 12288 * In the multirouting case, release the last ire used for 12289 * emission. save_ire will be released by the caller. 12290 */ 12291 if (ire != save_ire) { 12292 ire_refrele(ire); 12293 } 12294 } else { 12295 /* 12296 * Queue packet if we have an conn to give back pressure. 
12297 * We can't queue packets intended for hardware acceleration 12298 * since we've tossed that state already. If the packet is 12299 * being fed back from ire_send_v6, we don't know the 12300 * position in the queue to enqueue the packet and we discard 12301 * the packet. 12302 */ 12303 if (ip_output_queue && (connp != NULL) && (io == NULL) && 12304 (caller != IRE_SEND)) { 12305 if (caller == IP_WSRV) { 12306 connp->conn_did_putbq = 1; 12307 (void) putbq(connp->conn_wq, mp); 12308 conn_drain_insert(connp); 12309 /* 12310 * caller == IP_WSRV implies we are 12311 * the service thread, and the 12312 * queue is already noenabled. 12313 * The check for canput and 12314 * the putbq is not atomic. 12315 * So we need to check again. 12316 */ 12317 if (canput(stq->q_next)) 12318 connp->conn_did_putbq = 0; 12319 } else { 12320 (void) putq(connp->conn_wq, mp); 12321 } 12322 return; 12323 } 12324 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 12325 freemsg(mp); 12326 return; 12327 } 12328 } 12329 12330 /* 12331 * pr_addr_dbg function provides the needed buffer space to call 12332 * inet_ntop() function's 3rd argument. This function should be 12333 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12334 * stack buffer space in it's own stack frame. This function uses 12335 * a buffer from it's own stack and prints the information. 12336 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12337 * 12338 * Note: This function can call inet_ntop() once. 12339 */ 12340 void 12341 pr_addr_dbg(char *fmt1, int af, const void *addr) 12342 { 12343 char buf[INET6_ADDRSTRLEN]; 12344 12345 if (fmt1 == NULL) { 12346 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12347 return; 12348 } 12349 12350 /* 12351 * This does not compare debug level and just prints 12352 * out. Thus it is the responsibility of the caller 12353 * to check the appropriate debug-level before calling 12354 * this function. 
12355 */ 12356 if (ip_debug > 0) { 12357 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12358 } 12359 12360 12361 } 12362 12363 12364 /* 12365 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12366 * if needed and extension headers) that will be needed based on the 12367 * ip6_pkt_t structure passed by the caller. 12368 * 12369 * The returned length does not include the length of the upper level 12370 * protocol (ULP) header. 12371 */ 12372 int 12373 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12374 { 12375 int len; 12376 12377 len = IPV6_HDR_LEN; 12378 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12379 len += sizeof (ip6i_t); 12380 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12381 ASSERT(ipp->ipp_hopoptslen != 0); 12382 len += ipp->ipp_hopoptslen; 12383 } 12384 if (ipp->ipp_fields & IPPF_RTHDR) { 12385 ASSERT(ipp->ipp_rthdrlen != 0); 12386 len += ipp->ipp_rthdrlen; 12387 } 12388 /* 12389 * En-route destination options 12390 * Only do them if there's a routing header as well 12391 */ 12392 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12393 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12394 ASSERT(ipp->ipp_rtdstoptslen != 0); 12395 len += ipp->ipp_rtdstoptslen; 12396 } 12397 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12398 ASSERT(ipp->ipp_dstoptslen != 0); 12399 len += ipp->ipp_dstoptslen; 12400 } 12401 return (len); 12402 } 12403 12404 /* 12405 * All-purpose routine to build a header chain of an IPv6 header 12406 * followed by any required extension headers and a proto header, 12407 * preceeded (where necessary) by an ip6i_t private header. 12408 * 12409 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12410 * will be filled in appropriately. 12411 * Thus the caller must fill in the rest of the IPv6 header, such as 12412 * traffic class/flowid, source address (if not set here), hoplimit (if not 12413 * set here) and destination address. 12414 * 12415 * The extension headers and ip6i_t header will all be fully filled in. 
12416 */ 12417 void 12418 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12419 ip6_pkt_t *ipp, uint8_t protocol) 12420 { 12421 uint8_t *nxthdr_ptr; 12422 uint8_t *cp; 12423 ip6i_t *ip6i; 12424 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12425 12426 /* 12427 * If sending private ip6i_t header down (checksum info, nexthop, 12428 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12429 * then fill it in. (The checksum info will be filled in by icmp). 12430 */ 12431 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12432 ip6i = (ip6i_t *)ip6h; 12433 ip6h = (ip6_t *)&ip6i[1]; 12434 12435 ip6i->ip6i_flags = 0; 12436 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12437 if (ipp->ipp_fields & IPPF_IFINDEX || 12438 ipp->ipp_fields & IPPF_SCOPE_ID) { 12439 ASSERT(ipp->ipp_ifindex != 0); 12440 ip6i->ip6i_flags |= IP6I_IFINDEX; 12441 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12442 } 12443 if (ipp->ipp_fields & IPPF_ADDR) { 12444 /* 12445 * Enable per-packet source address verification if 12446 * IPV6_PKTINFO specified the source address. 12447 * ip6_src is set in the transport's _wput function. 12448 */ 12449 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12450 &ipp->ipp_addr)); 12451 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12452 } 12453 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12454 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12455 /* 12456 * We need to set this flag so that IP doesn't 12457 * rewrite the IPv6 header's hoplimit with the 12458 * current default value. 
12459 */ 12460 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12461 } 12462 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12463 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12464 &ipp->ipp_nexthop)); 12465 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12466 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12467 } 12468 /* 12469 * tell IP this is an ip6i_t private header 12470 */ 12471 ip6i->ip6i_nxt = IPPROTO_RAW; 12472 } 12473 /* Initialize IPv6 header */ 12474 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12475 if (ipp->ipp_fields & IPPF_TCLASS) { 12476 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12477 (ipp->ipp_tclass << 20); 12478 } 12479 if (ipp->ipp_fields & IPPF_ADDR) 12480 ip6h->ip6_src = ipp->ipp_addr; 12481 12482 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12483 cp = (uint8_t *)&ip6h[1]; 12484 /* 12485 * Here's where we have to start stringing together 12486 * any extension headers in the right order: 12487 * Hop-by-hop, destination, routing, and final destination opts. 12488 */ 12489 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12490 /* Hop-by-hop options */ 12491 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12492 12493 *nxthdr_ptr = IPPROTO_HOPOPTS; 12494 nxthdr_ptr = &hbh->ip6h_nxt; 12495 12496 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12497 cp += ipp->ipp_hopoptslen; 12498 } 12499 /* 12500 * En-route destination options 12501 * Only do them if there's a routing header as well 12502 */ 12503 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12504 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12505 ip6_dest_t *dst = (ip6_dest_t *)cp; 12506 12507 *nxthdr_ptr = IPPROTO_DSTOPTS; 12508 nxthdr_ptr = &dst->ip6d_nxt; 12509 12510 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12511 cp += ipp->ipp_rtdstoptslen; 12512 } 12513 /* 12514 * Routing header next 12515 */ 12516 if (ipp->ipp_fields & IPPF_RTHDR) { 12517 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12518 12519 *nxthdr_ptr = IPPROTO_ROUTING; 12520 nxthdr_ptr = &rt->ip6r_nxt; 12521 12522 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12523 cp += ipp->ipp_rthdrlen; 12524 } 
12525 /* 12526 * Do ultimate destination options 12527 */ 12528 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12529 ip6_dest_t *dest = (ip6_dest_t *)cp; 12530 12531 *nxthdr_ptr = IPPROTO_DSTOPTS; 12532 nxthdr_ptr = &dest->ip6d_nxt; 12533 12534 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12535 cp += ipp->ipp_dstoptslen; 12536 } 12537 /* 12538 * Now set the last header pointer to the proto passed in 12539 */ 12540 *nxthdr_ptr = protocol; 12541 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12542 } 12543 12544 /* 12545 * Return a pointer to the routing header extension header 12546 * in the IPv6 header(s) chain passed in. 12547 * If none found, return NULL 12548 * Assumes that all extension headers are in same mblk as the v6 header 12549 */ 12550 ip6_rthdr_t * 12551 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12552 { 12553 ip6_dest_t *desthdr; 12554 ip6_frag_t *fraghdr; 12555 uint_t hdrlen; 12556 uint8_t nexthdr; 12557 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12558 12559 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12560 return ((ip6_rthdr_t *)ptr); 12561 12562 /* 12563 * The routing header will precede all extension headers 12564 * other than the hop-by-hop and destination options 12565 * extension headers, so if we see anything other than those, 12566 * we're done and didn't find it. 12567 * We could see a destination options header alone but no 12568 * routing header, in which case we'll return NULL as soon as 12569 * we see anything after that. 12570 * Hop-by-hop and destination option headers are identical, 12571 * so we can use either one we want as a template. 12572 */ 12573 nexthdr = ip6h->ip6_nxt; 12574 while (ptr < endptr) { 12575 /* Is there enough left for len + nexthdr? 
*/ 12576 if (ptr + MIN_EHDR_LEN > endptr) 12577 return (NULL); 12578 12579 switch (nexthdr) { 12580 case IPPROTO_HOPOPTS: 12581 case IPPROTO_DSTOPTS: 12582 /* Assumes the headers are identical for hbh and dst */ 12583 desthdr = (ip6_dest_t *)ptr; 12584 hdrlen = 8 * (desthdr->ip6d_len + 1); 12585 nexthdr = desthdr->ip6d_nxt; 12586 break; 12587 12588 case IPPROTO_ROUTING: 12589 return ((ip6_rthdr_t *)ptr); 12590 12591 case IPPROTO_FRAGMENT: 12592 fraghdr = (ip6_frag_t *)ptr; 12593 hdrlen = sizeof (ip6_frag_t); 12594 nexthdr = fraghdr->ip6f_nxt; 12595 break; 12596 12597 default: 12598 return (NULL); 12599 } 12600 ptr += hdrlen; 12601 } 12602 return (NULL); 12603 } 12604 12605 /* 12606 * Called for source-routed packets originating on this node. 12607 * Manipulates the original routing header by moving every entry up 12608 * one slot, placing the first entry in the v6 header's v6_dst field, 12609 * and placing the ultimate destination in the routing header's last 12610 * slot. 12611 * 12612 * Returns the checksum diference between the ultimate destination 12613 * (last hop in the routing header when the packet is sent) and 12614 * the first hop (ip6_dst when the packet is sent) 12615 */ 12616 uint32_t 12617 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 12618 { 12619 uint_t numaddr; 12620 uint_t i; 12621 in6_addr_t *addrptr; 12622 in6_addr_t tmp; 12623 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12624 uint32_t cksm; 12625 uint32_t addrsum = 0; 12626 uint16_t *ptr; 12627 12628 /* 12629 * Perform any processing needed for source routing. 12630 * We know that all extension headers will be in the same mblk 12631 * as the IPv6 header. 12632 */ 12633 12634 /* 12635 * If no segments left in header, or the header length field is zero, 12636 * don't move hop addresses around; 12637 * Checksum difference is zero. 
12638 */ 12639 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12640 return (0); 12641 12642 ptr = (uint16_t *)&ip6h->ip6_dst; 12643 cksm = 0; 12644 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12645 cksm += ptr[i]; 12646 } 12647 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12648 12649 /* 12650 * Here's where the fun begins - we have to 12651 * move all addresses up one spot, take the 12652 * first hop and make it our first ip6_dst, 12653 * and place the ultimate destination in the 12654 * newly-opened last slot. 12655 */ 12656 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12657 numaddr = rthdr->ip6r0_len / 2; 12658 tmp = *addrptr; 12659 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12660 *addrptr = addrptr[1]; 12661 } 12662 *addrptr = ip6h->ip6_dst; 12663 ip6h->ip6_dst = tmp; 12664 12665 /* 12666 * From the checksummed ultimate destination subtract the checksummed 12667 * current ip6_dst (the first hop address). Return that number. 12668 * (In the v4 case, the second part of this is done in each routine 12669 * that calls ip_massage_options(). We do it all in this one place 12670 * for v6). 12671 */ 12672 ptr = (uint16_t *)&ip6h->ip6_dst; 12673 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12674 addrsum += ptr[i]; 12675 } 12676 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12677 if ((int)cksm < 0) 12678 cksm--; 12679 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12680 12681 return (cksm); 12682 } 12683 12684 /* 12685 * See if the upper-level protocol indicated by 'proto' will be able 12686 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 12687 * ICMP6_PACKET_TOO_BIG (IPv6). 12688 */ 12689 static boolean_t 12690 ip_ulp_cando_pkt2big(int proto) 12691 { 12692 /* 12693 * For now, only TCP can handle this. 12694 * Tunnels may be able to also, but since tun isn't working over 12695 * IPv6 yet, don't worry about it for now. 
12696 */ 12697 return (proto == IPPROTO_TCP); 12698 } 12699 12700 12701 /* 12702 * Propagate a multicast group membership operation (join/leave) (*fn) on 12703 * all interfaces crossed by the related multirt routes. 12704 * The call is considered successful if the operation succeeds 12705 * on at least one interface. 12706 * The function is called if the destination address in the packet to send 12707 * is multirouted. 12708 */ 12709 int 12710 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12711 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12712 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12713 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12714 { 12715 ire_t *ire_gw; 12716 irb_t *irb; 12717 int index, error = 0; 12718 opt_restart_t *or; 12719 12720 irb = ire->ire_bucket; 12721 ASSERT(irb != NULL); 12722 12723 ASSERT(DB_TYPE(first_mp) == M_CTL); 12724 or = (opt_restart_t *)first_mp->b_rptr; 12725 12726 IRB_REFHOLD(irb); 12727 for (; ire != NULL; ire = ire->ire_next) { 12728 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12729 continue; 12730 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12731 continue; 12732 12733 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12734 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12735 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 12736 /* No resolver exists for the gateway; skip this ire. */ 12737 if (ire_gw == NULL) 12738 continue; 12739 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12740 /* 12741 * A resolver exists: we can get the interface on which we have 12742 * to apply the operation. 
12743 */ 12744 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12745 first_mp); 12746 if (error == 0) 12747 or->or_private = CGTP_MCAST_SUCCESS; 12748 12749 if (ip_debug > 0) { 12750 ulong_t off; 12751 char *ksym; 12752 12753 ksym = kobj_getsymname((uintptr_t)fn, &off); 12754 ip2dbg(("ip_multirt_apply_membership_v6: " 12755 "called %s, multirt group 0x%08x via itf 0x%08x, " 12756 "error %d [success %u]\n", 12757 ksym ? ksym : "?", 12758 ntohl(V4_PART_OF_V6((*v6grp))), 12759 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12760 error, or->or_private)); 12761 } 12762 12763 ire_refrele(ire_gw); 12764 if (error == EINPROGRESS) { 12765 IRB_REFRELE(irb); 12766 return (error); 12767 } 12768 } 12769 IRB_REFRELE(irb); 12770 /* 12771 * Consider the call as successful if we succeeded on at least 12772 * one interface. Otherwise, return the last encountered error. 12773 */ 12774 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12775 } 12776 12777 void 12778 ip6_kstat_init(void) 12779 { 12780 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 12781 "net", KSTAT_TYPE_NAMED, 12782 sizeof (ip6_statistics) / sizeof (kstat_named_t), 12783 KSTAT_FLAG_VIRTUAL)) != NULL) { 12784 ip6_kstat->ks_data = &ip6_statistics; 12785 kstat_install(ip6_kstat); 12786 } 12787 } 12788 12789 /* 12790 * The following two functions set and get the value for the 12791 * IPV6_SRC_PREFERENCES socket option. 12792 */ 12793 int 12794 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12795 { 12796 /* 12797 * We only support preferences that are covered by 12798 * IPV6_PREFER_SRC_MASK. 12799 */ 12800 if (prefs & ~IPV6_PREFER_SRC_MASK) 12801 return (EINVAL); 12802 12803 /* 12804 * Look for conflicting preferences or default preferences. If 12805 * both bits of a related pair are clear, the application wants the 12806 * system's default value for that pair. Both bits in a pair can't 12807 * be set. 
12808 */ 12809 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12810 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12811 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12812 IPV6_PREFER_SRC_MIPMASK) { 12813 return (EINVAL); 12814 } 12815 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12816 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12817 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12818 IPV6_PREFER_SRC_TMPMASK) { 12819 return (EINVAL); 12820 } 12821 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12822 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12823 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12824 IPV6_PREFER_SRC_CGAMASK) { 12825 return (EINVAL); 12826 } 12827 12828 connp->conn_src_preferences = prefs; 12829 return (0); 12830 } 12831 12832 size_t 12833 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12834 { 12835 *val = connp->conn_src_preferences; 12836 return (sizeof (connp->conn_src_preferences)); 12837 } 12838 12839 int 12840 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 12841 { 12842 ill_t *ill; 12843 ire_t *ire; 12844 int error; 12845 12846 /* 12847 * Verify the source address and ifindex. Privileged users can use 12848 * any source address. For ancillary data the source address is 12849 * checked in ip_wput_v6. 12850 */ 12851 if (pkti->ipi6_ifindex != 0) { 12852 ASSERT(connp != NULL); 12853 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 12854 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 12855 if (ill == NULL) { 12856 /* 12857 * We just want to know if the interface exists, we 12858 * don't really care about the ill pointer itself. 
12859 */ 12860 if (error != EINPROGRESS) 12861 return (error); 12862 error = 0; /* Ensure we don't use it below */ 12863 } else { 12864 ill_refrele(ill); 12865 } 12866 } 12867 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12868 secpolicy_net_rawaccess(cr) != 0) { 12869 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12870 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12871 connp->conn_zoneid, NULL, MATCH_IRE_TYPE); 12872 if (ire != NULL) 12873 ire_refrele(ire); 12874 else 12875 return (ENXIO); 12876 } 12877 return (0); 12878 } 12879 12880 /* 12881 * Get the size of the IP options (including the IP headers size) 12882 * without including the AH header's size. If till_ah is B_FALSE, 12883 * and if AH header is present, dest options beyond AH header will 12884 * also be included in the returned size. 12885 */ 12886 int 12887 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12888 { 12889 ip6_t *ip6h; 12890 uint8_t nexthdr; 12891 uint8_t *whereptr; 12892 ip6_hbh_t *hbhhdr; 12893 ip6_dest_t *dsthdr; 12894 ip6_rthdr_t *rthdr; 12895 int ehdrlen; 12896 int size; 12897 ah_t *ah; 12898 12899 ip6h = (ip6_t *)mp->b_rptr; 12900 size = IPV6_HDR_LEN; 12901 nexthdr = ip6h->ip6_nxt; 12902 whereptr = (uint8_t *)&ip6h[1]; 12903 for (;;) { 12904 /* Assume IP has already stripped it */ 12905 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12906 switch (nexthdr) { 12907 case IPPROTO_HOPOPTS: 12908 hbhhdr = (ip6_hbh_t *)whereptr; 12909 nexthdr = hbhhdr->ip6h_nxt; 12910 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12911 break; 12912 case IPPROTO_DSTOPTS: 12913 dsthdr = (ip6_dest_t *)whereptr; 12914 nexthdr = dsthdr->ip6d_nxt; 12915 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12916 break; 12917 case IPPROTO_ROUTING: 12918 rthdr = (ip6_rthdr_t *)whereptr; 12919 nexthdr = rthdr->ip6r_nxt; 12920 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12921 break; 12922 default : 12923 if (till_ah) { 12924 ASSERT(nexthdr == IPPROTO_AH); 12925 return (size); 12926 } 12927 /* 12928 * If we don't have a AH 
header to traverse, 12929 * return now. This happens normally for 12930 * outbound datagrams where we have not inserted 12931 * the AH header. 12932 */ 12933 if (nexthdr != IPPROTO_AH) { 12934 return (size); 12935 } 12936 12937 /* 12938 * We don't include the AH header's size 12939 * to be symmetrical with other cases where 12940 * we either don't have a AH header (outbound) 12941 * or peek into the AH header yet (inbound and 12942 * not pulled up yet). 12943 */ 12944 ah = (ah_t *)whereptr; 12945 nexthdr = ah->ah_nexthdr; 12946 ehdrlen = (ah->ah_length << 2) + 8; 12947 12948 if (nexthdr == IPPROTO_DSTOPTS) { 12949 if (whereptr + ehdrlen >= mp->b_wptr) { 12950 /* 12951 * The destination options header 12952 * is not part of the first mblk. 12953 */ 12954 whereptr = mp->b_cont->b_rptr; 12955 } else { 12956 whereptr += ehdrlen; 12957 } 12958 12959 dsthdr = (ip6_dest_t *)whereptr; 12960 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12961 size += ehdrlen; 12962 } 12963 return (size); 12964 } 12965 whereptr += ehdrlen; 12966 size += ehdrlen; 12967 } 12968 } 12969