1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/kobj.h> 46 #include <sys/zone.h> 47 48 #include <sys/kmem.h> 49 #include <sys/systm.h> 50 #include <sys/param.h> 51 #include <sys/socket.h> 52 #include <sys/vtrace.h> 53 #include <sys/isa_defs.h> 54 #include <sys/atomic.h> 55 #include <sys/iphada.h> 56 #include <sys/policy.h> 57 #include <net/if.h> 58 #include <net/if_types.h> 59 #include <net/route.h> 60 #include <net/if_dl.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <netinet/ip6.h> 64 #include <netinet/icmp6.h> 65 #include <netinet/sctp.h> 66 67 #include <inet/common.h> 68 #include <inet/mi.h> 69 #include <inet/mib2.h> 70 #include <inet/nd.h> 71 #include <inet/arp.h> 72 73 #include <inet/ip.h> 74 #include <inet/ip_impl.h> 75 #include <inet/ip6.h> 76 #include <inet/ip6_asp.h> 77 #include <inet/tcp.h> 78 #include <inet/tcp_impl.h> 79 #include <inet/udp_impl.h> 80 #include <inet/ipp_common.h> 81 82 #include <inet/ip_multi.h> 83 #include <inet/ip_if.h> 84 #include <inet/ip_ire.h> 85 #include <inet/ip_rts.h> 86 #include <inet/optcom.h> 87 #include <inet/ip_ndp.h> 88 #include <net/pfkeyv2.h> 89 #include <inet/ipsec_info.h> 90 #include <inet/sadb.h> 91 #include <inet/ipsec_impl.h> 92 #include <inet/tun.h> 93 #include <inet/sctp_ip.h> 94 #include <sys/pattr.h> 95 #include <inet/ipclassifier.h> 96 #include <inet/ipsecah.h> 97 #include <inet/udp_impl.h> 98 #include <sys/squeue.h> 99 100 #include <sys/tsol/label.h> 101 #include <sys/tsol/tnet.h> 102 103 #include <rpc/pmap_prot.h> 104 105 /* Temporary; for CR 6451644 work-around */ 106 #include <sys/ethernet.h> 
extern squeue_func_t ip_input_proc;

/*
 * IP statistics.
 *
 * IP6_STAT(x) bumps the named 64-bit counter in ip6_statistics by one;
 * IP6_STAT_UPDATE(x, n) adds n to it.  Neither macro does any locking.
 */
#define	IP6_STAT(x)		(ip6_statistics.x.value.ui64++)
#define	IP6_STAT_UPDATE(x, n)	(ip6_statistics.x.value.ui64 += (n))

/*
 * One kstat_named_t per counter.  ip6_statistics below is initialized
 * positionally, so members must be added to both in the same order.
 */
typedef struct ip6_stat {
	kstat_named_t	ip6_udp_fast_path;
	kstat_named_t	ip6_udp_slow_path;
	kstat_named_t	ip6_udp_fannorm;
	kstat_named_t	ip6_udp_fanmb;
	kstat_named_t	ip6_out_sw_cksum;
	kstat_named_t	ip6_in_sw_cksum;
	kstat_named_t	ip6_tcp_in_full_hw_cksum_err;
	kstat_named_t	ip6_tcp_in_part_hw_cksum_err;
	kstat_named_t	ip6_tcp_in_sw_cksum_err;
	kstat_named_t	ip6_tcp_out_sw_cksum_bytes;
	kstat_named_t	ip6_udp_in_full_hw_cksum_err;
	kstat_named_t	ip6_udp_in_part_hw_cksum_err;
	kstat_named_t	ip6_udp_in_sw_cksum_err;
	kstat_named_t	ip6_udp_out_sw_cksum_bytes;
	kstat_named_t	ip6_frag_mdt_pkt_out;
	kstat_named_t	ip6_frag_mdt_discarded;
	kstat_named_t	ip6_frag_mdt_allocfail;
	kstat_named_t	ip6_frag_mdt_addpdescfail;
	kstat_named_t	ip6_frag_mdt_allocd;
} ip6_stat_t;

/* Counter names and types, in ip6_stat_t member order. */
static ip6_stat_t ip6_statistics = {
	{ "ip6_udp_fast_path",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_slow_path",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_fannorm",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_fanmb",			KSTAT_DATA_UINT64 },
	{ "ip6_out_sw_cksum",			KSTAT_DATA_UINT64 },
	{ "ip6_in_sw_cksum",			KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
	{ "ip6_tcp_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
	{ "ip6_udp_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_pkt_out",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_discarded",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_allocfail",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_addpdescfail",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_allocd",		KSTAT_DATA_UINT64 },
};

static kstat_t *ip6_kstat;

/*
 * Naming conventions:
 *	These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */

/*
 * IPv6 mibs when the interface (ill) is not known.
 * When the ill is known the per-interface mib in the ill is used.
 */
mib2_ipv6IfStatsEntry_t	ip6_mib;
mib2_ipv6IfIcmpEntry_t	icmp6_mib;

/*
 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
 * from IANA. This mechanism will remain in effect until an official
 * number is obtained.
 */
uchar_t ip6opt_ls;

uint_t ipv6_ire_default_count;	/* Number of IPv6 IRE_DEFAULT entries */
uint_t ipv6_ire_default_index;	/* Walking IPv6 index used to mod in */

/*
 * Well-known addresses.  Each in6_addr_t is initialized from four 32-bit
 * words, so every constant that is not byte-symmetric is spelled twice:
 * once for big-endian and once for little-endian hosts.  Both spellings
 * produce the same bytes in memory.
 */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

/* ff00:: */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif	/* _BIG_ENDIAN */

/* ::1 */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::1 */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::2 */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::16 */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::1:ff00:0 (the low 24 bits are left zero here) */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif	/* _BIG_ENDIAN */

/*
 * Used by icmp_send_redirect_v6 for picking random src.
 */
uint_t	icmp_redirect_v6_src_index;

/*
 * Leave room for ip_newroute to tack on the src and target addresses.
 * True when mp is non-NULL and its first mblk holds at least two IPv6
 * addresses' worth of data.  Evaluates mp more than once.
 */
#define	OK_RESOLVER_MP_V6(mp)						\
		((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))

/* Forward declarations for functions private to this file. */
static void	icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill,
    boolean_t, zoneid_t);
static void	icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
    const in6_addr_t *, boolean_t);
static void	icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static boolean_t	icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp);
static int	ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *,
    uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
    boolean_t, boolean_t, boolean_t, boolean_t);
static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *,
    iulp_t *);
static int	ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *,
    uint16_t, boolean_t, boolean_t, boolean_t);
static void	ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
    ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int	ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
    uint8_t *, uint_t, uint8_t);
static mblk_t	*ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *,
    ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *);
static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *);
static void	ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
    conn_t *, int, int, int);
static boolean_t ip_ulp_cando_pkt2big(int);

static void ip_rput_v6(queue_t *, mblk_t *);
static void ip_wput_v6(queue_t *, mblk_t *);

/*
 * A template for an IPv6 AR_ENTRY_QUERY
 *
 * The offsets below lay the variable part out after the fixed areq_t as:
 * target address, sender address, then the name.
 */
static areq_t	ipv6_areq_template = {
	AR_ENTRY_QUERY,				/* cmd */
	sizeof (areq_t)+(2*IPV6_ADDR_LEN),	/* name offset */
	sizeof (areq_t),	/* name len (filled by ill_arp_alloc) */
	IP6_DL_SAP,		/* protocol, from arps perspective */
	sizeof (areq_t),	/* target addr offset */
	IPV6_ADDR_LEN,		/* target addr_length */
	0,			/* flags */
	sizeof (areq_t) + IPV6_ADDR_LEN,	/* sender addr offset */
	IPV6_ADDR_LEN,		/* sender addr length */
	6,			/* xmit_count */
	1000,			/* (re)xmit_interval in milliseconds */
	4			/* max # of requests to buffer */
	/* anything else filled in by the code */
};

/*
 * STREAMS queue initialization for the IPv6 read side: ip_rput_v6 is the
 * put procedure and there is no service procedure.
 */
struct qinit rinit_ipv6 = {
	(pfi_t)ip_rput_v6,
	NULL,
	ip_open,
	ip_close,
	NULL,
	&ip_mod_info
};

/*
 * STREAMS queue initialization for the IPv6 write side: ip_wput_v6 is the
 * put procedure and ip_wsrv the service procedure.
 */
struct qinit winit_ipv6 = {
	(pfi_t)ip_wput_v6,
	(pfi_t)ip_wsrv,
	ip_open,
	ip_close,
	NULL,
	&ip_mod_info
};

/*
 * Handle IPv6 ICMP packets sent to us.  Consume the mblk passed in.
 * The message has already been checksummed and if needed,
 * a copy has been made to be sent to any interested ICMP client (conn)
 * Note that this is different than icmp_inbound() which does the fanout
 * to conn's as well as local processing of the ICMP packets.
 *
 * All error messages are passed to the matching transport stream.
 *
 * Zones notes:
 * The packet is only processed in the context of the specified zone: typically
 * only this zone will reply to an echo request. This means that the caller must
 * call icmp_inbound_v6() for each relevant zone.
329 */ 330 static void 331 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 332 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 333 { 334 icmp6_t *icmp6; 335 ip6_t *ip6h; 336 boolean_t interested; 337 ip6i_t *ip6i; 338 in6_addr_t origsrc; 339 ire_t *ire; 340 mblk_t *first_mp; 341 ipsec_in_t *ii; 342 343 ASSERT(ill != NULL); 344 first_mp = mp; 345 if (mctl_present) { 346 mp = first_mp->b_cont; 347 ASSERT(mp != NULL); 348 349 ii = (ipsec_in_t *)first_mp->b_rptr; 350 ASSERT(ii->ipsec_in_type == IPSEC_IN); 351 } 352 353 ip6h = (ip6_t *)mp->b_rptr; 354 355 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 356 357 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 358 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 359 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 360 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 361 freemsg(first_mp); 362 return; 363 } 364 ip6h = (ip6_t *)mp->b_rptr; 365 } 366 if (icmp_accept_clear_messages == 0) { 367 first_mp = ipsec_check_global_policy(first_mp, NULL, 368 NULL, ip6h, mctl_present); 369 if (first_mp == NULL) 370 return; 371 } 372 373 /* 374 * On a labeled system, we have to check whether the zone itself is 375 * permitted to receive raw traffic. 376 */ 377 if (is_system_labeled()) { 378 if (zoneid == ALL_ZONES) 379 zoneid = tsol_packet_to_zoneid(mp); 380 if (!tsol_can_accept_raw(mp, B_FALSE)) { 381 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 382 zoneid)); 383 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 384 freemsg(first_mp); 385 return; 386 } 387 } 388 389 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 390 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 391 icmp6->icmp6_code)); 392 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 393 394 /* Initiate IPPF processing here */ 395 if (IP6_IN_IPP(flags)) { 396 397 /* 398 * If the ifindex changes due to SIOCSLIFINDEX 399 * packet may return to IP on the wrong ill. 
400 */ 401 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 402 if (mp == NULL) { 403 if (mctl_present) { 404 freeb(first_mp); 405 } 406 return; 407 } 408 } 409 410 switch (icmp6->icmp6_type) { 411 case ICMP6_DST_UNREACH: 412 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 413 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 414 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 415 break; 416 417 case ICMP6_TIME_EXCEEDED: 418 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 419 break; 420 421 case ICMP6_PARAM_PROB: 422 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 423 break; 424 425 case ICMP6_PACKET_TOO_BIG: 426 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 427 zoneid); 428 return; 429 case ICMP6_ECHO_REQUEST: 430 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 431 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 432 !ipv6_resp_echo_mcast) 433 break; 434 435 /* 436 * We must have exclusive use of the mblk to convert it to 437 * a response. 438 * If not, we copy it. 439 */ 440 if (mp->b_datap->db_ref > 1) { 441 mblk_t *mp1; 442 443 mp1 = copymsg(mp); 444 freemsg(mp); 445 if (mp1 == NULL) { 446 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 447 if (mctl_present) 448 freeb(first_mp); 449 return; 450 } 451 mp = mp1; 452 ip6h = (ip6_t *)mp->b_rptr; 453 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 454 if (mctl_present) 455 first_mp->b_cont = mp; 456 else 457 first_mp = mp; 458 } 459 460 /* 461 * Turn the echo into an echo reply. 462 * Remove any extension headers (do not reverse a source route) 463 * and clear the flow id (keep traffic class for now). 
464 */ 465 if (hdr_length != IPV6_HDR_LEN) { 466 int i; 467 468 for (i = 0; i < IPV6_HDR_LEN; i++) 469 mp->b_rptr[hdr_length - i - 1] = 470 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 471 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 472 ip6h = (ip6_t *)mp->b_rptr; 473 ip6h->ip6_nxt = IPPROTO_ICMPV6; 474 hdr_length = IPV6_HDR_LEN; 475 } 476 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 477 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 478 479 ip6h->ip6_plen = 480 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 481 origsrc = ip6h->ip6_src; 482 /* 483 * Reverse the source and destination addresses. 484 * If the return address is a multicast, zero out the source 485 * (ip_wput_v6 will set an address). 486 */ 487 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 488 ip6h->ip6_src = ipv6_all_zeros; 489 ip6h->ip6_dst = origsrc; 490 } else { 491 ip6h->ip6_src = ip6h->ip6_dst; 492 ip6h->ip6_dst = origsrc; 493 } 494 495 /* set the hop limit */ 496 ip6h->ip6_hops = ipv6_def_hops; 497 498 /* 499 * Prepare for checksum by putting icmp length in the icmp 500 * checksum field. The checksum is calculated in ip_wput_v6. 501 */ 502 icmp6->icmp6_cksum = ip6h->ip6_plen; 503 /* 504 * ICMP echo replies should go out on the same interface 505 * the request came on as probes used by in.mpathd for 506 * detecting NIC failures are ECHO packets. We turn-off load 507 * spreading by allocating a ip6i and setting ip6i_attach_if 508 * to B_TRUE which is handled both by ip_wput_v6 and 509 * ip_newroute_v6. If we don't turnoff load spreading, 510 * the packets might get dropped if there are no 511 * non-FAILED/INACTIVE interfaces for it to go out on and 512 * in.mpathd would wrongly detect a failure or mis-detect 513 * a NIC failure as a link failure. As load spreading can 514 * happen only if ill_group is not NULL, we do only for 515 * that case and this does not affect the normal case. 516 * 517 * We force this only on echo packets that came from on-link 518 * hosts. 
We restrict this to link-local addresses which 519 * is used by in.mpathd for probing. In the IPv6 case, 520 * default routes typically have an ire_ipif pointer and 521 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 522 * might work. As a default route out of this interface 523 * may not be present, enforcing this packet to go out in 524 * this case may not work. 525 */ 526 if (ill->ill_group != NULL && 527 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 528 /* 529 * If we are sending replies to ourselves, don't 530 * set ATTACH_IF as we may not be able to find 531 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 532 * causes ip_wput_v6 to look for an IRE_LOCAL on 533 * "ill" which it may not find and will try to 534 * create an IRE_CACHE for our local address. Once 535 * we do this, we will try to forward all packets 536 * meant to our LOCAL address. 537 */ 538 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 539 NULL); 540 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 541 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 542 if (mp == NULL) { 543 BUMP_MIB(ill->ill_icmp6_mib, 544 ipv6IfIcmpInErrors); 545 if (ire != NULL) 546 ire_refrele(ire); 547 if (mctl_present) 548 freeb(first_mp); 549 return; 550 } else if (mctl_present) { 551 first_mp->b_cont = mp; 552 } else { 553 first_mp = mp; 554 } 555 ip6i = (ip6i_t *)mp->b_rptr; 556 ip6i->ip6i_flags = IP6I_ATTACH_IF; 557 ip6i->ip6i_ifindex = 558 ill->ill_phyint->phyint_ifindex; 559 } 560 if (ire != NULL) 561 ire_refrele(ire); 562 } 563 564 if (!mctl_present) { 565 /* 566 * This packet should go out the same way as it 567 * came in i.e in clear. To make sure that global 568 * policy will not be applied to this in ip_wput, 569 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
570 */ 571 ASSERT(first_mp == mp); 572 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 573 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 574 freemsg(mp); 575 return; 576 } 577 ii = (ipsec_in_t *)first_mp->b_rptr; 578 579 /* This is not a secure packet */ 580 ii->ipsec_in_secure = B_FALSE; 581 first_mp->b_cont = mp; 582 } 583 ii->ipsec_in_zoneid = zoneid; 584 ASSERT(zoneid != ALL_ZONES); 585 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 586 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 587 return; 588 } 589 put(WR(q), first_mp); 590 return; 591 592 case ICMP6_ECHO_REPLY: 593 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 594 break; 595 596 case ND_ROUTER_SOLICIT: 597 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 598 break; 599 600 case ND_ROUTER_ADVERT: 601 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 602 break; 603 604 case ND_NEIGHBOR_SOLICIT: 605 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 606 if (mctl_present) 607 freeb(first_mp); 608 /* XXX may wish to pass first_mp up to ndp_input someday. */ 609 ndp_input(ill, mp, dl_mp); 610 return; 611 612 case ND_NEIGHBOR_ADVERT: 613 BUMP_MIB(ill->ill_icmp6_mib, 614 ipv6IfIcmpInNeighborAdvertisements); 615 if (mctl_present) 616 freeb(first_mp); 617 /* XXX may wish to pass first_mp up to ndp_input someday. */ 618 ndp_input(ill, mp, dl_mp); 619 return; 620 621 case ND_REDIRECT: { 622 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 623 624 if (ipv6_ignore_redirect) 625 break; 626 627 /* 628 * As there is no upper client to deliver, we don't 629 * need the first_mp any more. 630 */ 631 if (mctl_present) 632 freeb(first_mp); 633 if (!pullupmsg(mp, -1) || 634 !icmp_redirect_ok_v6(ill, mp)) { 635 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 636 break; 637 } 638 icmp_redirect_v6(q, mp, ill); 639 return; 640 } 641 642 /* 643 * The next three icmp messages will be handled by MLD. 
644 * Pass all valid MLD packets up to any process(es) 645 * listening on a raw ICMP socket. MLD messages are 646 * freed by mld_input function. 647 */ 648 case MLD_LISTENER_QUERY: 649 case MLD_LISTENER_REPORT: 650 case MLD_LISTENER_REDUCTION: 651 if (mctl_present) 652 freeb(first_mp); 653 mld_input(q, mp, ill); 654 return; 655 default: 656 break; 657 } 658 if (interested) { 659 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 660 mctl_present, zoneid); 661 } else { 662 freemsg(first_mp); 663 } 664 } 665 666 /* 667 * Process received IPv6 ICMP Packet too big. 668 * After updating any IRE it does the fanout to any matching transport streams. 669 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 670 */ 671 /* ARGSUSED */ 672 static void 673 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 674 boolean_t mctl_present, zoneid_t zoneid) 675 { 676 ip6_t *ip6h; 677 ip6_t *inner_ip6h; 678 icmp6_t *icmp6; 679 uint16_t hdr_length; 680 uint32_t mtu; 681 ire_t *ire, *first_ire; 682 mblk_t *first_mp; 683 684 first_mp = mp; 685 if (mctl_present) 686 mp = first_mp->b_cont; 687 /* 688 * We must have exclusive use of the mblk to update the MTU 689 * in the packet. 690 * If not, we copy it. 691 * 692 * If there's an M_CTL present, we know that allocated first_mp 693 * earlier in this function, so we know first_mp has refcnt of one. 
694 */ 695 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 696 if (mp->b_datap->db_ref > 1) { 697 mblk_t *mp1; 698 699 mp1 = copymsg(mp); 700 freemsg(mp); 701 if (mp1 == NULL) { 702 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 703 if (mctl_present) 704 freeb(first_mp); 705 return; 706 } 707 mp = mp1; 708 if (mctl_present) 709 first_mp->b_cont = mp; 710 else 711 first_mp = mp; 712 } 713 ip6h = (ip6_t *)mp->b_rptr; 714 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 715 hdr_length = ip_hdr_length_v6(mp, ip6h); 716 else 717 hdr_length = IPV6_HDR_LEN; 718 719 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 720 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 721 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 722 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 723 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 724 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 725 freemsg(first_mp); 726 return; 727 } 728 ip6h = (ip6_t *)mp->b_rptr; 729 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 730 inner_ip6h = (ip6_t *)&icmp6[1]; 731 } 732 733 /* 734 * For link local destinations matching simply on IRE type is not 735 * sufficient. Same link local addresses for different ILL's is 736 * possible. 
737 */ 738 739 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 740 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 741 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 742 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 743 744 if (first_ire == NULL) { 745 if (ip_debug > 2) { 746 /* ip1dbg */ 747 pr_addr_dbg("icmp_inbound_too_big_v6:" 748 "no ire for dst %s\n", AF_INET6, 749 &inner_ip6h->ip6_dst); 750 } 751 freemsg(first_mp); 752 return; 753 } 754 755 mtu = ntohl(icmp6->icmp6_mtu); 756 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 757 for (ire = first_ire; ire != NULL && 758 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 759 ire = ire->ire_next) { 760 mutex_enter(&ire->ire_lock); 761 if (mtu < IPV6_MIN_MTU) { 762 ip1dbg(("Received mtu less than IPv6 " 763 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 764 mtu = IPV6_MIN_MTU; 765 /* 766 * If an mtu less than IPv6 min mtu is received, 767 * we must include a fragment header in 768 * subsequent packets. 769 */ 770 ire->ire_frag_flag |= IPH_FRAG_HDR; 771 } 772 ip1dbg(("Received mtu from router: %d\n", mtu)); 773 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 774 /* Record the new max frag size for the ULP. */ 775 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 776 /* 777 * If we need a fragment header in every packet 778 * (above case or multirouting), make sure the 779 * ULP takes it into account when computing the 780 * payload size. 
781 */ 782 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 783 sizeof (ip6_frag_t)); 784 } else { 785 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 786 } 787 mutex_exit(&ire->ire_lock); 788 } 789 rw_exit(&first_ire->ire_bucket->irb_lock); 790 ire_refrele(first_ire); 791 } else { 792 irb_t *irb = NULL; 793 /* 794 * for non-link local destinations we match only on the IRE type 795 */ 796 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 797 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE); 798 if (ire == NULL) { 799 if (ip_debug > 2) { 800 /* ip1dbg */ 801 pr_addr_dbg("icmp_inbound_too_big_v6:" 802 "no ire for dst %s\n", 803 AF_INET6, &inner_ip6h->ip6_dst); 804 } 805 freemsg(first_mp); 806 return; 807 } 808 irb = ire->ire_bucket; 809 ire_refrele(ire); 810 rw_enter(&irb->irb_lock, RW_READER); 811 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 812 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 813 &inner_ip6h->ip6_dst)) { 814 mtu = ntohl(icmp6->icmp6_mtu); 815 mutex_enter(&ire->ire_lock); 816 if (mtu < IPV6_MIN_MTU) { 817 ip1dbg(("Received mtu less than IPv6" 818 "min mtu %d: %d\n", 819 IPV6_MIN_MTU, mtu)); 820 mtu = IPV6_MIN_MTU; 821 /* 822 * If an mtu less than IPv6 min mtu is 823 * received, we must include a fragment 824 * header in subsequent packets. 825 */ 826 ire->ire_frag_flag |= IPH_FRAG_HDR; 827 } 828 829 ip1dbg(("Received mtu from router: %d\n", mtu)); 830 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 831 /* Record the new max frag size for the ULP. */ 832 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 833 /* 834 * If we need a fragment header in 835 * every packet (above case or 836 * multirouting), make sure the ULP 837 * takes it into account when computing 838 * the payload size. 
839 */ 840 icmp6->icmp6_mtu = 841 htonl(ire->ire_max_frag - 842 sizeof (ip6_frag_t)); 843 } else { 844 icmp6->icmp6_mtu = 845 htonl(ire->ire_max_frag); 846 } 847 mutex_exit(&ire->ire_lock); 848 } 849 } 850 rw_exit(&irb->irb_lock); 851 } 852 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 853 mctl_present, zoneid); 854 } 855 856 static void 857 pkt_too_big(conn_t *connp, void *arg) 858 { 859 mblk_t *mp; 860 861 if (!connp->conn_ipv6_recvpathmtu) 862 return; 863 864 /* create message and drop it on this connections read queue */ 865 if ((mp = dupb((mblk_t *)arg)) == NULL) { 866 return; 867 } 868 mp->b_datap->db_type = M_CTL; 869 870 putnext(connp->conn_rq, mp); 871 } 872 873 /* 874 * Fanout received ICMPv6 error packets to the transports. 875 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 876 */ 877 void 878 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 879 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 880 { 881 uint16_t *up; /* Pointer to ports in ULP header */ 882 uint32_t ports; /* reversed ports for fanout */ 883 ip6_t rip6h; /* With reversed addresses */ 884 uint16_t hdr_length; 885 uint8_t *nexthdrp; 886 uint8_t nexthdr; 887 mblk_t *first_mp; 888 ipsec_in_t *ii; 889 tcpha_t *tcpha; 890 conn_t *connp; 891 892 first_mp = mp; 893 if (mctl_present) { 894 mp = first_mp->b_cont; 895 ASSERT(mp != NULL); 896 897 ii = (ipsec_in_t *)first_mp->b_rptr; 898 ASSERT(ii->ipsec_in_type == IPSEC_IN); 899 } else { 900 ii = NULL; 901 } 902 903 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 904 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 905 906 /* 907 * Need to pullup everything in order to use 908 * ip_hdr_length_nexthdr_v6() 909 */ 910 if (mp->b_cont != NULL) { 911 if (!pullupmsg(mp, -1)) { 912 ip1dbg(("icmp_inbound_error_fanout_v6: " 913 "pullupmsg failed\n")); 914 goto drop_pkt; 915 } 916 ip6h = (ip6_t *)mp->b_rptr; 917 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 918 } 919 920 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 921 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 922 goto drop_pkt; 923 924 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 925 goto drop_pkt; 926 nexthdr = *nexthdrp; 927 928 /* Set message type, must be done after pullups */ 929 mp->b_datap->db_type = M_CTL; 930 931 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 932 /* 933 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 934 * sockets. 935 * 936 * Note I don't like walking every connection to deliver 937 * this information to a set of listeners. A separate 938 * list could be kept to keep the cost of this down. 939 */ 940 ipcl_walk(pkt_too_big, (void *)mp); 941 } 942 943 /* Try to pass the ICMP message to clients who need it */ 944 switch (nexthdr) { 945 case IPPROTO_UDP: { 946 /* 947 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 948 * UDP header to get the port information. 949 */ 950 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 951 mp->b_wptr) { 952 break; 953 } 954 /* 955 * Attempt to find a client stream based on port. 956 * Note that we do a reverse lookup since the header is 957 * in the form we sent it out. 958 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 959 * and we only set the src and dst addresses and nexthdr. 960 */ 961 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 962 rip6h.ip6_src = ip6h->ip6_dst; 963 rip6h.ip6_dst = ip6h->ip6_src; 964 rip6h.ip6_nxt = nexthdr; 965 ((uint16_t *)&ports)[0] = up[1]; 966 ((uint16_t *)&ports)[1] = up[0]; 967 968 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 969 IP6_NO_IPPOLICY, mctl_present, zoneid); 970 return; 971 } 972 case IPPROTO_TCP: { 973 /* 974 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 975 * the TCP header to get the port information. 
976 */ 977 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 978 mp->b_wptr) { 979 break; 980 } 981 982 /* 983 * Attempt to find a client stream based on port. 984 * Note that we do a reverse lookup since the header is 985 * in the form we sent it out. 986 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 987 * we only set the src and dst addresses and nexthdr. 988 */ 989 990 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 991 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 992 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 993 if (connp == NULL) { 994 goto drop_pkt; 995 } 996 997 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 998 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 999 return; 1000 1001 } 1002 case IPPROTO_SCTP: 1003 /* 1004 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 1005 * the SCTP header to get the port information. 1006 */ 1007 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 1008 mp->b_wptr) { 1009 break; 1010 } 1011 1012 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 1013 ((uint16_t *)&ports)[0] = up[1]; 1014 ((uint16_t *)&ports)[1] = up[0]; 1015 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 1016 IP6_NO_IPPOLICY, 0, zoneid); 1017 return; 1018 case IPPROTO_ESP: 1019 case IPPROTO_AH: { 1020 int ipsec_rc; 1021 1022 /* 1023 * We need a IPSEC_IN in the front to fanout to AH/ESP. 1024 * We will re-use the IPSEC_IN if it is already present as 1025 * AH/ESP will not affect any fields in the IPSEC_IN for 1026 * ICMP errors. If there is no IPSEC_IN, allocate a new 1027 * one and attach it in the front. 1028 */ 1029 if (ii != NULL) { 1030 /* 1031 * ip_fanout_proto_again converts the ICMP errors 1032 * that come back from AH/ESP to M_DATA so that 1033 * if it is non-AH/ESP and we do a pullupmsg in 1034 * this function, it would work. Convert it back 1035 * to M_CTL before we send up as this is a ICMP 1036 * error. This could have been generated locally or 1037 * by some router. 
Validate the inner IPSEC 1038 * headers. 1039 * 1040 * NOTE : ill_index is used by ip_fanout_proto_again 1041 * to locate the ill. 1042 */ 1043 ASSERT(ill != NULL); 1044 ii->ipsec_in_ill_index = 1045 ill->ill_phyint->phyint_ifindex; 1046 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1047 first_mp->b_cont->b_datap->db_type = M_CTL; 1048 } else { 1049 /* 1050 * IPSEC_IN is not present. We attach a ipsec_in 1051 * message and send up to IPSEC for validating 1052 * and removing the IPSEC headers. Clear 1053 * ipsec_in_secure so that when we return 1054 * from IPSEC, we don't mistakenly think that this 1055 * is a secure packet came from the network. 1056 * 1057 * NOTE : ill_index is used by ip_fanout_proto_again 1058 * to locate the ill. 1059 */ 1060 ASSERT(first_mp == mp); 1061 first_mp = ipsec_in_alloc(B_FALSE); 1062 if (first_mp == NULL) { 1063 freemsg(mp); 1064 BUMP_MIB(&ip_mib, ipInDiscards); 1065 return; 1066 } 1067 ii = (ipsec_in_t *)first_mp->b_rptr; 1068 1069 /* This is not a secure packet */ 1070 ii->ipsec_in_secure = B_FALSE; 1071 first_mp->b_cont = mp; 1072 mp->b_datap->db_type = M_CTL; 1073 ASSERT(ill != NULL); 1074 ii->ipsec_in_ill_index = 1075 ill->ill_phyint->phyint_ifindex; 1076 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1077 } 1078 1079 if (!ipsec_loaded()) { 1080 ip_proto_not_sup(q, first_mp, 0, zoneid); 1081 return; 1082 } 1083 1084 if (nexthdr == IPPROTO_ESP) 1085 ipsec_rc = ipsecesp_icmp_error(first_mp); 1086 else 1087 ipsec_rc = ipsecah_icmp_error(first_mp); 1088 if (ipsec_rc == IPSEC_STATUS_FAILED) 1089 return; 1090 1091 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1092 return; 1093 } 1094 case IPPROTO_ENCAP: 1095 case IPPROTO_IPV6: 1096 if ((uint8_t *)ip6h + hdr_length + 1097 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1098 sizeof (ip6_t)) > mp->b_wptr) 1099 goto drop_pkt; 1100 1101 if (nexthdr == IPPROTO_ENCAP || 1102 !IN6_ARE_ADDR_EQUAL( 1103 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1104 &ip6h->ip6_src) || 1105 !IN6_ARE_ADDR_EQUAL( 1106 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1107 &ip6h->ip6_dst)) { 1108 /* 1109 * For tunnels that have used IPsec protection, 1110 * we need to adjust the MTU to take into account 1111 * the IPsec overhead. 1112 */ 1113 if (ii != NULL) 1114 icmp6->icmp6_mtu = htons( 1115 ntohs(icmp6->icmp6_mtu) - 1116 ipsec_in_extra_length(first_mp)); 1117 } else { 1118 /* 1119 * Self-encapsulated case. As in the ipv4 case, 1120 * we need to strip the 2nd IP header. Since mp 1121 * is already pulled-up, we can simply bcopy 1122 * the 3rd header + data over the 2nd header. 1123 */ 1124 uint16_t unused_len; 1125 ip6_t *inner_ip6h = (ip6_t *) 1126 ((uchar_t *)ip6h + hdr_length); 1127 1128 /* 1129 * Make sure we don't do recursion more than once. 1130 */ 1131 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1132 &unused_len, &nexthdrp) || 1133 *nexthdrp == IPPROTO_IPV6) { 1134 goto drop_pkt; 1135 } 1136 1137 /* 1138 * We are about to modify the packet. Make a copy if 1139 * someone else has a reference to it. 1140 */ 1141 if (DB_REF(mp) > 1) { 1142 mblk_t *mp1; 1143 uint16_t icmp6_offset; 1144 1145 mp1 = copymsg(mp); 1146 if (mp1 == NULL) { 1147 goto drop_pkt; 1148 } 1149 icmp6_offset = (uint16_t) 1150 ((uchar_t *)icmp6 - mp->b_rptr); 1151 freemsg(mp); 1152 mp = mp1; 1153 1154 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1155 ip6h = (ip6_t *)&icmp6[1]; 1156 inner_ip6h = (ip6_t *) 1157 ((uchar_t *)ip6h + hdr_length); 1158 1159 if (mctl_present) 1160 first_mp->b_cont = mp; 1161 else 1162 first_mp = mp; 1163 } 1164 1165 /* 1166 * Need to set db_type back to M_DATA before 1167 * refeeding mp into this function. 
1168 */ 1169 DB_TYPE(mp) = M_DATA; 1170 1171 /* 1172 * Copy the 3rd header + remaining data on top 1173 * of the 2nd header. 1174 */ 1175 bcopy(inner_ip6h, ip6h, 1176 mp->b_wptr - (uchar_t *)inner_ip6h); 1177 1178 /* 1179 * Subtract length of the 2nd header. 1180 */ 1181 mp->b_wptr -= hdr_length; 1182 1183 /* 1184 * Now recurse, and see what I _really_ should be 1185 * doing here. 1186 */ 1187 icmp_inbound_error_fanout_v6(q, first_mp, 1188 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1189 zoneid); 1190 return; 1191 } 1192 /* FALLTHRU */ 1193 default: 1194 /* 1195 * The rip6h header is only used for the lookup and we 1196 * only set the src and dst addresses and nexthdr. 1197 */ 1198 rip6h.ip6_src = ip6h->ip6_dst; 1199 rip6h.ip6_dst = ip6h->ip6_src; 1200 rip6h.ip6_nxt = nexthdr; 1201 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1202 IP6_NO_IPPOLICY, mctl_present, zoneid); 1203 return; 1204 } 1205 /* NOTREACHED */ 1206 drop_pkt: 1207 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1208 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1209 freemsg(first_mp); 1210 } 1211 1212 /* 1213 * Validate the incoming redirect message, if valid redirect 1214 * processing is done later. This is separated from the actual 1215 * redirect processing to avoid becoming single threaded when not 1216 * necessary. (i.e invalid packet) 1217 * Assumes that any AH or ESP headers have already been removed. 1218 * The mp has already been pulled up. 
1219 */ 1220 boolean_t 1221 icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp) 1222 { 1223 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1224 nd_redirect_t *rd; 1225 ire_t *ire; 1226 uint16_t len; 1227 uint16_t hdr_length; 1228 1229 ASSERT(mp->b_cont == NULL); 1230 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1231 hdr_length = ip_hdr_length_v6(mp, ip6h); 1232 else 1233 hdr_length = IPV6_HDR_LEN; 1234 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1235 len = mp->b_wptr - mp->b_rptr - hdr_length; 1236 if (!IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1237 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1238 (rd->nd_rd_code != 0) || 1239 (len < sizeof (nd_redirect_t)) || 1240 (IN6_IS_ADDR_V4MAPPED(&rd->nd_rd_dst)) || 1241 (IN6_IS_ADDR_MULTICAST(&rd->nd_rd_dst))) { 1242 return (B_FALSE); 1243 } 1244 if (!(IN6_IS_ADDR_LINKLOCAL(&rd->nd_rd_target) || 1245 IN6_ARE_ADDR_EQUAL(&rd->nd_rd_target, &rd->nd_rd_dst))) { 1246 return (B_FALSE); 1247 } 1248 1249 /* 1250 * Verify that the IP source address of the redirect is 1251 * the same as the current first-hop router for the specified 1252 * ICMP destination address. Just to be cautious, this test 1253 * will be done again before we add the redirect, in case 1254 * router goes away between now and then. 1255 */ 1256 ire = ire_route_lookup_v6(&rd->nd_rd_dst, 0, 1257 &ip6h->ip6_src, 0, ill->ill_ipif, NULL, ALL_ZONES, NULL, 1258 MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1259 if (ire == NULL) 1260 return (B_FALSE); 1261 ire_refrele(ire); 1262 if (len > sizeof (nd_redirect_t)) { 1263 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1264 len - sizeof (nd_redirect_t))) 1265 return (B_FALSE); 1266 } 1267 return (B_TRUE); 1268 } 1269 1270 /* 1271 * Process received IPv6 ICMP Redirect messages. 1272 * Assumes that the icmp packet has already been verfied to be 1273 * valid, aligned and in a single mblk all done in icmp_redirect_ok_v6(). 
1274 */ 1275 /* ARGSUSED */ 1276 static void 1277 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1278 { 1279 ip6_t *ip6h; 1280 uint16_t hdr_length; 1281 nd_redirect_t *rd; 1282 ire_t *ire; 1283 ire_t *prev_ire; 1284 ire_t *redir_ire; 1285 in6_addr_t *src, *dst, *gateway; 1286 nd_opt_hdr_t *opt; 1287 nce_t *nce; 1288 int nce_flags = 0; 1289 int err = 0; 1290 boolean_t redirect_to_router = B_FALSE; 1291 int len; 1292 iulp_t ulp_info = { 0 }; 1293 ill_t *prev_ire_ill; 1294 ipif_t *ipif; 1295 1296 ip6h = (ip6_t *)mp->b_rptr; 1297 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1298 hdr_length = ip_hdr_length_v6(mp, ip6h); 1299 else 1300 hdr_length = IPV6_HDR_LEN; 1301 1302 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1303 src = &ip6h->ip6_src; 1304 dst = &rd->nd_rd_dst; 1305 gateway = &rd->nd_rd_target; 1306 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1307 redirect_to_router = B_TRUE; 1308 nce_flags |= NCE_F_ISROUTER; 1309 } 1310 /* 1311 * Make sure we had a route for the dest in question and that 1312 * route was pointing to the old gateway (the source of the 1313 * redirect packet.) 1314 */ 1315 ipif = ipif_get_next_ipif(NULL, ill); 1316 if (ipif == NULL) { 1317 freemsg(mp); 1318 return; 1319 } 1320 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1321 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1322 ipif_refrele(ipif); 1323 /* 1324 * Check that 1325 * the redirect was not from ourselves 1326 * old gateway is still directly reachable 1327 */ 1328 if (prev_ire == NULL || 1329 prev_ire->ire_type == IRE_LOCAL) { 1330 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1331 goto fail_redirect; 1332 } 1333 prev_ire_ill = ire_to_ill(prev_ire); 1334 ASSERT(prev_ire_ill != NULL); 1335 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1336 nce_flags |= NCE_F_NONUD; 1337 1338 /* 1339 * Should we use the old ULP info to create the new gateway? From 1340 * a user's perspective, we should inherit the info so that it 1341 * is a "smooth" transition. 
If we do not do that, then new 1342 * connections going thru the new gateway will have no route metrics, 1343 * which is counter-intuitive to user. From a network point of 1344 * view, this may or may not make sense even though the new gateway 1345 * is still directly connected to us so the route metrics should not 1346 * change much. 1347 * 1348 * But if the old ire_uinfo is not initialized, we do another 1349 * recursive lookup on the dest using the new gateway. There may 1350 * be a route to that. If so, use it to initialize the redirect 1351 * route. 1352 */ 1353 if (prev_ire->ire_uinfo.iulp_set) { 1354 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1355 } else if (redirect_to_router) { 1356 /* 1357 * Only do the following if the redirection is really to 1358 * a router. 1359 */ 1360 ire_t *tmp_ire; 1361 ire_t *sire; 1362 1363 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1364 ALL_ZONES, 0, NULL, 1365 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1366 if (sire != NULL) { 1367 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1368 ASSERT(tmp_ire != NULL); 1369 ire_refrele(tmp_ire); 1370 ire_refrele(sire); 1371 } else if (tmp_ire != NULL) { 1372 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1373 sizeof (iulp_t)); 1374 ire_refrele(tmp_ire); 1375 } 1376 } 1377 1378 len = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1379 opt = (nd_opt_hdr_t *)&rd[1]; 1380 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1381 if (opt != NULL) { 1382 err = ndp_lookup_then_add(ill, 1383 (uchar_t *)&opt[1], /* Link layer address */ 1384 gateway, 1385 &ipv6_all_ones, /* prefix mask */ 1386 &ipv6_all_zeros, /* Mapping mask */ 1387 0, 1388 nce_flags, 1389 ND_STALE, 1390 &nce, 1391 NULL, 1392 NULL); 1393 switch (err) { 1394 case 0: 1395 NCE_REFRELE(nce); 1396 break; 1397 case EEXIST: 1398 /* 1399 * Check to see if link layer address has changed and 1400 * process the nce_state accordingly. 
1401 */ 1402 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1403 NCE_REFRELE(nce); 1404 break; 1405 default: 1406 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1407 err)); 1408 goto fail_redirect; 1409 } 1410 } 1411 if (redirect_to_router) { 1412 /* icmp_redirect_ok_v6() must have already verified this */ 1413 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1414 1415 /* 1416 * Create a Route Association. This will allow us to remember 1417 * a router told us to use the particular gateway. 1418 */ 1419 ire = ire_create_v6( 1420 dst, 1421 &ipv6_all_ones, /* mask */ 1422 &prev_ire->ire_src_addr_v6, /* source addr */ 1423 gateway, /* gateway addr */ 1424 &prev_ire->ire_max_frag, /* max frag */ 1425 NULL, /* Fast Path header */ 1426 NULL, /* no rfq */ 1427 NULL, /* no stq */ 1428 IRE_HOST_REDIRECT, 1429 NULL, 1430 prev_ire->ire_ipif, 1431 NULL, 1432 0, 1433 0, 1434 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1435 &ulp_info, 1436 NULL, 1437 NULL); 1438 } else { 1439 /* 1440 * Just create an on link entry, may or may not be a router 1441 * If there is no link layer address option ire_add() won't 1442 * add this. 1443 */ 1444 ire = ire_create_v6( 1445 dst, /* gateway == dst */ 1446 &ipv6_all_ones, /* mask */ 1447 &prev_ire->ire_src_addr_v6, /* source addr */ 1448 &ipv6_all_zeros, /* gateway addr */ 1449 &prev_ire->ire_max_frag, /* max frag */ 1450 NULL, /* Fast Path header */ 1451 prev_ire->ire_rfq, /* ire rfq */ 1452 prev_ire->ire_stq, /* ire stq */ 1453 IRE_CACHE, 1454 NULL, 1455 prev_ire->ire_ipif, 1456 &ipv6_all_ones, 1457 0, 1458 0, 1459 0, 1460 &ulp_info, 1461 NULL, 1462 NULL); 1463 } 1464 if (ire == NULL) 1465 goto fail_redirect; 1466 1467 /* 1468 * XXX If there is no nce i.e there is no target link layer address 1469 * option with the redirect message, ire_add will fail. In that 1470 * case we never add the IRE_CACHE/IRE_HOST_REDIRECT. We need 1471 * to fix this. 
1472 */ 1473 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1474 1475 /* tell routing sockets that we received a redirect */ 1476 ip_rts_change_v6(RTM_REDIRECT, 1477 &rd->nd_rd_dst, 1478 &rd->nd_rd_target, 1479 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1480 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1481 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1482 1483 /* 1484 * Delete any existing IRE_HOST_REDIRECT for this destination. 1485 * This together with the added IRE has the effect of 1486 * modifying an existing redirect. 1487 */ 1488 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST_REDIRECT, 1489 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1490 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1491 1492 ire_refrele(ire); /* Held in ire_add_v6 */ 1493 1494 if (redir_ire != NULL) { 1495 ire_delete(redir_ire); 1496 ire_refrele(redir_ire); 1497 } 1498 } 1499 1500 if (prev_ire->ire_type == IRE_CACHE) 1501 ire_delete(prev_ire); 1502 ire_refrele(prev_ire); 1503 prev_ire = NULL; 1504 1505 fail_redirect: 1506 if (prev_ire != NULL) 1507 ire_refrele(prev_ire); 1508 freemsg(mp); 1509 } 1510 1511 static ill_t * 1512 ip_queue_to_ill_v6(queue_t *q) 1513 { 1514 ill_t *ill; 1515 1516 ASSERT(WR(q) == q); 1517 1518 if (q->q_next != NULL) { 1519 ill = (ill_t *)q->q_ptr; 1520 if (ILL_CAN_LOOKUP(ill)) 1521 ill_refhold(ill); 1522 else 1523 ill = NULL; 1524 } else { 1525 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1526 NULL, NULL, NULL, NULL, NULL); 1527 } 1528 if (ill == NULL) 1529 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1530 return (ill); 1531 } 1532 1533 /* 1534 * Assigns an appropriate source address to the packet. 1535 * If origdst is one of our IP addresses that use it as the source. 1536 * If the queue is an ill queue then select a source from that ill. 1537 * Otherwise pick a source based on a route lookup back to the origsrc. 1538 * 1539 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1540 */ 1541 static in6_addr_t * 1542 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1543 in6_addr_t *src) 1544 { 1545 ill_t *ill; 1546 ire_t *ire; 1547 ipif_t *ipif; 1548 zoneid_t zoneid; 1549 1550 ASSERT(!(wq->q_flag & QREADR)); 1551 if (wq->q_next != NULL) { 1552 ill = (ill_t *)wq->q_ptr; 1553 zoneid = GLOBAL_ZONEID; 1554 } else { 1555 ill = NULL; 1556 zoneid = Q_TO_CONN(wq)->conn_zoneid; 1557 } 1558 1559 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1560 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1561 if (ire != NULL) { 1562 /* Destined to one of our addresses */ 1563 *src = *origdst; 1564 ire_refrele(ire); 1565 return (src); 1566 } 1567 if (ire != NULL) { 1568 ire_refrele(ire); 1569 ire = NULL; 1570 } 1571 if (ill == NULL) { 1572 /* What is the route back to the original source? */ 1573 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1574 NULL, NULL, zoneid, NULL, 1575 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1576 if (ire == NULL) { 1577 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1578 return (NULL); 1579 } 1580 /* 1581 * Does not matter whether we use ire_stq or ire_ipif here. 1582 * Just pick an ill for ICMP replies. 1583 */ 1584 ASSERT(ire->ire_ipif != NULL); 1585 ill = ire->ire_ipif->ipif_ill; 1586 ire_refrele(ire); 1587 } 1588 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1589 IPV6_PREFER_SRC_DEFAULT, zoneid); 1590 if (ipif != NULL) { 1591 *src = ipif->ipif_v6src_addr; 1592 ipif_refrele(ipif); 1593 return (src); 1594 } 1595 /* 1596 * Unusual case - can't find a usable source address to reach the 1597 * original source. Use what in the route to the source. 
1598 */ 1599 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1600 NULL, NULL, zoneid, NULL, 1601 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1602 if (ire == NULL) { 1603 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1604 return (NULL); 1605 } 1606 ASSERT(ire != NULL); 1607 *src = ire->ire_src_addr_v6; 1608 ire_refrele(ire); 1609 return (src); 1610 } 1611 1612 /* 1613 * Build and ship an IPv6 ICMP message using the packet data in mp, 1614 * and the ICMP header pointed to by "stuff". (May be called as 1615 * writer.) 1616 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1617 * verify that an icmp error packet can be sent. 1618 * 1619 * If q is an ill write side queue (which is the case when packets 1620 * arrive from ip_rput) then ip_wput code will ensure that packets to 1621 * link-local destinations are sent out that ill. 1622 * 1623 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1624 * source address (see above function). 1625 */ 1626 static void 1627 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1628 const in6_addr_t *v6src_ptr, boolean_t mctl_present) 1629 { 1630 ip6_t *ip6h; 1631 in6_addr_t v6dst; 1632 size_t len_needed; 1633 size_t msg_len; 1634 mblk_t *mp1; 1635 icmp6_t *icmp6; 1636 ill_t *ill; 1637 in6_addr_t v6src; 1638 mblk_t *ipsec_mp; 1639 ipsec_out_t *io; 1640 1641 ill = ip_queue_to_ill_v6(q); 1642 if (ill == NULL) { 1643 freemsg(mp); 1644 return; 1645 } 1646 1647 if (mctl_present) { 1648 /* 1649 * If it is : 1650 * 1651 * 1) a IPSEC_OUT, then this is caused by outbound 1652 * datagram originating on this host. IPSEC processing 1653 * may or may not have been done. Refer to comments above 1654 * icmp_inbound_error_fanout for details. 1655 * 1656 * 2) a IPSEC_IN if we are generating a icmp_message 1657 * for an incoming datagram destined for us i.e called 1658 * from ip_fanout_send_icmp. 
1659 */ 1660 ipsec_info_t *in; 1661 1662 ipsec_mp = mp; 1663 mp = ipsec_mp->b_cont; 1664 1665 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1666 ip6h = (ip6_t *)mp->b_rptr; 1667 1668 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1669 in->ipsec_info_type == IPSEC_IN); 1670 1671 if (in->ipsec_info_type == IPSEC_IN) { 1672 /* 1673 * Convert the IPSEC_IN to IPSEC_OUT. 1674 */ 1675 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1676 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1677 ill_refrele(ill); 1678 return; 1679 } 1680 } else { 1681 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1682 io = (ipsec_out_t *)in; 1683 /* 1684 * Clear out ipsec_out_proc_begin, so we do a fresh 1685 * ire lookup. 1686 */ 1687 io->ipsec_out_proc_begin = B_FALSE; 1688 } 1689 } else { 1690 /* 1691 * This is in clear. The icmp message we are building 1692 * here should go out in clear. 1693 */ 1694 ipsec_in_t *ii; 1695 ASSERT(mp->b_datap->db_type == M_DATA); 1696 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1697 freemsg(mp); 1698 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1699 ill_refrele(ill); 1700 return; 1701 } 1702 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1703 1704 /* This is not a secure packet */ 1705 ii->ipsec_in_secure = B_FALSE; 1706 ipsec_mp->b_cont = mp; 1707 ip6h = (ip6_t *)mp->b_rptr; 1708 /* 1709 * Convert the IPSEC_IN to IPSEC_OUT. 
1710 */ 1711 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1712 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1713 ill_refrele(ill); 1714 return; 1715 } 1716 } 1717 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1718 1719 if (v6src_ptr != NULL) { 1720 v6src = *v6src_ptr; 1721 } else { 1722 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1723 &v6src) == NULL) { 1724 freemsg(ipsec_mp); 1725 ill_refrele(ill); 1726 return; 1727 } 1728 } 1729 v6dst = ip6h->ip6_src; 1730 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1731 msg_len = msgdsize(mp); 1732 if (msg_len > len_needed) { 1733 if (!adjmsg(mp, len_needed - msg_len)) { 1734 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1735 freemsg(ipsec_mp); 1736 ill_refrele(ill); 1737 return; 1738 } 1739 msg_len = len_needed; 1740 } 1741 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1742 if (mp1 == NULL) { 1743 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1744 freemsg(ipsec_mp); 1745 ill_refrele(ill); 1746 return; 1747 } 1748 ill_refrele(ill); 1749 mp1->b_cont = mp; 1750 mp = mp1; 1751 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1752 io->ipsec_out_type == IPSEC_OUT); 1753 ipsec_mp->b_cont = mp; 1754 1755 /* 1756 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1757 * node generates be accepted in peace by all on-host destinations. 1758 * If we do NOT assume that all on-host destinations trust 1759 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1760 * (Look for ipsec_out_icmp_loopback). 
1761 */ 1762 io->ipsec_out_icmp_loopback = B_TRUE; 1763 1764 ip6h = (ip6_t *)mp->b_rptr; 1765 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1766 1767 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1768 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1769 ip6h->ip6_hops = ipv6_def_hops; 1770 ip6h->ip6_dst = v6dst; 1771 ip6h->ip6_src = v6src; 1772 msg_len += IPV6_HDR_LEN + len; 1773 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1774 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1775 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1776 } 1777 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1778 icmp6 = (icmp6_t *)&ip6h[1]; 1779 bcopy(stuff, (char *)icmp6, len); 1780 /* 1781 * Prepare for checksum by putting icmp length in the icmp 1782 * checksum field. The checksum is calculated in ip_wput_v6. 1783 */ 1784 icmp6->icmp6_cksum = ip6h->ip6_plen; 1785 if (icmp6->icmp6_type == ND_REDIRECT) { 1786 ip6h->ip6_hops = IPV6_MAX_HOPS; 1787 } 1788 /* Send to V6 writeside put routine */ 1789 put(q, ipsec_mp); 1790 } 1791 1792 /* 1793 * Update the output mib when ICMPv6 packets are sent. 
1794 */ 1795 static void 1796 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1797 { 1798 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1799 1800 switch (icmp6->icmp6_type) { 1801 case ICMP6_DST_UNREACH: 1802 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1803 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1804 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1805 break; 1806 1807 case ICMP6_TIME_EXCEEDED: 1808 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1809 break; 1810 1811 case ICMP6_PARAM_PROB: 1812 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1813 break; 1814 1815 case ICMP6_PACKET_TOO_BIG: 1816 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1817 break; 1818 1819 case ICMP6_ECHO_REQUEST: 1820 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1821 break; 1822 1823 case ICMP6_ECHO_REPLY: 1824 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1825 break; 1826 1827 case ND_ROUTER_SOLICIT: 1828 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1829 break; 1830 1831 case ND_ROUTER_ADVERT: 1832 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1833 break; 1834 1835 case ND_NEIGHBOR_SOLICIT: 1836 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1837 break; 1838 1839 case ND_NEIGHBOR_ADVERT: 1840 BUMP_MIB(ill->ill_icmp6_mib, 1841 ipv6IfIcmpOutNeighborAdvertisements); 1842 break; 1843 1844 case ND_REDIRECT: 1845 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1846 break; 1847 1848 case MLD_LISTENER_QUERY: 1849 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1850 break; 1851 1852 case MLD_LISTENER_REPORT: 1853 case MLD_V2_LISTENER_REPORT: 1854 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1855 break; 1856 1857 case MLD_LISTENER_REDUCTION: 1858 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1859 break; 1860 } 1861 } 1862 1863 /* 1864 * Check if it is ok to send an ICMPv6 error packet in 1865 * response to the IP packet in 
mp. 1866 * Free the message and return null if no 1867 * ICMP error packet should be sent. 1868 */ 1869 static mblk_t * 1870 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1871 boolean_t llbcast, boolean_t mcast_ok) 1872 { 1873 ip6_t *ip6h; 1874 1875 if (!mp) 1876 return (NULL); 1877 1878 ip6h = (ip6_t *)mp->b_rptr; 1879 1880 /* Check if source address uniquely identifies the host */ 1881 1882 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1883 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1884 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1885 freemsg(mp); 1886 return (NULL); 1887 } 1888 1889 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1890 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1891 icmp6_t *icmp6; 1892 1893 if (mp->b_wptr - mp->b_rptr < len_needed) { 1894 if (!pullupmsg(mp, len_needed)) { 1895 ill_t *ill; 1896 1897 ill = ip_queue_to_ill_v6(q); 1898 if (ill == NULL) { 1899 BUMP_MIB(&icmp6_mib, 1900 ipv6IfIcmpInErrors); 1901 } else { 1902 BUMP_MIB(ill->ill_icmp6_mib, 1903 ipv6IfIcmpInErrors); 1904 ill_refrele(ill); 1905 } 1906 freemsg(mp); 1907 return (NULL); 1908 } 1909 ip6h = (ip6_t *)mp->b_rptr; 1910 } 1911 icmp6 = (icmp6_t *)&ip6h[1]; 1912 /* Explicitly do not generate errors in response to redirects */ 1913 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1914 icmp6->icmp6_type == ND_REDIRECT) { 1915 freemsg(mp); 1916 return (NULL); 1917 } 1918 } 1919 /* 1920 * Check that the destination is not multicast and that the packet 1921 * was not sent on link layer broadcast or multicast. (Exception 1922 * is Packet too big message as per the draft - when mcast_ok is set.) 1923 */ 1924 if (!mcast_ok && 1925 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1926 freemsg(mp); 1927 return (NULL); 1928 } 1929 if (icmp_err_rate_limit()) { 1930 /* 1931 * Only send ICMP error packets every so often. 1932 * This should be done on a per port/source basis, 1933 * but for now this will suffice. 
1934 */ 1935 freemsg(mp); 1936 return (NULL); 1937 } 1938 return (mp); 1939 } 1940 1941 /* 1942 * Generate an ICMPv6 redirect message. 1943 * Include target link layer address option if it exits. 1944 * Always include redirect header. 1945 */ 1946 static void 1947 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1948 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1949 { 1950 nd_redirect_t *rd; 1951 nd_opt_rd_hdr_t *rdh; 1952 uchar_t *buf; 1953 nce_t *nce = NULL; 1954 nd_opt_hdr_t *opt; 1955 int len; 1956 int ll_opt_len = 0; 1957 int max_redir_hdr_data_len; 1958 int pkt_len; 1959 in6_addr_t *srcp; 1960 1961 /* 1962 * We are called from ip_rput where we could 1963 * not have attached an IPSEC_IN. 1964 */ 1965 ASSERT(mp->b_datap->db_type == M_DATA); 1966 1967 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1968 if (mp == NULL) 1969 return; 1970 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1971 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1972 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1973 ill->ill_phys_addr_length + 7)/8 * 8; 1974 } 1975 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1976 ASSERT(len % 4 == 0); 1977 buf = kmem_alloc(len, KM_NOSLEEP); 1978 if (buf == NULL) { 1979 if (nce != NULL) 1980 NCE_REFRELE(nce); 1981 freemsg(mp); 1982 return; 1983 } 1984 1985 rd = (nd_redirect_t *)buf; 1986 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1987 rd->nd_rd_code = 0; 1988 rd->nd_rd_reserved = 0; 1989 rd->nd_rd_target = *targetp; 1990 rd->nd_rd_dst = *dest; 1991 1992 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1993 if (nce != NULL && ll_opt_len != 0) { 1994 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1995 opt->nd_opt_len = ll_opt_len/8; 1996 bcopy((char *)nce->nce_res_mp->b_rptr + 1997 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1998 ill->ill_phys_addr_length); 1999 } 2000 if (nce != NULL) 2001 NCE_REFRELE(nce); 2002 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 2003 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 2004 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 2005 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 2006 pkt_len = msgdsize(mp); 2007 /* Make sure mp is 8 byte aligned */ 2008 if (pkt_len > max_redir_hdr_data_len) { 2009 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 2010 sizeof (nd_opt_rd_hdr_t))/8; 2011 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 2012 } else { 2013 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 2014 (void) adjmsg(mp, -(pkt_len % 8)); 2015 } 2016 rdh->nd_opt_rh_reserved1 = 0; 2017 rdh->nd_opt_rh_reserved2 = 0; 2018 /* ipif_v6src_addr contains the link-local source address */ 2019 rw_enter(&ill_g_lock, RW_READER); 2020 if (ill->ill_group != NULL) { 2021 /* 2022 * The receiver of the redirect will verify whether it 2023 * had a route through us (srcp that we will use in 2024 * the redirect) or not. As we load spread even link-locals, 2025 * we don't know which source address the receiver of 2026 * redirect has in its route for communicating with us. 2027 * Thus we randomly choose a source here and finally we 2028 * should get to the right one and it will eventually 2029 * accept the redirect from us. We can't call 2030 * ip_lookup_scope_v6 because we don't have the right 2031 * link-local address here. Thus we randomly choose one. 2032 */ 2033 int cnt = ill->ill_group->illgrp_ill_count; 2034 2035 ill = ill->ill_group->illgrp_ill; 2036 cnt = ++icmp_redirect_v6_src_index % cnt; 2037 while (cnt--) 2038 ill = ill->ill_group_next; 2039 srcp = &ill->ill_ipif->ipif_v6src_addr; 2040 } else { 2041 srcp = &ill->ill_ipif->ipif_v6src_addr; 2042 } 2043 rw_exit(&ill_g_lock); 2044 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE); 2045 kmem_free(buf, len); 2046 } 2047 2048 2049 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 2050 void 2051 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 2052 boolean_t llbcast, boolean_t mcast_ok) 2053 { 2054 icmp6_t icmp6; 2055 boolean_t mctl_present; 2056 mblk_t *first_mp; 2057 2058 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2059 2060 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2061 if (mp == NULL) { 2062 if (mctl_present) 2063 freeb(first_mp); 2064 return; 2065 } 2066 bzero(&icmp6, sizeof (icmp6_t)); 2067 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2068 icmp6.icmp6_code = code; 2069 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2070 } 2071 2072 /* 2073 * Generate an ICMP unreachable message. 2074 */ 2075 void 2076 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2077 boolean_t llbcast, boolean_t mcast_ok) 2078 { 2079 icmp6_t icmp6; 2080 boolean_t mctl_present; 2081 mblk_t *first_mp; 2082 2083 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2084 2085 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2086 if (mp == NULL) { 2087 if (mctl_present) 2088 freeb(first_mp); 2089 return; 2090 } 2091 bzero(&icmp6, sizeof (icmp6_t)); 2092 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2093 icmp6.icmp6_code = code; 2094 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2095 } 2096 2097 /* 2098 * Generate an ICMP pkt too big message. 
2099 */ 2100 static void 2101 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2102 boolean_t llbcast, boolean_t mcast_ok) 2103 { 2104 icmp6_t icmp6; 2105 mblk_t *first_mp; 2106 boolean_t mctl_present; 2107 2108 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2109 2110 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2111 if (mp == NULL) { 2112 if (mctl_present) 2113 freeb(first_mp); 2114 return; 2115 } 2116 bzero(&icmp6, sizeof (icmp6_t)); 2117 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2118 icmp6.icmp6_code = 0; 2119 icmp6.icmp6_mtu = htonl(mtu); 2120 2121 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2122 } 2123 2124 /* 2125 * Generate an ICMP parameter problem message. (May be called as writer.) 2126 * 'offset' is the offset from the beginning of the packet in error. 2127 */ 2128 static void 2129 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2130 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok) 2131 { 2132 icmp6_t icmp6; 2133 boolean_t mctl_present; 2134 mblk_t *first_mp; 2135 2136 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2137 2138 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2139 if (mp == NULL) { 2140 if (mctl_present) 2141 freeb(first_mp); 2142 return; 2143 } 2144 bzero((char *)&icmp6, sizeof (icmp6_t)); 2145 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2146 icmp6.icmp6_code = code; 2147 icmp6.icmp6_pptr = htonl(offset); 2148 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present); 2149 } 2150 2151 /* 2152 * This code will need to take into account the possibility of binding 2153 * to a link local address on a multi-homed host, in which case the 2154 * outgoing interface (from the conn) will need to be used when getting 2155 * an ire for the dst. 
Going through proper outgoing interface and 2156 * choosing the source address corresponding to the outgoing interface 2157 * is necessary when the destination address is a link-local address and 2158 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2159 * This can happen when active connection is setup; thus ipp pointer 2160 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2161 * pointer is passed as ipp pointer. 2162 */ 2163 mblk_t * 2164 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2165 { 2166 ssize_t len; 2167 int protocol; 2168 struct T_bind_req *tbr; 2169 sin6_t *sin6; 2170 ipa6_conn_t *ac6; 2171 in6_addr_t *v6srcp; 2172 in6_addr_t *v6dstp; 2173 uint16_t lport; 2174 uint16_t fport; 2175 uchar_t *ucp; 2176 mblk_t *mp1; 2177 boolean_t ire_requested; 2178 boolean_t ipsec_policy_set; 2179 int error = 0; 2180 boolean_t local_bind; 2181 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2182 ipa6_conn_x_t *acx6; 2183 boolean_t verify_dst; 2184 2185 ASSERT(connp->conn_af_isv6); 2186 len = mp->b_wptr - mp->b_rptr; 2187 if (len < (sizeof (*tbr) + 1)) { 2188 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2189 "ip_bind_v6: bogus msg, len %ld", len); 2190 goto bad_addr; 2191 } 2192 /* Back up and extract the protocol identifier. */ 2193 mp->b_wptr--; 2194 tbr = (struct T_bind_req *)mp->b_rptr; 2195 /* Reset the message type in preparation for shipping it back. */ 2196 mp->b_datap->db_type = M_PCPROTO; 2197 2198 protocol = *mp->b_wptr & 0xFF; 2199 connp->conn_ulp = (uint8_t)protocol; 2200 2201 /* 2202 * Check for a zero length address. This is from a protocol that 2203 * wants to register to receive all packets of its type. 2204 */ 2205 if (tbr->ADDR_length == 0) { 2206 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2207 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2208 ipcl_proto_fanout_v6[protocol].connf_head != NULL) { 2209 /* 2210 * TCP, SCTP, AH, and ESP have single protocol fanouts. 
2211 * Do not allow others to bind to these. 2212 */ 2213 goto bad_addr; 2214 } 2215 2216 /* 2217 * 2218 * The udp module never sends down a zero-length address, 2219 * and allowing this on a labeled system will break MLP 2220 * functionality. 2221 */ 2222 if (is_system_labeled() && protocol == IPPROTO_UDP) 2223 goto bad_addr; 2224 2225 /* Allow ipsec plumbing */ 2226 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2227 protocol != IPPROTO_ESP) 2228 goto bad_addr; 2229 2230 connp->conn_srcv6 = ipv6_all_zeros; 2231 ipcl_proto_insert_v6(connp, protocol); 2232 2233 tbr->PRIM_type = T_BIND_ACK; 2234 return (mp); 2235 } 2236 2237 /* Extract the address pointer from the message. */ 2238 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2239 tbr->ADDR_length); 2240 if (ucp == NULL) { 2241 ip1dbg(("ip_bind_v6: no address\n")); 2242 goto bad_addr; 2243 } 2244 if (!OK_32PTR(ucp)) { 2245 ip1dbg(("ip_bind_v6: unaligned address\n")); 2246 goto bad_addr; 2247 } 2248 mp1 = mp->b_cont; /* trailing mp if any */ 2249 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2250 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2251 2252 switch (tbr->ADDR_length) { 2253 default: 2254 ip1dbg(("ip_bind_v6: bad address length %d\n", 2255 (int)tbr->ADDR_length)); 2256 goto bad_addr; 2257 2258 case IPV6_ADDR_LEN: 2259 /* Verification of local address only */ 2260 v6srcp = (in6_addr_t *)ucp; 2261 lport = 0; 2262 local_bind = B_TRUE; 2263 break; 2264 2265 case sizeof (sin6_t): 2266 sin6 = (sin6_t *)ucp; 2267 v6srcp = &sin6->sin6_addr; 2268 lport = sin6->sin6_port; 2269 local_bind = B_TRUE; 2270 break; 2271 2272 case sizeof (ipa6_conn_t): 2273 /* 2274 * Verify that both the source and destination addresses 2275 * are valid. 2276 * Note that we allow connect to broadcast and multicast 2277 * addresses when ire_requested is set. Thus the ULP 2278 * has to check for IRE_BROADCAST and multicast. 
2279 */ 2280 ac6 = (ipa6_conn_t *)ucp; 2281 v6srcp = &ac6->ac6_laddr; 2282 v6dstp = &ac6->ac6_faddr; 2283 fport = ac6->ac6_fport; 2284 /* For raw socket, the local port is not set. */ 2285 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2286 connp->conn_lport; 2287 local_bind = B_FALSE; 2288 /* Always verify destination reachability. */ 2289 verify_dst = B_TRUE; 2290 break; 2291 2292 case sizeof (ipa6_conn_x_t): 2293 /* 2294 * Verify that the source address is valid. 2295 * Note that we allow connect to broadcast and multicast 2296 * addresses when ire_requested is set. Thus the ULP 2297 * has to check for IRE_BROADCAST and multicast. 2298 */ 2299 acx6 = (ipa6_conn_x_t *)ucp; 2300 ac6 = &acx6->ac6x_conn; 2301 v6srcp = &ac6->ac6_laddr; 2302 v6dstp = &ac6->ac6_faddr; 2303 fport = ac6->ac6_fport; 2304 lport = ac6->ac6_lport; 2305 local_bind = B_FALSE; 2306 /* 2307 * Client that passed ipa6_conn_x_t to us specifies whether to 2308 * verify destination reachability. 2309 */ 2310 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2311 break; 2312 } 2313 if (local_bind) { 2314 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2315 /* Bind to IPv4 address */ 2316 ipaddr_t v4src; 2317 2318 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2319 2320 error = ip_bind_laddr(connp, mp, v4src, lport, 2321 ire_requested, ipsec_policy_set, 2322 tbr->ADDR_length != IPV6_ADDR_LEN); 2323 if (error != 0) 2324 goto bad_addr; 2325 connp->conn_pkt_isv6 = B_FALSE; 2326 } else { 2327 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2328 error = 0; 2329 goto bad_addr; 2330 } 2331 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2332 ire_requested, ipsec_policy_set, 2333 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2334 if (error != 0) 2335 goto bad_addr; 2336 connp->conn_pkt_isv6 = B_TRUE; 2337 } 2338 if (protocol == IPPROTO_TCP) 2339 connp->conn_recv = tcp_conn_request; 2340 } else { 2341 /* 2342 * Bind to local and remote address. 
Local might be 2343 * unspecified in which case it will be extracted from 2344 * ire_src_addr_v6 2345 */ 2346 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2347 /* Connect to IPv4 address */ 2348 ipaddr_t v4src; 2349 ipaddr_t v4dst; 2350 2351 /* Is the source unspecified or mapped? */ 2352 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2353 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2354 ip1dbg(("ip_bind_v6: " 2355 "dst is mapped, but not the src\n")); 2356 goto bad_addr; 2357 } 2358 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2359 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2360 2361 /* 2362 * XXX Fix needed. Need to pass ipsec_policy_set 2363 * instead of B_FALSE. 2364 */ 2365 2366 /* Always verify destination reachability. */ 2367 error = ip_bind_connected(connp, mp, &v4src, lport, 2368 v4dst, fport, ire_requested, ipsec_policy_set, 2369 B_TRUE, B_TRUE); 2370 if (error != 0) 2371 goto bad_addr; 2372 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2373 connp->conn_pkt_isv6 = B_FALSE; 2374 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2375 ip1dbg(("ip_bind_v6: " 2376 "src is mapped, but not the dst\n")); 2377 goto bad_addr; 2378 } else { 2379 error = ip_bind_connected_v6(connp, mp, v6srcp, 2380 lport, v6dstp, ipp, fport, ire_requested, 2381 ipsec_policy_set, B_TRUE, verify_dst); 2382 if (error != 0) 2383 goto bad_addr; 2384 connp->conn_pkt_isv6 = B_TRUE; 2385 } 2386 if (protocol == IPPROTO_TCP) 2387 connp->conn_recv = tcp_input; 2388 } 2389 /* Update qinfo if v4/v6 changed */ 2390 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2391 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2392 if (connp->conn_pkt_isv6) 2393 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE); 2394 else 2395 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 2396 } 2397 2398 /* 2399 * Pass the IPSEC headers size in ire_ipsec_overhead. 2400 * We can't do this in ip_bind_insert_ire because the policy 2401 * may not have been inherited at that point in time and hence 2402 * conn_out_enforce_policy may not be set. 
2403 */ 2404 mp1 = mp->b_cont; 2405 if (ire_requested && connp->conn_out_enforce_policy && 2406 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2407 ire_t *ire = (ire_t *)mp1->b_rptr; 2408 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2409 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2410 } 2411 2412 /* Send it home. */ 2413 mp->b_datap->db_type = M_PCPROTO; 2414 tbr->PRIM_type = T_BIND_ACK; 2415 return (mp); 2416 2417 bad_addr: 2418 if (error == EINPROGRESS) 2419 return (NULL); 2420 if (error > 0) 2421 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2422 else 2423 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2424 return (mp); 2425 } 2426 2427 /* 2428 * Here address is verified to be a valid local address. 2429 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2430 * address is also considered a valid local address. 2431 * In the case of a multicast address, however, the 2432 * upper protocol is expected to reset the src address 2433 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2434 * no packets are emitted with multicast address as 2435 * source address. 2436 * The addresses valid for bind are: 2437 * (1) - in6addr_any 2438 * (2) - IP address of an UP interface 2439 * (3) - IP address of a DOWN interface 2440 * (4) - a multicast address. In this case 2441 * the conn will only receive packets destined to 2442 * the specified multicast address. Note: the 2443 * application still has to issue an 2444 * IPV6_JOIN_GROUP socket option. 2445 * 2446 * In all the above cases, the bound address must be valid in the current zone. 2447 * When the address is loopback or multicast, there might be many matching IREs 2448 * so bind has to look up based on the zone. 
2449 */ 2450 static int 2451 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2452 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2453 boolean_t fanout_insert) 2454 { 2455 int error = 0; 2456 ire_t *src_ire = NULL; 2457 ipif_t *ipif = NULL; 2458 mblk_t *policy_mp; 2459 zoneid_t zoneid; 2460 2461 if (ipsec_policy_set) 2462 policy_mp = mp->b_cont; 2463 2464 /* 2465 * If it was previously connected, conn_fully_bound would have 2466 * been set. 2467 */ 2468 connp->conn_fully_bound = B_FALSE; 2469 2470 zoneid = connp->conn_zoneid; 2471 2472 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2473 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2474 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2475 /* 2476 * If an address other than in6addr_any is requested, 2477 * we verify that it is a valid address for bind 2478 * Note: Following code is in if-else-if form for 2479 * readability compared to a condition check. 2480 */ 2481 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2482 if (IRE_IS_LOCAL(src_ire)) { 2483 /* 2484 * (2) Bind to address of local UP interface 2485 */ 2486 ipif = src_ire->ire_ipif; 2487 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2488 ipif_t *multi_ipif = NULL; 2489 ire_t *save_ire; 2490 /* 2491 * (4) bind to multicast address. 2492 * Fake out the IRE returned to upper 2493 * layer to be a broadcast IRE in 2494 * ip_bind_insert_ire_v6(). 2495 * Pass other information that matches 2496 * the ipif (e.g. the source address). 
2497 * conn_multicast_ill is only used for 2498 * IPv6 packets 2499 */ 2500 mutex_enter(&connp->conn_lock); 2501 if (connp->conn_multicast_ill != NULL) { 2502 (void) ipif_lookup_zoneid( 2503 connp->conn_multicast_ill, zoneid, 0, 2504 &multi_ipif); 2505 } else { 2506 /* 2507 * Look for default like 2508 * ip_wput_v6 2509 */ 2510 multi_ipif = ipif_lookup_group_v6( 2511 &ipv6_unspecified_group, zoneid); 2512 } 2513 mutex_exit(&connp->conn_lock); 2514 save_ire = src_ire; 2515 src_ire = NULL; 2516 if (multi_ipif == NULL || !ire_requested || 2517 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2518 src_ire = save_ire; 2519 error = EADDRNOTAVAIL; 2520 } else { 2521 ASSERT(src_ire != NULL); 2522 if (save_ire != NULL) 2523 ire_refrele(save_ire); 2524 } 2525 if (multi_ipif != NULL) 2526 ipif_refrele(multi_ipif); 2527 } else { 2528 *mp->b_wptr++ = (char)connp->conn_ulp; 2529 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2530 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error); 2531 if (ipif == NULL) { 2532 if (error == EINPROGRESS) { 2533 if (src_ire != NULL) 2534 ire_refrele(src_ire); 2535 return (error); 2536 } 2537 /* 2538 * Not a valid address for bind 2539 */ 2540 error = EADDRNOTAVAIL; 2541 } else { 2542 ipif_refrele(ipif); 2543 } 2544 /* 2545 * Just to keep it consistent with the processing in 2546 * ip_bind_v6(). 2547 */ 2548 mp->b_wptr--; 2549 } 2550 2551 if (error != 0) { 2552 /* Red Alert! Attempting to be a bogon! */ 2553 if (ip_debug > 2) { 2554 /* ip1dbg */ 2555 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2556 " address %s\n", AF_INET6, v6src); 2557 } 2558 goto bad_addr; 2559 } 2560 } 2561 2562 /* 2563 * Allow setting new policies. For example, disconnects come 2564 * down as ipa_t bind. As we would have set conn_policy_cached 2565 * to B_TRUE before, we should set it to B_FALSE, so that policy 2566 * can change after the disconnect. 
2567 */ 2568 connp->conn_policy_cached = B_FALSE; 2569 2570 /* If not fanout_insert this was just an address verification */ 2571 if (fanout_insert) { 2572 /* 2573 * The addresses have been verified. Time to insert in 2574 * the correct fanout list. 2575 */ 2576 connp->conn_srcv6 = *v6src; 2577 connp->conn_remv6 = ipv6_all_zeros; 2578 connp->conn_lport = lport; 2579 connp->conn_fport = 0; 2580 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2581 } 2582 if (error == 0) { 2583 if (ire_requested) { 2584 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) { 2585 error = -1; 2586 goto bad_addr; 2587 } 2588 } else if (ipsec_policy_set) { 2589 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2590 error = -1; 2591 goto bad_addr; 2592 } 2593 } 2594 } 2595 bad_addr: 2596 if (error != 0) { 2597 if (connp->conn_anon_port) { 2598 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2599 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2600 B_FALSE); 2601 } 2602 connp->conn_mlp_type = mlptSingle; 2603 } 2604 2605 if (src_ire != NULL) 2606 ire_refrele(src_ire); 2607 2608 if (ipsec_policy_set) { 2609 ASSERT(policy_mp != NULL); 2610 freeb(policy_mp); 2611 /* 2612 * As of now assume that nothing else accompanies 2613 * IPSEC_POLICY_SET. 
2614 */ 2615 mp->b_cont = NULL; 2616 } 2617 return (error); 2618 } 2619 2620 /* ARGSUSED */ 2621 static void 2622 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2623 void *dummy_arg) 2624 { 2625 conn_t *connp = NULL; 2626 t_scalar_t prim; 2627 2628 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2629 2630 if (CONN_Q(q)) 2631 connp = Q_TO_CONN(q); 2632 ASSERT(connp != NULL); 2633 2634 prim = ((union T_primitives *)mp->b_rptr)->type; 2635 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2636 2637 if (IPCL_IS_TCP(connp)) { 2638 /* Pass sticky_ipp for scope_id and pktinfo */ 2639 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2640 } else { 2641 /* For UDP and ICMP */ 2642 mp = ip_bind_v6(q, mp, connp, NULL); 2643 } 2644 if (mp != NULL) { 2645 if (IPCL_IS_TCP(connp)) { 2646 CONN_INC_REF(connp); 2647 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2648 connp, SQTAG_TCP_RPUTOTHER); 2649 } else if (IPCL_IS_UDP(connp)) { 2650 udp_resume_bind(connp, mp); 2651 } else { 2652 qreply(q, mp); 2653 CONN_OPER_PENDING_DONE(connp); 2654 } 2655 } 2656 } 2657 2658 /* 2659 * Verify that both the source and destination addresses 2660 * are valid. If verify_dst, then destination address must also be reachable, 2661 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2662 * It takes ip6_pkt_t * as one of the arguments to determine correct 2663 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2664 * destination address. Note that parameter ipp is only useful for TCP connect 2665 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2666 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2667 * 2668 */ 2669 static int 2670 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2671 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2672 boolean_t ire_requested, boolean_t ipsec_policy_set, 2673 boolean_t fanout_insert, boolean_t verify_dst) 2674 { 2675 ire_t *src_ire; 2676 ire_t *dst_ire; 2677 int error = 0; 2678 int protocol; 2679 mblk_t *policy_mp; 2680 ire_t *sire = NULL; 2681 ire_t *md_dst_ire = NULL; 2682 ill_t *md_ill = NULL; 2683 ill_t *dst_ill = NULL; 2684 ipif_t *src_ipif = NULL; 2685 zoneid_t zoneid; 2686 boolean_t ill_held = B_FALSE; 2687 2688 src_ire = dst_ire = NULL; 2689 /* 2690 * NOTE: The protocol is beyond the wptr because that's how 2691 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2692 */ 2693 protocol = *mp->b_wptr & 0xFF; 2694 2695 /* 2696 * If we never got a disconnect before, clear it now. 2697 */ 2698 connp->conn_fully_bound = B_FALSE; 2699 2700 if (ipsec_policy_set) { 2701 policy_mp = mp->b_cont; 2702 } 2703 2704 zoneid = connp->conn_zoneid; 2705 2706 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2707 ipif_t *ipif; 2708 2709 /* 2710 * Use an "emulated" IRE_BROADCAST to tell the transport it 2711 * is a multicast. 2712 * Pass other information that matches 2713 * the ipif (e.g. the source address). 
2714 * 2715 * conn_multicast_ill is only used for IPv6 packets 2716 */ 2717 mutex_enter(&connp->conn_lock); 2718 if (connp->conn_multicast_ill != NULL) { 2719 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2720 zoneid, 0, &ipif); 2721 } else { 2722 /* Look for default like ip_wput_v6 */ 2723 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2724 } 2725 mutex_exit(&connp->conn_lock); 2726 if (ipif == NULL || !ire_requested || 2727 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2728 if (ipif != NULL) 2729 ipif_refrele(ipif); 2730 if (ip_debug > 2) { 2731 /* ip1dbg */ 2732 pr_addr_dbg("ip_bind_connected_v6: bad " 2733 "connected multicast %s\n", AF_INET6, 2734 v6dst); 2735 } 2736 error = ENETUNREACH; 2737 goto bad_addr; 2738 } 2739 if (ipif != NULL) 2740 ipif_refrele(ipif); 2741 } else { 2742 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2743 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2744 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2745 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR); 2746 /* 2747 * We also prevent ire's with src address INADDR_ANY to 2748 * be used, which are created temporarily for 2749 * sending out packets from endpoints that have 2750 * conn_unspec_src set. 2751 */ 2752 if (dst_ire == NULL || 2753 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2754 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2755 /* 2756 * When verifying destination reachability, we always 2757 * complain. 2758 * 2759 * When not verifying destination reachability but we 2760 * found an IRE, i.e. the destination is reachable, 2761 * then the other tests still apply and we complain. 
2762 */ 2763 if (verify_dst || (dst_ire != NULL)) { 2764 if (ip_debug > 2) { 2765 /* ip1dbg */ 2766 pr_addr_dbg("ip_bind_connected_v6: bad" 2767 " connected dst %s\n", AF_INET6, 2768 v6dst); 2769 } 2770 if (dst_ire == NULL || 2771 !(dst_ire->ire_type & IRE_HOST)) { 2772 error = ENETUNREACH; 2773 } else { 2774 error = EHOSTUNREACH; 2775 } 2776 goto bad_addr; 2777 } 2778 } 2779 } 2780 2781 /* 2782 * We now know that routing will allow us to reach the destination. 2783 * Check whether Trusted Solaris policy allows communication with this 2784 * host, and pretend that the destination is unreachable if not. 2785 * 2786 * This is never a problem for TCP, since that transport is known to 2787 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2788 * handling. If the remote is unreachable, it will be detected at that 2789 * point, so there's no reason to check it here. 2790 * 2791 * Note that for sendto (and other datagram-oriented friends), this 2792 * check is done as part of the data path label computation instead. 2793 * The check here is just to make non-TCP connect() report the right 2794 * error. 2795 */ 2796 if (dst_ire != NULL && is_system_labeled() && 2797 !IPCL_IS_TCP(connp) && 2798 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2799 connp->conn_mac_exempt) != 0) { 2800 error = EHOSTUNREACH; 2801 if (ip_debug > 2) { 2802 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2803 AF_INET6, v6dst); 2804 } 2805 goto bad_addr; 2806 } 2807 2808 /* 2809 * If the app does a connect(), it means that it will most likely 2810 * send more than 1 packet to the destination. It makes sense 2811 * to clear the temporary flag. 
2812 */ 2813 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2814 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2815 irb_t *irb = dst_ire->ire_bucket; 2816 2817 rw_enter(&irb->irb_lock, RW_WRITER); 2818 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2819 irb->irb_tmp_ire_cnt--; 2820 rw_exit(&irb->irb_lock); 2821 } 2822 2823 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2824 2825 /* 2826 * See if we should notify ULP about MDT; we do this whether or not 2827 * ire_requested is TRUE, in order to handle active connects; MDT 2828 * eligibility tests for passive connects are handled separately 2829 * through tcp_adapt_ire(). We do this before the source address 2830 * selection, because dst_ire may change after a call to 2831 * ipif_select_source_v6(). This is a best-effort check, as the 2832 * packet for this connection may not actually go through 2833 * dst_ire->ire_stq, and the exact IRE can only be known after 2834 * calling ip_newroute_v6(). This is why we further check on the 2835 * IRE during Multidata packet transmission in tcp_multisend(). 
2836 */ 2837 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2838 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2839 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2840 ILL_MDT_CAPABLE(md_ill)) { 2841 md_dst_ire = dst_ire; 2842 IRE_REFHOLD(md_dst_ire); 2843 } 2844 2845 if (dst_ire != NULL && 2846 dst_ire->ire_type == IRE_LOCAL && 2847 dst_ire->ire_zoneid != zoneid && 2848 dst_ire->ire_zoneid != ALL_ZONES) { 2849 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2850 zoneid, 0, NULL, 2851 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2852 MATCH_IRE_RJ_BHOLE); 2853 if (src_ire == NULL) { 2854 error = EHOSTUNREACH; 2855 goto bad_addr; 2856 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2857 if (!(src_ire->ire_type & IRE_HOST)) 2858 error = ENETUNREACH; 2859 else 2860 error = EHOSTUNREACH; 2861 goto bad_addr; 2862 } 2863 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2864 src_ipif = src_ire->ire_ipif; 2865 ipif_refhold(src_ipif); 2866 *v6src = src_ipif->ipif_v6lcl_addr; 2867 } 2868 ire_refrele(src_ire); 2869 src_ire = NULL; 2870 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2871 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2872 *v6src = sire->ire_src_addr_v6; 2873 ire_refrele(dst_ire); 2874 dst_ire = sire; 2875 sire = NULL; 2876 } else if (dst_ire->ire_type == IRE_CACHE && 2877 (dst_ire->ire_flags & RTF_SETSRC)) { 2878 ASSERT(dst_ire->ire_zoneid == zoneid || 2879 dst_ire->ire_zoneid == ALL_ZONES); 2880 *v6src = dst_ire->ire_src_addr_v6; 2881 } else { 2882 /* 2883 * Pick a source address so that a proper inbound load 2884 * spreading would happen. Use dst_ill specified by the 2885 * app. when socket option or scopeid is set. 
2886 */ 2887 int err; 2888 2889 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2890 uint_t if_index; 2891 2892 /* 2893 * Scope id or IPV6_PKTINFO 2894 */ 2895 2896 if_index = ipp->ipp_ifindex; 2897 dst_ill = ill_lookup_on_ifindex( 2898 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2899 if (dst_ill == NULL) { 2900 ip1dbg(("ip_bind_connected_v6:" 2901 " bad ifindex %d\n", if_index)); 2902 error = EADDRNOTAVAIL; 2903 goto bad_addr; 2904 } 2905 ill_held = B_TRUE; 2906 } else if (connp->conn_outgoing_ill != NULL) { 2907 /* 2908 * For IPV6_BOUND_IF socket option, 2909 * conn_outgoing_ill should be set 2910 * already in TCP or UDP/ICMP. 2911 */ 2912 dst_ill = conn_get_held_ill(connp, 2913 &connp->conn_outgoing_ill, &err); 2914 if (err == ILL_LOOKUP_FAILED) { 2915 ip1dbg(("ip_bind_connected_v6:" 2916 "no ill for bound_if\n")); 2917 error = EADDRNOTAVAIL; 2918 goto bad_addr; 2919 } 2920 ill_held = B_TRUE; 2921 } else if (dst_ire->ire_stq != NULL) { 2922 /* No need to hold ill here */ 2923 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2924 } else { 2925 /* No need to hold ill here */ 2926 dst_ill = dst_ire->ire_ipif->ipif_ill; 2927 } 2928 if (!ip6_asp_can_lookup()) { 2929 *mp->b_wptr++ = (char)protocol; 2930 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2931 ip_bind_connected_resume_v6); 2932 error = EINPROGRESS; 2933 goto refrele_and_quit; 2934 } 2935 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2936 RESTRICT_TO_NONE, connp->conn_src_preferences, 2937 zoneid); 2938 ip6_asp_table_refrele(); 2939 if (src_ipif == NULL) { 2940 pr_addr_dbg("ip_bind_connected_v6: " 2941 "no usable source address for " 2942 "connection to %s\n", AF_INET6, v6dst); 2943 error = EADDRNOTAVAIL; 2944 goto bad_addr; 2945 } 2946 *v6src = src_ipif->ipif_v6lcl_addr; 2947 } 2948 } 2949 2950 /* 2951 * We do ire_route_lookup_v6() here (and not an interface lookup) 2952 * as we assert that v6src should only come from an 2953 * UP interface for hard binding. 
2954 */ 2955 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2956 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2957 2958 /* src_ire must be a local|loopback */ 2959 if (!IRE_IS_LOCAL(src_ire)) { 2960 if (ip_debug > 2) { 2961 /* ip1dbg */ 2962 pr_addr_dbg("ip_bind_connected_v6: bad " 2963 "connected src %s\n", AF_INET6, v6src); 2964 } 2965 error = EADDRNOTAVAIL; 2966 goto bad_addr; 2967 } 2968 2969 /* 2970 * If the source address is a loopback address, the 2971 * destination had best be local or multicast. 2972 * The transports that can't handle multicast will reject 2973 * those addresses. 2974 */ 2975 if (src_ire->ire_type == IRE_LOOPBACK && 2976 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2977 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2978 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2979 error = -1; 2980 goto bad_addr; 2981 } 2982 /* 2983 * Allow setting new policies. For example, disconnects come 2984 * down as ipa_t bind. As we would have set conn_policy_cached 2985 * to B_TRUE before, we should set it to B_FALSE, so that policy 2986 * can change after the disconnect. 2987 */ 2988 connp->conn_policy_cached = B_FALSE; 2989 2990 /* 2991 * The addresses have been verified. Initialize the conn 2992 * before calling the policy as they expect the conns 2993 * initialized. 2994 */ 2995 connp->conn_srcv6 = *v6src; 2996 connp->conn_remv6 = *v6dst; 2997 connp->conn_lport = lport; 2998 connp->conn_fport = fport; 2999 3000 ASSERT(!(ipsec_policy_set && ire_requested)); 3001 if (ire_requested) { 3002 iulp_t *ulp_info = NULL; 3003 3004 /* 3005 * Note that sire will not be NULL if this is an off-link 3006 * connection and there is not cache for that dest yet. 3007 * 3008 * XXX Because of an existing bug, if there are multiple 3009 * default routes, the IRE returned now may not be the actual 3010 * default route used (default routes are chosen in a 3011 * round robin fashion). 
So if the metrics for different 3012 * default routes are different, we may return the wrong 3013 * metrics. This will not be a problem if the existing 3014 * bug is fixed. 3015 */ 3016 if (sire != NULL) 3017 ulp_info = &(sire->ire_uinfo); 3018 3019 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 3020 error = -1; 3021 goto bad_addr; 3022 } 3023 } else if (ipsec_policy_set) { 3024 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 3025 error = -1; 3026 goto bad_addr; 3027 } 3028 } 3029 3030 /* 3031 * Cache IPsec policy in this conn. If we have per-socket policy, 3032 * we'll cache that. If we don't, we'll inherit global policy. 3033 * 3034 * We can't insert until the conn reflects the policy. Note that 3035 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 3036 * connections where we don't have a policy. This is to prevent 3037 * global policy lookups in the inbound path. 3038 * 3039 * If we insert before we set conn_policy_cached, 3040 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3041 * because global policy cound be non-empty. We normally call 3042 * ipsec_check_policy() for conn_policy_cached connections only if 3043 * conn_in_enforce_policy is set. But in this case, 3044 * conn_policy_cached can get set anytime since we made the 3045 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3046 * is called, which will make the above assumption false. Thus, we 3047 * need to insert after we set conn_policy_cached. 3048 */ 3049 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3050 goto bad_addr; 3051 3052 /* If not fanout_insert this was just an address verification */ 3053 if (fanout_insert) { 3054 /* 3055 * The addresses have been verified. Time to insert in 3056 * the correct fanout list. 3057 */ 3058 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3059 connp->conn_ports, 3060 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 3061 } 3062 if (error == 0) { 3063 connp->conn_fully_bound = B_TRUE; 3064 /* 3065 * Our initial checks for MDT have passed; the IRE is not 3066 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3067 * be supporting MDT. Pass the IRE, IPC and ILL into 3068 * ip_mdinfo_return(), which performs further checks 3069 * against them and upon success, returns the MDT info 3070 * mblk which we will attach to the bind acknowledgment. 3071 */ 3072 if (md_dst_ire != NULL) { 3073 mblk_t *mdinfo_mp; 3074 3075 ASSERT(md_ill != NULL); 3076 ASSERT(md_ill->ill_mdt_capab != NULL); 3077 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3078 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3079 linkb(mp, mdinfo_mp); 3080 } 3081 } 3082 bad_addr: 3083 if (ipsec_policy_set) { 3084 ASSERT(policy_mp != NULL); 3085 freeb(policy_mp); 3086 /* 3087 * As of now assume that nothing else accompanies 3088 * IPSEC_POLICY_SET. 3089 */ 3090 mp->b_cont = NULL; 3091 } 3092 refrele_and_quit: 3093 if (src_ire != NULL) 3094 IRE_REFRELE(src_ire); 3095 if (dst_ire != NULL) 3096 IRE_REFRELE(dst_ire); 3097 if (sire != NULL) 3098 IRE_REFRELE(sire); 3099 if (src_ipif != NULL) 3100 ipif_refrele(src_ipif); 3101 if (md_dst_ire != NULL) 3102 IRE_REFRELE(md_dst_ire); 3103 if (ill_held && dst_ill != NULL) 3104 ill_refrele(dst_ill); 3105 return (error); 3106 } 3107 3108 /* 3109 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3110 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3111 */ 3112 static boolean_t 3113 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3114 iulp_t *ulp_info) 3115 { 3116 mblk_t *mp1; 3117 ire_t *ret_ire; 3118 3119 mp1 = mp->b_cont; 3120 ASSERT(mp1 != NULL); 3121 3122 if (ire != NULL) { 3123 /* 3124 * mp1 initialized above to IRE_DB_REQ_TYPE 3125 * appended mblk. Its <upper protocol>'s 3126 * job to make sure there is room. 
3127 */ 3128 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3129 return (B_FALSE); 3130 3131 mp1->b_datap->db_type = IRE_DB_TYPE; 3132 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3133 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3134 ret_ire = (ire_t *)mp1->b_rptr; 3135 if (IN6_IS_ADDR_MULTICAST(dst) || 3136 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3137 ret_ire->ire_type = IRE_BROADCAST; 3138 ret_ire->ire_addr_v6 = *dst; 3139 } 3140 if (ulp_info != NULL) { 3141 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3142 sizeof (iulp_t)); 3143 } 3144 ret_ire->ire_mp = mp1; 3145 } else { 3146 /* 3147 * No IRE was found. Remove IRE mblk. 3148 */ 3149 mp->b_cont = mp1->b_cont; 3150 freeb(mp1); 3151 } 3152 return (B_TRUE); 3153 } 3154 3155 /* 3156 * Add an ip6i_t header to the front of the mblk. 3157 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3158 * Returns NULL if allocation fails (and frees original message). 3159 * Used in outgoing path when going through ip_newroute_*v6(). 3160 * Used in incoming path to pass ifindex to transports. 
3161 */ 3162 mblk_t * 3163 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3164 { 3165 mblk_t *mp1; 3166 ip6i_t *ip6i; 3167 ip6_t *ip6h; 3168 3169 ip6h = (ip6_t *)mp->b_rptr; 3170 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3171 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3172 mp->b_datap->db_ref > 1) { 3173 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3174 if (mp1 == NULL) { 3175 freemsg(mp); 3176 return (NULL); 3177 } 3178 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3179 mp1->b_cont = mp; 3180 mp = mp1; 3181 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3182 } 3183 mp->b_rptr = (uchar_t *)ip6i; 3184 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3185 ip6i->ip6i_nxt = IPPROTO_RAW; 3186 if (ill != NULL) { 3187 ip6i->ip6i_flags = IP6I_IFINDEX; 3188 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3189 } else { 3190 ip6i->ip6i_flags = 0; 3191 } 3192 ip6i->ip6i_nexthop = *dst; 3193 return (mp); 3194 } 3195 3196 /* 3197 * Handle protocols with which IP is less intimate. There 3198 * can be more than one stream bound to a particular 3199 * protocol. When this is the case, normally each one gets a copy 3200 * of any incoming packets. 3201 * However, if the packet was tunneled and not multicast we only send to it 3202 * the first match. 3203 * 3204 * Zones notes: 3205 * Packets will be distributed to streams in all zones. This is really only 3206 * useful for ICMPv6 as only applications in the global zone can create raw 3207 * sockets for other protocols. 
3208 */ 3209 static void 3210 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3211 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3212 boolean_t mctl_present, zoneid_t zoneid) 3213 { 3214 queue_t *rq; 3215 mblk_t *mp1, *first_mp1; 3216 in6_addr_t dst = ip6h->ip6_dst; 3217 in6_addr_t src = ip6h->ip6_src; 3218 boolean_t one_only; 3219 mblk_t *first_mp = mp; 3220 boolean_t secure, shared_addr; 3221 conn_t *connp, *first_connp, *next_connp; 3222 connf_t *connfp; 3223 3224 if (mctl_present) { 3225 mp = first_mp->b_cont; 3226 secure = ipsec_in_is_secure(first_mp); 3227 ASSERT(mp != NULL); 3228 } else { 3229 secure = B_FALSE; 3230 } 3231 3232 /* 3233 * If the packet was tunneled and not multicast we only send to it 3234 * the first match. 3235 */ 3236 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3237 !IN6_IS_ADDR_MULTICAST(&dst)); 3238 3239 shared_addr = (zoneid == ALL_ZONES); 3240 if (shared_addr) { 3241 /* 3242 * We don't allow multilevel ports for raw IP, so no need to 3243 * check for that here. 3244 */ 3245 zoneid = tsol_packet_to_zoneid(mp); 3246 } 3247 3248 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3249 mutex_enter(&connfp->connf_lock); 3250 connp = connfp->connf_head; 3251 for (connp = connfp->connf_head; connp != NULL; 3252 connp = connp->conn_next) { 3253 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3254 zoneid) && 3255 (!is_system_labeled() || 3256 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3257 connp))) 3258 break; 3259 } 3260 3261 if (connp == NULL || connp->conn_upq == NULL) { 3262 /* 3263 * No one bound to this port. Is 3264 * there a client that wants all 3265 * unclaimed datagrams? 
3266 */ 3267 mutex_exit(&connfp->connf_lock); 3268 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3269 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3270 nexthdr_offset, mctl_present, zoneid)) { 3271 BUMP_MIB(ill->ill_ip6_mib, ipv6InUnknownProtos); 3272 } 3273 3274 return; 3275 } 3276 3277 CONN_INC_REF(connp); 3278 first_connp = connp; 3279 3280 /* 3281 * XXX: Fix the multiple protocol listeners case. We should not 3282 * be walking the conn->next list here. 3283 */ 3284 if (one_only) { 3285 /* 3286 * Only send message to one tunnel driver by immediately 3287 * terminating the loop. 3288 */ 3289 connp = NULL; 3290 } else { 3291 connp = connp->conn_next; 3292 3293 } 3294 for (;;) { 3295 while (connp != NULL) { 3296 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3297 flags, zoneid) && 3298 (!is_system_labeled() || 3299 tsol_receive_local(mp, &dst, IPV6_VERSION, 3300 shared_addr, connp))) 3301 break; 3302 connp = connp->conn_next; 3303 } 3304 3305 /* 3306 * Just copy the data part alone. The mctl part is 3307 * needed just for verifying policy and it is never 3308 * sent up. 3309 */ 3310 if (connp == NULL || connp->conn_upq == NULL || 3311 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3312 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3313 /* 3314 * No more intested clients or memory 3315 * allocation failed 3316 */ 3317 connp = first_connp; 3318 break; 3319 } 3320 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3321 CONN_INC_REF(connp); 3322 mutex_exit(&connfp->connf_lock); 3323 rq = connp->conn_rq; 3324 /* 3325 * For link-local always add ifindex so that transport can set 3326 * sin6_scope_id. Avoid it for ICMP error fanout. 
3327 */ 3328 if ((connp->conn_ipv6_recvpktinfo || 3329 IN6_IS_ADDR_LINKLOCAL(&src)) && 3330 (flags & IP_FF_IP6INFO)) { 3331 /* Add header */ 3332 mp1 = ip_add_info_v6(mp1, inill, &dst); 3333 } 3334 if (mp1 == NULL) { 3335 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3336 } else if (!canputnext(rq)) { 3337 if (flags & IP_FF_RAWIP) { 3338 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3339 } else { 3340 BUMP_MIB(ill->ill_icmp6_mib, 3341 ipv6IfIcmpInOverflows); 3342 } 3343 3344 freemsg(mp1); 3345 } else { 3346 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3347 first_mp1 = ipsec_check_inbound_policy 3348 (first_mp1, connp, NULL, ip6h, 3349 mctl_present); 3350 } 3351 if (first_mp1 != NULL) { 3352 if (mctl_present) 3353 freeb(first_mp1); 3354 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3355 putnext(rq, mp1); 3356 } 3357 } 3358 mutex_enter(&connfp->connf_lock); 3359 /* Follow the next pointer before releasing the conn. */ 3360 next_connp = connp->conn_next; 3361 CONN_DEC_REF(connp); 3362 connp = next_connp; 3363 } 3364 3365 /* Last one. Send it upstream. */ 3366 mutex_exit(&connfp->connf_lock); 3367 3368 /* Initiate IPPF processing */ 3369 if (IP6_IN_IPP(flags)) { 3370 uint_t ifindex; 3371 3372 mutex_enter(&ill->ill_lock); 3373 ifindex = ill->ill_phyint->phyint_ifindex; 3374 mutex_exit(&ill->ill_lock); 3375 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3376 if (mp == NULL) { 3377 CONN_DEC_REF(connp); 3378 if (mctl_present) 3379 freeb(first_mp); 3380 return; 3381 } 3382 } 3383 3384 /* 3385 * For link-local always add ifindex so that transport can set 3386 * sin6_scope_id. Avoid it for ICMP error fanout. 
3387 */ 3388 if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3389 (flags & IP_FF_IP6INFO)) { 3390 /* Add header */ 3391 mp = ip_add_info_v6(mp, inill, &dst); 3392 if (mp == NULL) { 3393 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3394 CONN_DEC_REF(connp); 3395 if (mctl_present) 3396 freeb(first_mp); 3397 return; 3398 } else if (mctl_present) { 3399 first_mp->b_cont = mp; 3400 } else { 3401 first_mp = mp; 3402 } 3403 } 3404 3405 rq = connp->conn_rq; 3406 if (!canputnext(rq)) { 3407 if (flags & IP_FF_RAWIP) { 3408 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3409 } else { 3410 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3411 } 3412 3413 freemsg(first_mp); 3414 } else { 3415 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3416 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3417 NULL, ip6h, mctl_present); 3418 if (first_mp == NULL) { 3419 CONN_DEC_REF(connp); 3420 return; 3421 } 3422 } 3423 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3424 putnext(rq, mp); 3425 if (mctl_present) 3426 freeb(first_mp); 3427 } 3428 CONN_DEC_REF(connp); 3429 } 3430 3431 /* 3432 * Send an ICMP error after patching up the packet appropriately. Returns 3433 * non-zero if the appropriate MIB should be bumped; zero otherwise. 3434 */ 3435 int 3436 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3437 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3438 boolean_t mctl_present, zoneid_t zoneid) 3439 { 3440 ip6_t *ip6h; 3441 mblk_t *first_mp; 3442 boolean_t secure; 3443 unsigned char db_type; 3444 3445 first_mp = mp; 3446 if (mctl_present) { 3447 mp = mp->b_cont; 3448 secure = ipsec_in_is_secure(first_mp); 3449 ASSERT(mp != NULL); 3450 } else { 3451 /* 3452 * If this is an ICMP error being reported - which goes 3453 * up as M_CTLs, we need to convert them to M_DATA till 3454 * we finish checking with global policy because 3455 * ipsec_check_global_policy() assumes M_DATA as clear 3456 * and M_CTL as secure. 
3457 */ 3458 db_type = mp->b_datap->db_type; 3459 mp->b_datap->db_type = M_DATA; 3460 secure = B_FALSE; 3461 } 3462 /* 3463 * We are generating an icmp error for some inbound packet. 3464 * Called from all ip_fanout_(udp, tcp, proto) functions. 3465 * Before we generate an error, check with global policy 3466 * to see whether this is allowed to enter the system. As 3467 * there is no "conn", we are checking with global policy. 3468 */ 3469 ip6h = (ip6_t *)mp->b_rptr; 3470 if (secure || ipsec_inbound_v6_policy_present) { 3471 first_mp = ipsec_check_global_policy(first_mp, NULL, 3472 NULL, ip6h, mctl_present); 3473 if (first_mp == NULL) 3474 return (0); 3475 } 3476 3477 if (!mctl_present) 3478 mp->b_datap->db_type = db_type; 3479 3480 if (flags & IP_FF_SEND_ICMP) { 3481 if (flags & IP_FF_HDR_COMPLETE) { 3482 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3483 freemsg(first_mp); 3484 return (1); 3485 } 3486 } 3487 switch (icmp_type) { 3488 case ICMP6_DST_UNREACH: 3489 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3490 B_FALSE, B_FALSE); 3491 break; 3492 case ICMP6_PARAM_PROB: 3493 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3494 nexthdr_offset, B_FALSE, B_FALSE); 3495 break; 3496 default: 3497 #ifdef DEBUG 3498 panic("ip_fanout_send_icmp_v6: wrong type"); 3499 /*NOTREACHED*/ 3500 #else 3501 freemsg(first_mp); 3502 break; 3503 #endif 3504 } 3505 } else { 3506 freemsg(first_mp); 3507 return (0); 3508 } 3509 3510 return (1); 3511 } 3512 3513 3514 /* 3515 * Fanout for TCP packets 3516 * The caller puts <fport, lport> in the ports parameter. 
 */
/*
 * mp is the packet, led by an M_CTL mblk when mctl_present; hdr_len is the
 * offset of the TCP header from mp->b_rptr.  The packet is classified with
 * ipcl_classify_v6(); when no conn wants it, it is handed to
 * tcp_xmit_listeners_reset().  Otherwise it is delivered through
 * ip_input_proc on the conn's squeue (TCP conns) or putnext() (others).
 */
static void
ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill,
    uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid)
{
	mblk_t		*first_mp;
	boolean_t	secure;
	conn_t		*connp;
	tcph_t		*tcph;
	boolean_t	syn_present = B_FALSE;

	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		secure = ipsec_in_is_secure(first_mp);
		ASSERT(mp != NULL);
	} else {
		secure = B_FALSE;
	}

	connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid);

	if (connp == NULL ||
	    !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) {
		/*
		 * No hard-bound match. Send Reset.
		 */
		dblk_t *dp = mp->b_datap;
		uint32_t ill_index;

		ASSERT((dp->db_struioflag & STRUIO_IP) == 0);

		/*
		 * Initiate IPPf processing, if needed.
		 * NOTE(review): this tests that IP6_NO_IPPOLICY is set,
		 * while the delivery path below uses IP6_IN_IPP(flags);
		 * confirm the sense of this test is intended.
		 */
		if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) {
			ill_index = ill->ill_phyint->phyint_ifindex;
			ip_process(IPP_LOCAL_IN, &first_mp, ill_index);
			if (first_mp == NULL) {
				if (connp != NULL)
					CONN_DEC_REF(connp);
				return;
			}
		}
		BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);
		tcp_xmit_listeners_reset(first_mp, hdr_len);
		if (connp != NULL)
			CONN_DEC_REF(connp);
		return;
	}

	/* A pure SYN (no ACK/RST/URG) to a TCP conn starts an eager. */
	tcph = (tcph_t *)&mp->b_rptr[hdr_len];
	if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) {
		if (connp->conn_flags & IPCL_TCP) {
			squeue_t *sqp;

			/*
			 * For fused tcp loopback, assign the eager's
			 * squeue to be that of the active connect's.
			 */
			if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion &&
			    !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure &&
			    !IP6_IN_IPP(flags)) {
				ASSERT(Q_TO_CONN(q) != NULL);
				sqp = Q_TO_CONN(q)->conn_sqp;
			} else {
				sqp = IP_SQUEUE_GET(lbolt);
			}

			/* The chosen squeue travels in the dblk itself. */
			mp->b_datap->db_struioflag |= STRUIO_EAGER;
			DB_CKSUMSTART(mp) = (intptr_t)sqp;

			/*
			 * db_cksumstuff is unused in the incoming
			 * path; Thus store the ifindex here. It will
			 * be cleared in tcp_conn_create_v6().
			 */
			DB_CKSUMSTUFF(mp) =
			    (intptr_t)ill->ill_phyint->phyint_ifindex;
			syn_present = B_TRUE;
		}
	}

	/*
	 * A non-SYN segment matched a conn that is only bound: drop it,
	 * except that a segment carrying ACK (without RST/URG) is answered
	 * through tcp_xmit_listeners_reset().
	 */
	if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) {
		/* NOTE: this local shadows the 'flags' parameter. */
		uint_t	flags = (unsigned int)tcph->th_flags[0] & 0xFF;
		if ((flags & TH_RST) || (flags & TH_URG)) {
			CONN_DEC_REF(connp);
			freemsg(first_mp);
			return;
		}
		if (flags & TH_ACK) {
			tcp_xmit_listeners_reset(first_mp, hdr_len);
			CONN_DEC_REF(connp);
			return;
		}

		CONN_DEC_REF(connp);
		freemsg(first_mp);
		return;
	}

	if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
		first_mp = ipsec_check_inbound_policy(first_mp, connp,
		    NULL, ip6h, mctl_present);
		if (first_mp == NULL) {
			CONN_DEC_REF(connp);
			return;
		}
		if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) {
			ASSERT(syn_present);
			if (mctl_present) {
				ASSERT(first_mp != mp);
				first_mp->b_datap->db_struioflag |=
				    STRUIO_POLICY;
			} else {
				ASSERT(first_mp == mp);
				mp->b_datap->db_struioflag &=
				    ~STRUIO_EAGER;
				mp->b_datap->db_struioflag |=
				    STRUIO_POLICY;
			}
		} else {
			/*
			 * Discard first_mp early since we're dealing with a
			 * fully-connected conn_t and tcp doesn't do policy in
			 * this case. Also, if someone is bound to IPPROTO_TCP
			 * over raw IP, they don't expect to see a M_CTL.
			 */
			if (mctl_present) {
				freeb(first_mp);
				mctl_present = B_FALSE;
			}
			first_mp = mp;
		}
	}

	/* Initiate IPPF processing */
	if (IP6_IN_IPP(flags)) {
		uint_t	ifindex;

		mutex_enter(&ill->ill_lock);
		ifindex = ill->ill_phyint->phyint_ifindex;
		mutex_exit(&ill->ill_lock);
		ip_process(IPP_LOCAL_IN, &mp, ifindex);
		if (mp == NULL) {
			CONN_DEC_REF(connp);
			if (mctl_present) {
				freeb(first_mp);
			}
			return;
		} else if (mctl_present) {
			/*
			 * mp was passed to ip_process() by reference, so
			 * re-link it behind the M_CTL.
			 */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	/*
	 * For link-local always add ifindex so that TCP can bind to that
	 * interface. Avoid it for ICMP error fanout.
	 */
	if (!syn_present && ((connp->conn_ipv6_recvpktinfo ||
	    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) &&
	    (flags & IP_FF_IP6INFO))) {
		/* Add header */
		mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst);
		if (mp == NULL) {
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			/* ip_add_info_v6 might return a new mp. */
			ASSERT(first_mp != mp);
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);
	if (IPCL_IS_TCP(connp)) {
		/* No CONN_DEC_REF on this path, unlike the putnext() case. */
		(*ip_input_proc)(connp->conn_sqp, first_mp,
		    connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT);
	} else {
		putnext(connp->conn_rq, first_mp);
		CONN_DEC_REF(connp);
	}
}

/*
 * Fanout for UDP packets.
 * The caller puts <fport, lport> in the ports parameter.
 * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
 *
 * If SO_REUSEADDR is set all multicast and broadcast packets
 * will be delivered to all streams bound to the same port.
 *
 * Zones notes:
 * Multicast packets will be distributed to streams in all zones.
 */
static void
ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports,
    ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present,
    zoneid_t zoneid)
{
	uint32_t	dstport, srcport;
	in6_addr_t	dst;
	mblk_t		*first_mp;
	boolean_t	secure;
	conn_t		*connp;
	connf_t		*connfp;
	conn_t		*first_conn;
	conn_t		*next_conn;
	mblk_t		*mp1, *first_mp1;
	in6_addr_t	src;
	boolean_t	shared_addr;

	/* With mctl_present the data follows a leading control mblk. */
	first_mp = mp;
	if (mctl_present) {
		mp = first_mp->b_cont;
		secure = ipsec_in_is_secure(first_mp);
		ASSERT(mp != NULL);
	} else {
		secure = B_FALSE;
	}

	/* Extract ports in net byte order */
	dstport = htons(ntohl(ports) & 0xFFFF);
	srcport = htons(ntohl(ports) >> 16);
	dst = ip6h->ip6_dst;
	src = ip6h->ip6_src;

	shared_addr = (zoneid == ALL_ZONES);
	if (shared_addr) {
		zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport);
		/*
		 * If no shared MLP is found, tsol_mlp_findzone returns
		 * ALL_ZONES.  In that case, we assume it's SLP, and
		 * search for the zone based on the packet label.
		 * That will also return ALL_ZONES on failure, but
		 * we never allow conn_zoneid to be set to ALL_ZONES.
		 */
		if (zoneid == ALL_ZONES)
			zoneid = tsol_packet_to_zoneid(mp);
	}

	/* Attempt to find a client stream based on destination port. */
	connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)];
	mutex_enter(&connfp->connf_lock);
	connp = connfp->connf_head;
	if (!IN6_IS_ADDR_MULTICAST(&dst)) {
		/*
		 * Not multicast. Send to the one (first) client we find.
		 */
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && connp->conn_zoneid == zoneid &&
			    conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid)) {
				break;
			}
			connp = connp->conn_next;
		}
		if (connp == NULL || connp->conn_upq == NULL)
			goto notfound;

		if (is_system_labeled() &&
		    !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp))
			goto notfound;

		/* Found a client */
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);

		if (CONN_UDP_FLOWCTLD(connp)) {
			freemsg(first_mp);
			CONN_DEC_REF(connp);
			return;
		}
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				CONN_DEC_REF(connp);
				return;
			}
		}
		/* Initiate IPPF processing */
		if (IP6_IN_IPP(flags)) {
			uint_t	ifindex;

			mutex_enter(&ill->ill_lock);
			ifindex = ill->ill_phyint->phyint_ifindex;
			mutex_exit(&ill->ill_lock);
			ip_process(IPP_LOCAL_IN, &mp, ifindex);
			if (mp == NULL) {
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			}
		}
		/*
		 * For link-local always add ifindex so that
		 * transport can set sin6_scope_id. Avoid it for
		 * ICMP error fanout.
		 */
		if ((connp->conn_ipv6_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IP6INFO)) {
			/* Add header */
			mp = ip_add_info_v6(mp, inill, &dst);
			if (mp == NULL) {
				BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
				CONN_DEC_REF(connp);
				if (mctl_present)
					freeb(first_mp);
				return;
			} else if (mctl_present) {
				first_mp->b_cont = mp;
			} else {
				first_mp = mp;
			}
		}
		BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);

		/* Send it upstream */
		CONN_UDP_RECV(connp, mp);

		IP6_STAT(ip6_udp_fannorm);
		CONN_DEC_REF(connp);
		/* Only the data went up; the control mblk is ours to free. */
		if (mctl_present)
			freeb(first_mp);
		return;
	}

	/*
	 * Multicast.  Find the first interested client; note that, unlike
	 * the unicast search above, conn_zoneid is not compared here.
	 */
	while (connp != NULL) {
		if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) &&
		    conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) &&
		    (!is_system_labeled() ||
		    tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp)))
			break;
		connp = connp->conn_next;
	}

	if (connp == NULL || connp->conn_upq == NULL)
		goto notfound;

	/* The first match is held and served last, with the original. */
	first_conn = connp;

	CONN_INC_REF(connp);
	connp = connp->conn_next;
	for (;;) {
		while (connp != NULL) {
			if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport,
			    src) && conn_wantpacket_v6(connp, ill, ip6h,
			    flags, zoneid) &&
			    (!is_system_labeled() ||
			    tsol_receive_local(mp, &dst, IPV6_VERSION,
			    shared_addr, connp)))
				break;
			connp = connp->conn_next;
		}
		/*
		 * Just copy the data part alone. The mctl part is
		 * needed just for verifying policy and it is never
		 * sent up.
		 */
		if (connp == NULL ||
		    (((first_mp1 = dupmsg(first_mp)) == NULL) &&
		    ((first_mp1 = ip_copymsg(first_mp))
		    == NULL))) {
			/*
			 * No more interested clients or memory
			 * allocation failed
			 */
			connp = first_conn;
			break;
		}
		mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
		CONN_INC_REF(connp);
		/* The fanout lock is dropped while this copy is delivered. */
		mutex_exit(&connfp->connf_lock);
		/*
		 * For link-local always add ifindex so that transport
		 * can set sin6_scope_id. Avoid it for ICMP error
		 * fanout.
		 */
		if ((connp->conn_ipv6_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IP6INFO)) {
			/* Add header */
			mp1 = ip_add_info_v6(mp1, inill, &dst);
		}
		/* mp1 could have changed */
		if (mctl_present)
			first_mp1->b_cont = mp1;
		else
			first_mp1 = mp1;
		if (mp1 == NULL) {
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
			goto next_one;
		}
		if (CONN_UDP_FLOWCTLD(connp)) {
			BUMP_MIB(ill->ill_ip6_mib, udpInOverflows);
			freemsg(first_mp1);
			goto next_one;
		}

		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) ||
		    secure) {
			first_mp1 = ipsec_check_inbound_policy
			    (first_mp1, connp, NULL, ip6h,
			    mctl_present);
		}
		if (first_mp1 != NULL) {
			if (mctl_present)
				freeb(first_mp1);
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);

			/* Send it upstream */
			CONN_UDP_RECV(connp, mp1);
		}
next_one:
		mutex_enter(&connfp->connf_lock);
		/* Follow the next pointer before releasing the conn. */
		next_conn = connp->conn_next;
		IP6_STAT(ip6_udp_fanmb);
		CONN_DEC_REF(connp);
		connp = next_conn;
	}

	/* Last one.  Send it upstream. */
	mutex_exit(&connfp->connf_lock);

	/* Initiate IPPF processing */
	if (IP6_IN_IPP(flags)) {
		uint_t	ifindex;

		mutex_enter(&ill->ill_lock);
		ifindex = ill->ill_phyint->phyint_ifindex;
		mutex_exit(&ill->ill_lock);
		ip_process(IPP_LOCAL_IN, &mp, ifindex);
		if (mp == NULL) {
			CONN_DEC_REF(connp);
			if (mctl_present) {
				freeb(first_mp);
			}
			return;
		}
	}

	/*
	 * For link-local always add ifindex so that transport can set
	 * sin6_scope_id. Avoid it for ICMP error fanout.
	 */
	if ((connp->conn_ipv6_recvpktinfo ||
	    IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) {
		/* Add header */
		mp = ip_add_info_v6(mp, inill, &dst);
		if (mp == NULL) {
			BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}
	if (CONN_UDP_FLOWCTLD(connp)) {
		BUMP_MIB(ill->ill_ip6_mib, udpInOverflows);
		/* Data only; a leading control mblk is freed below. */
		freemsg(mp);
	} else {
		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
			first_mp = ipsec_check_inbound_policy(first_mp,
			    connp, NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards);
				CONN_DEC_REF(connp);
				return;
			}
		}
		BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers);

		/* Send it upstream */
		CONN_UDP_RECV(connp, mp);
	}
	IP6_STAT(ip6_udp_fanmb);
	CONN_DEC_REF(connp);
	if (mctl_present)
		freeb(first_mp);
	return;

notfound:
	mutex_exit(&connfp->connf_lock);
	/*
	 * No one bound to this port.  Is
	 * there a client that wants all
	 * unclaimed datagrams?
	 */
	if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
		ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP,
		    0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present,
		    zoneid);
	} else {
		if (ip_fanout_send_icmp_v6(q, first_mp, flags,
		    ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0,
		    mctl_present, zoneid)) {
			BUMP_MIB(&ip_mib, udpNoPorts);
		}
	}
}

/*
 * int ip_find_hdr_v6()
 *
 * This routine is used by the upper layer protocols and the IP tunnel
 * module to:
 * - Set extension header pointers to appropriate locations
 * - Determine IPv6 header length and return it
 * - Return a pointer to the last nexthdr value
 *
 * The caller must initialize ipp_fields.
 *
 * NOTE: If multiple extension headers of the same type are present,
 * ip_find_hdr_v6() will set the respective extension header pointers
 * to the first one that it encounters in the IPv6 header.  It also
 * skips fragment headers.  This routine deals with malformed packets
 * of various sorts in which case the returned length is up to the
 * malformed part.
 *
 * Only the bytes up to mp->b_wptr are examined.  nexthdrp may be NULL; when
 * it is not, the last next-header value seen is stored through it (a value,
 * not a pointer into the packet).
 */
int
ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp)
{
	uint_t	length, ehdrlen;
	uint8_t nexthdr;
	uint8_t *whereptr, *endptr;
	ip6_dest_t *tmpdstopts;
	ip6_rthdr_t *tmprthdr;
	ip6_hbh_t *tmphopopts;
	ip6_frag_t *tmpfraghdr;

	length = IPV6_HDR_LEN;
	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
	endptr = mp->b_wptr;

	nexthdr = ip6h->ip6_nxt;
	while (whereptr < endptr) {
		/* Is there enough left for len + nexthdr? */
		if (whereptr + MIN_EHDR_LEN > endptr)
			goto done;

		switch (nexthdr) {
		case IPPROTO_HOPOPTS:
			tmphopopts = (ip6_hbh_t *)whereptr;
			/* The length field counts 8-byte units past the first. */
			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
			if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
				goto done;
			nexthdr = tmphopopts->ip6h_nxt;
			/* return only 1st hbh */
			if (!(ipp->ipp_fields & IPPF_HOPOPTS)) {
				ipp->ipp_fields |= IPPF_HOPOPTS;
				ipp->ipp_hopopts = tmphopopts;
				ipp->ipp_hopoptslen = ehdrlen;
			}
			break;
		case IPPROTO_DSTOPTS:
			tmpdstopts = (ip6_dest_t *)whereptr;
			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
			if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
				goto done;
			nexthdr = tmpdstopts->ip6d_nxt;
			/*
			 * ipp_dstopts is set to the destination header after a
			 * routing header.
			 * Assume it is a post-rthdr destination header
			 * and adjust when we find an rthdr.
			 */
			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
				ipp->ipp_fields |= IPPF_DSTOPTS;
				ipp->ipp_dstopts = tmpdstopts;
				ipp->ipp_dstoptslen = ehdrlen;
			}
			break;
		case IPPROTO_ROUTING:
			tmprthdr = (ip6_rthdr_t *)whereptr;
			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
			if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
				goto done;
			nexthdr = tmprthdr->ip6r_nxt;
			/* return only 1st rthdr */
			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
				ipp->ipp_fields |= IPPF_RTHDR;
				ipp->ipp_rthdr = tmprthdr;
				ipp->ipp_rthdrlen = ehdrlen;
			}
			/*
			 * Make any destination header we've seen be a
			 * pre-rthdr destination header.
			 */
			if (ipp->ipp_fields & IPPF_DSTOPTS) {
				ipp->ipp_fields &= ~IPPF_DSTOPTS;
				ipp->ipp_fields |= IPPF_RTDSTOPTS;
				ipp->ipp_rtdstopts = ipp->ipp_dstopts;
				ipp->ipp_dstopts = NULL;
				ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen;
				ipp->ipp_dstoptslen = 0;
			}
			break;
		case IPPROTO_FRAGMENT:
			/*
			 * Fragment headers are skipped.  Currently, only
			 * IP cares for their existence.  If anyone other
			 * than IP ever has the need to know about the
			 * location of fragment headers, support can be
			 * added to the ip6_pkt_t at that time.
			 */
			tmpfraghdr = (ip6_frag_t *)whereptr;
			ehdrlen = sizeof (ip6_frag_t);
			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
				goto done;
			nexthdr = tmpfraghdr->ip6f_nxt;
			break;
		case IPPROTO_NONE:
		default:
			/* Anything else ends the extension header chain. */
			goto done;
		}
		length += ehdrlen;
		whereptr += ehdrlen;
	}
done:
	if (nexthdrp != NULL)
		*nexthdrp = nexthdr;
	return (length);
}

/*
 * Fill in the fields of ip6h that may have been left incomplete: an
 * unspecified source address is replaced with the address of the IRE
 * returned by ire_lookup_local_v6(zoneid), and ip6_vcf / ip6_hops are set
 * to IPV6_DEFAULT_VERS_AND_FLOW / ipv6_def_hops.
 *
 * Returns 0 on success, 1 if a source address was needed but no IRE was
 * found (in which case ip6h is left untouched).
 */
int
ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid)
{
	ire_t	*ire;

	if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
		ire = ire_lookup_local_v6(zoneid);
		if (ire == NULL) {
			ip1dbg(("ip_hdr_complete_v6: no source IRE\n"));
			return (1);
		}
		ip6h->ip6_src = ire->ire_addr_v6;
		ire_refrele(ire);
	}
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_hops = ipv6_def_hops;
	return (0);
}

/*
 * Try to determine where and what are the IPv6 header length and
 * pointer to nexthdr value for the upper layer protocol (or an
 * unknown next hdr).
 *
 * Parameters returns a pointer to the nexthdr value;
 * Must handle malformed packets of various sorts.
 * Function returns failure for malformed cases.
4189 */ 4190 boolean_t 4191 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4192 uint8_t **nexthdrpp) 4193 { 4194 uint16_t length; 4195 uint_t ehdrlen; 4196 uint8_t *nexthdrp; 4197 uint8_t *whereptr; 4198 uint8_t *endptr; 4199 ip6_dest_t *desthdr; 4200 ip6_rthdr_t *rthdr; 4201 ip6_frag_t *fraghdr; 4202 4203 length = IPV6_HDR_LEN; 4204 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4205 endptr = mp->b_wptr; 4206 4207 nexthdrp = &ip6h->ip6_nxt; 4208 while (whereptr < endptr) { 4209 /* Is there enough left for len + nexthdr? */ 4210 if (whereptr + MIN_EHDR_LEN > endptr) 4211 break; 4212 4213 switch (*nexthdrp) { 4214 case IPPROTO_HOPOPTS: 4215 case IPPROTO_DSTOPTS: 4216 /* Assumes the headers are identical for hbh and dst */ 4217 desthdr = (ip6_dest_t *)whereptr; 4218 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4219 if ((uchar_t *)desthdr + ehdrlen > endptr) 4220 return (B_FALSE); 4221 nexthdrp = &desthdr->ip6d_nxt; 4222 break; 4223 case IPPROTO_ROUTING: 4224 rthdr = (ip6_rthdr_t *)whereptr; 4225 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4226 if ((uchar_t *)rthdr + ehdrlen > endptr) 4227 return (B_FALSE); 4228 nexthdrp = &rthdr->ip6r_nxt; 4229 break; 4230 case IPPROTO_FRAGMENT: 4231 fraghdr = (ip6_frag_t *)whereptr; 4232 ehdrlen = sizeof (ip6_frag_t); 4233 if ((uchar_t *)&fraghdr[1] > endptr) 4234 return (B_FALSE); 4235 nexthdrp = &fraghdr->ip6f_nxt; 4236 break; 4237 case IPPROTO_NONE: 4238 /* No next header means we're finished */ 4239 default: 4240 *hdr_length_ptr = length; 4241 *nexthdrpp = nexthdrp; 4242 return (B_TRUE); 4243 } 4244 length += ehdrlen; 4245 whereptr += ehdrlen; 4246 *hdr_length_ptr = length; 4247 *nexthdrpp = nexthdrp; 4248 } 4249 switch (*nexthdrp) { 4250 case IPPROTO_HOPOPTS: 4251 case IPPROTO_DSTOPTS: 4252 case IPPROTO_ROUTING: 4253 case IPPROTO_FRAGMENT: 4254 /* 4255 * If any know extension headers are still to be processed, 4256 * the packet's malformed (or at least all the IP header(s) are 4257 * not in the same 
mblk - and that should never happen. 4258 */ 4259 return (B_FALSE); 4260 4261 default: 4262 /* 4263 * If we get here, we know that all of the IP headers were in 4264 * the same mblk, even if the ULP header is in the next mblk. 4265 */ 4266 *hdr_length_ptr = length; 4267 *nexthdrpp = nexthdrp; 4268 return (B_TRUE); 4269 } 4270 } 4271 4272 /* 4273 * Return the length of the IPv6 related headers (including extension headers) 4274 * Returns a length even if the packet is malformed. 4275 */ 4276 int 4277 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4278 { 4279 uint16_t hdr_len; 4280 uint8_t *nexthdrp; 4281 4282 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4283 return (hdr_len); 4284 } 4285 4286 /* 4287 * Select an ill for the packet by considering load spreading across 4288 * a different ill in the group if dst_ill is part of some group. 4289 */ 4290 static ill_t * 4291 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4292 { 4293 ill_t *ill; 4294 4295 /* 4296 * We schedule irrespective of whether the source address is 4297 * INADDR_UNSPECIED or not. 4298 */ 4299 ill = illgrp_scheduler(dst_ill); 4300 if (ill == NULL) 4301 return (NULL); 4302 4303 /* 4304 * For groups with names ip_sioctl_groupname ensures that all 4305 * ills are of same type. For groups without names, ifgrp_insert 4306 * ensures this. 4307 */ 4308 ASSERT(dst_ill->ill_type == ill->ill_type); 4309 4310 return (ill); 4311 } 4312 4313 /* 4314 * IPv6 - 4315 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4316 * to send out a packet to a destination address for which we do not have 4317 * specific routing information. 4318 * 4319 * Handle non-multicast packets. If ill is non-NULL the match is done 4320 * for that ill. 4321 * 4322 * When a specific ill is specified (using IPV6_PKTINFO, 4323 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4324 * on routing entries (ftable and ctable) that have a matching 4325 * ire->ire_ipif->ipif_ill. 
Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it cannot "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4359 */ 4360 /* ARGSUSED */ 4361 void 4362 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4363 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4364 { 4365 in6_addr_t v6gw; 4366 in6_addr_t dst; 4367 ire_t *ire = NULL; 4368 ipif_t *src_ipif = NULL; 4369 ill_t *dst_ill = NULL; 4370 ire_t *sire = NULL; 4371 ire_t *save_ire; 4372 mblk_t *dlureq_mp; 4373 ip6_t *ip6h; 4374 int err = 0; 4375 mblk_t *first_mp; 4376 ipsec_out_t *io; 4377 ill_t *attach_ill = NULL; 4378 ushort_t ire_marks = 0; 4379 int match_flags; 4380 boolean_t ip6i_present; 4381 ire_t *first_sire = NULL; 4382 mblk_t *copy_mp = NULL; 4383 mblk_t *xmit_mp = NULL; 4384 in6_addr_t save_dst; 4385 uint32_t multirt_flags = 4386 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4387 boolean_t multirt_is_resolvable; 4388 boolean_t multirt_resolve_next; 4389 boolean_t need_rele = B_FALSE; 4390 boolean_t do_attach_ill = B_FALSE; 4391 boolean_t ip6_asp_table_held = B_FALSE; 4392 tsol_ire_gw_secattr_t *attrp = NULL; 4393 tsol_gcgrp_t *gcgrp = NULL; 4394 tsol_gcgrp_addr_t ga; 4395 4396 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4397 4398 first_mp = mp; 4399 if (mp->b_datap->db_type == M_CTL) { 4400 mp = mp->b_cont; 4401 io = (ipsec_out_t *)first_mp->b_rptr; 4402 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4403 } else { 4404 io = NULL; 4405 } 4406 4407 /* 4408 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4409 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4410 * could be NULL. 4411 * 4412 * This information can appear either in an ip6i_t or an IPSEC_OUT 4413 * message. 4414 */ 4415 ip6h = (ip6_t *)mp->b_rptr; 4416 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4417 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4418 if (!ip6i_present || 4419 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4420 attach_ill = ip_grab_attach_ill(ill, first_mp, 4421 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4422 io->ipsec_out_ill_index), B_TRUE); 4423 /* Failure case frees things for us. */ 4424 if (attach_ill == NULL) 4425 return; 4426 4427 /* 4428 * Check if we need an ire that will not be 4429 * looked up by anybody else i.e. HIDDEN. 4430 */ 4431 if (ill_is_probeonly(attach_ill)) 4432 ire_marks = IRE_MARK_HIDDEN; 4433 } 4434 } 4435 4436 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4437 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4438 goto icmp_err_ret; 4439 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4440 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4441 goto icmp_err_ret; 4442 } 4443 4444 /* 4445 * If this IRE is created for forwarding or it is not for 4446 * TCP traffic, mark it as temporary. 4447 * 4448 * Is it sufficient just to check the next header?? 4449 */ 4450 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4451 ire_marks |= IRE_MARK_TEMPORARY; 4452 4453 /* 4454 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4455 * chain until it gets the most specific information available. 4456 * For example, we know that there is no IRE_CACHE for this dest, 4457 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4458 * ire_ftable_lookup_v6 will look up the gateway, etc. 4459 */ 4460 4461 if (ill == NULL) { 4462 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4463 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4464 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4465 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4466 match_flags); 4467 /* 4468 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4469 * in a NULL ill, but the packet could be a neighbor 4470 * solicitation/advertisment and could have a valid attach_ill. 4471 */ 4472 if (attach_ill != NULL) 4473 ill_refrele(attach_ill); 4474 } else { 4475 if (attach_ill != NULL) { 4476 /* 4477 * attach_ill is set only for communicating with 4478 * on-link hosts. So, don't look for DEFAULT. 
4479 * ip_wput_v6 passes the right ill in this case and 4480 * hence we can assert. 4481 */ 4482 ASSERT(ill == attach_ill); 4483 ill_refrele(attach_ill); 4484 do_attach_ill = B_TRUE; 4485 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4486 } else { 4487 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4488 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4489 } 4490 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4491 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4492 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags); 4493 } 4494 4495 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4496 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4497 4498 if (zoneid == ALL_ZONES && ire != NULL) { 4499 /* 4500 * In the forwarding case, we can use a route from any zone 4501 * since we won't change the source address. We can easily 4502 * assert that the source address is already set when there's no 4503 * ip6_info header - otherwise we'd have to call pullupmsg(). 4504 */ 4505 ASSERT(ip6i_present || 4506 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4507 zoneid = ire->ire_zoneid; 4508 } 4509 4510 /* 4511 * We enter a loop that will be run only once in most cases. 4512 * The loop is re-entered in the case where the destination 4513 * can be reached through multiple RTF_MULTIRT-flagged routes. 4514 * The intention is to compute multiple routes to a single 4515 * destination in a single ip_newroute_v6 call. 4516 * The information is contained in sire->ire_flags. 
4517 */ 4518 do { 4519 multirt_resolve_next = B_FALSE; 4520 4521 if (dst_ill != NULL) { 4522 ill_refrele(dst_ill); 4523 dst_ill = NULL; 4524 } 4525 if (src_ipif != NULL) { 4526 ipif_refrele(src_ipif); 4527 src_ipif = NULL; 4528 } 4529 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4530 ip3dbg(("ip_newroute_v6: starting new resolution " 4531 "with first_mp %p, tag %d\n", 4532 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4533 4534 /* 4535 * We check if there are trailing unresolved routes for 4536 * the destination contained in sire. 4537 */ 4538 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4539 &sire, multirt_flags, MBLK_GETLABEL(mp)); 4540 4541 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4542 "ire %p, sire %p\n", 4543 multirt_is_resolvable, (void *)ire, (void *)sire)); 4544 4545 if (!multirt_is_resolvable) { 4546 /* 4547 * No more multirt routes to resolve; give up 4548 * (all routes resolved or no more resolvable 4549 * routes). 4550 */ 4551 if (ire != NULL) { 4552 ire_refrele(ire); 4553 ire = NULL; 4554 } 4555 } else { 4556 ASSERT(sire != NULL); 4557 ASSERT(ire != NULL); 4558 /* 4559 * We simply use first_sire as a flag that 4560 * indicates if a resolvable multirt route has 4561 * already been found during the preceding 4562 * loops. If it is not the case, we may have 4563 * to send an ICMP error to report that the 4564 * destination is unreachable. We do not 4565 * IRE_REFHOLD first_sire. 4566 */ 4567 if (first_sire == NULL) { 4568 first_sire = sire; 4569 } 4570 } 4571 } 4572 if ((ire == NULL) || (ire == sire)) { 4573 /* 4574 * either ire == NULL (the destination cannot be 4575 * resolved) or ire == sire (the gateway cannot be 4576 * resolved). At this point, there are no more routes 4577 * to resolve for the destination, thus we exit. 
4578 */ 4579 if (ip_debug > 3) { 4580 /* ip2dbg */ 4581 pr_addr_dbg("ip_newroute_v6: " 4582 "can't resolve %s\n", AF_INET6, v6dstp); 4583 } 4584 ip3dbg(("ip_newroute_v6: " 4585 "ire %p, sire %p, first_sire %p\n", 4586 (void *)ire, (void *)sire, (void *)first_sire)); 4587 4588 if (sire != NULL) { 4589 ire_refrele(sire); 4590 sire = NULL; 4591 } 4592 4593 if (first_sire != NULL) { 4594 /* 4595 * At least one multirt route has been found 4596 * in the same ip_newroute() call; there is no 4597 * need to report an ICMP error. 4598 * first_sire was not IRE_REFHOLDed. 4599 */ 4600 MULTIRT_DEBUG_UNTAG(first_mp); 4601 freemsg(first_mp); 4602 return; 4603 } 4604 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4605 RTA_DST); 4606 goto icmp_err_ret; 4607 } 4608 4609 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4610 4611 /* 4612 * Verify that the returned IRE does not have either the 4613 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4614 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4615 */ 4616 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4617 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4618 goto icmp_err_ret; 4619 4620 /* 4621 * Increment the ire_ob_pkt_count field for ire if it is an 4622 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4623 * increment the same for the parent IRE, sire, if it is some 4624 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4625 * and HOST_REDIRECT). 4626 */ 4627 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4628 UPDATE_OB_PKT_COUNT(ire); 4629 ire->ire_last_used_time = lbolt; 4630 } 4631 4632 if (sire != NULL) { 4633 mutex_enter(&sire->ire_lock); 4634 v6gw = sire->ire_gateway_addr_v6; 4635 mutex_exit(&sire->ire_lock); 4636 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4637 IRE_INTERFACE)) == 0); 4638 UPDATE_OB_PKT_COUNT(sire); 4639 sire->ire_last_used_time = lbolt; 4640 } else { 4641 v6gw = ipv6_all_zeros; 4642 } 4643 4644 /* 4645 * We have a route to reach the destination. 
4646 * 4647 * 1) If the interface is part of ill group, try to get a new 4648 * ill taking load spreading into account. 4649 * 4650 * 2) After selecting the ill, get a source address that might 4651 * create good inbound load spreading and that matches the 4652 * right scope. ipif_select_source_v6 does this for us. 4653 * 4654 * If the application specified the ill (ifindex), we still 4655 * load spread. Only if the packets needs to go out specifically 4656 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4657 * IPV6_BOUND_PIF we don't try to use a different ill for load 4658 * spreading. 4659 */ 4660 if (!do_attach_ill) { 4661 /* 4662 * If the interface belongs to an interface group, 4663 * make sure the next possible interface in the group 4664 * is used. This encourages load spreading among 4665 * peers in an interface group. However, in the case 4666 * of multirouting, load spreading is not used, as we 4667 * actually want to replicate outgoing packets through 4668 * particular interfaces. 4669 * 4670 * Note: While we pick a dst_ill we are really only 4671 * interested in the ill for load spreading. 4672 * The source ipif is determined by source address 4673 * selection below. 4674 */ 4675 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4676 dst_ill = ire->ire_ipif->ipif_ill; 4677 /* For uniformity do a refhold */ 4678 ill_refhold(dst_ill); 4679 } else { 4680 /* 4681 * If we are here trying to create an IRE_CACHE 4682 * for an offlink destination and have the 4683 * IRE_CACHE for the next hop and the latter is 4684 * using virtual IP source address selection i.e 4685 * it's ire->ire_ipif is pointing to a virtual 4686 * network interface (vni) then 4687 * ip_newroute_get_dst_ll() will return the vni 4688 * interface as the dst_ill. 
Since the vni is 4689 * virtual i.e not associated with any physical 4690 * interface, it cannot be the dst_ill, hence 4691 * in such a case call ip_newroute_get_dst_ll() 4692 * with the stq_ill instead of the ire_ipif ILL. 4693 * The function returns a refheld ill. 4694 */ 4695 if ((ire->ire_type == IRE_CACHE) && 4696 IS_VNI(ire->ire_ipif->ipif_ill)) 4697 dst_ill = ip_newroute_get_dst_ill_v6( 4698 ire->ire_stq->q_ptr); 4699 else 4700 dst_ill = ip_newroute_get_dst_ill_v6( 4701 ire->ire_ipif->ipif_ill); 4702 } 4703 if (dst_ill == NULL) { 4704 if (ip_debug > 2) { 4705 pr_addr_dbg("ip_newroute_v6 : no dst " 4706 "ill for dst %s\n", 4707 AF_INET6, v6dstp); 4708 } 4709 goto icmp_err_ret; 4710 } else if (dst_ill->ill_group == NULL && ill != NULL && 4711 dst_ill != ill) { 4712 /* 4713 * If "ill" is not part of any group, we should 4714 * have found a route matching "ill" as we 4715 * called ire_ftable_lookup_v6 with 4716 * MATCH_IRE_ILL_GROUP. 4717 * Rather than asserting when there is a 4718 * mismatch, we just drop the packet. 4719 */ 4720 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4721 "dst_ill %s ill %s\n", 4722 dst_ill->ill_name, 4723 ill->ill_name)); 4724 goto icmp_err_ret; 4725 } 4726 } else { 4727 dst_ill = ire->ire_ipif->ipif_ill; 4728 /* For uniformity do refhold */ 4729 ill_refhold(dst_ill); 4730 /* 4731 * We should have found a route matching ill as we 4732 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4733 * Rather than asserting, while there is a mismatch, 4734 * we just drop the packet. 4735 */ 4736 if (dst_ill != ill) { 4737 ip0dbg(("ip_newroute_v6: Packet dropped as " 4738 "IP6I_ATTACH_IF ill is %s, " 4739 "ire->ire_ipif->ipif_ill is %s\n", 4740 ill->ill_name, 4741 dst_ill->ill_name)); 4742 goto icmp_err_ret; 4743 } 4744 } 4745 /* 4746 * Pick a source address which matches the scope of the 4747 * destination address. 4748 * For RTF_SETSRC routes, the source address is imposed by the 4749 * parent ire (sire). 
4750 */ 4751 ASSERT(src_ipif == NULL); 4752 if (ire->ire_type == IRE_IF_RESOLVER && 4753 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4754 ip6_asp_can_lookup()) { 4755 /* 4756 * The ire cache entry we're adding is for the 4757 * gateway itself. The source address in this case 4758 * is relative to the gateway's address. 4759 */ 4760 ip6_asp_table_held = B_TRUE; 4761 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4762 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4763 if (src_ipif != NULL) 4764 ire_marks |= IRE_MARK_USESRC_CHECK; 4765 } else { 4766 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4767 /* 4768 * Check that the ipif matching the requested 4769 * source address still exists. 4770 */ 4771 src_ipif = ipif_lookup_addr_v6( 4772 &sire->ire_src_addr_v6, NULL, zoneid, 4773 NULL, NULL, NULL, NULL); 4774 } 4775 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4776 uint_t restrict_ill = RESTRICT_TO_NONE; 4777 4778 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4779 & IP6I_ATTACH_IF) 4780 restrict_ill = RESTRICT_TO_ILL; 4781 ip6_asp_table_held = B_TRUE; 4782 src_ipif = ipif_select_source_v6(dst_ill, 4783 v6dstp, restrict_ill, 4784 IPV6_PREFER_SRC_DEFAULT, zoneid); 4785 if (src_ipif != NULL) 4786 ire_marks |= IRE_MARK_USESRC_CHECK; 4787 } 4788 } 4789 4790 if (src_ipif == NULL) { 4791 if (ip_debug > 2) { 4792 /* ip1dbg */ 4793 pr_addr_dbg("ip_newroute_v6: no src for " 4794 "dst %s\n, ", AF_INET6, v6dstp); 4795 printf("ip_newroute_v6: interface name %s\n", 4796 dst_ill->ill_name); 4797 } 4798 goto icmp_err_ret; 4799 } 4800 4801 if (ip_debug > 3) { 4802 /* ip2dbg */ 4803 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4804 AF_INET6, &v6gw); 4805 } 4806 ip2dbg(("\tire type %s (%d)\n", 4807 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4808 4809 /* 4810 * At this point in ip_newroute_v6(), ire is either the 4811 * IRE_CACHE of the next-hop gateway for an off-subnet 4812 * destination or an IRE_INTERFACE type that should be used 4813 * to resolve an 
on-subnet destination or an on-subnet 4814 * next-hop gateway. 4815 * 4816 * In the IRE_CACHE case, we have the following : 4817 * 4818 * 1) src_ipif - used for getting a source address. 4819 * 4820 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4821 * means packets using this IRE_CACHE will go out on dst_ill. 4822 * 4823 * 3) The IRE sire will point to the prefix that is the longest 4824 * matching route for the destination. These prefix types 4825 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST, and 4826 * IRE_HOST_REDIRECT. 4827 * 4828 * The newly created IRE_CACHE entry for the off-subnet 4829 * destination is tied to both the prefix route and the 4830 * interface route used to resolve the next-hop gateway 4831 * via the ire_phandle and ire_ihandle fields, respectively. 4832 * 4833 * In the IRE_INTERFACE case, we have the following : 4834 * 4835 * 1) src_ipif - used for getting a source address. 4836 * 4837 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4838 * means packets using the IRE_CACHE that we will build 4839 * here will go out on dst_ill. 4840 * 4841 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4842 * to be created will only be tied to the IRE_INTERFACE that 4843 * was derived from the ire_ihandle field. 4844 * 4845 * If sire is non-NULL, it means the destination is off-link 4846 * and we will first create the IRE_CACHE for the gateway. 4847 * Next time through ip_newroute_v6, we will create the 4848 * IRE_CACHE for the final destination as described above. 4849 */ 4850 save_ire = ire; 4851 switch (ire->ire_type) { 4852 case IRE_CACHE: { 4853 ire_t *ipif_ire; 4854 4855 ASSERT(sire != NULL); 4856 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4857 mutex_enter(&ire->ire_lock); 4858 v6gw = ire->ire_gateway_addr_v6; 4859 mutex_exit(&ire->ire_lock); 4860 } 4861 /* 4862 * We need 3 ire's to create a new cache ire for an 4863 * off-link destination from the cache ire of the 4864 * gateway. 4865 * 4866 * 1. 
The prefix ire 'sire' 4867 * 2. The cache ire of the gateway 'ire' 4868 * 3. The interface ire 'ipif_ire' 4869 * 4870 * We have (1) and (2). We lookup (3) below. 4871 * 4872 * If there is no interface route to the gateway, 4873 * it is a race condition, where we found the cache 4874 * but the inteface route has been deleted. 4875 */ 4876 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4877 if (ipif_ire == NULL) { 4878 ip1dbg(("ip_newroute_v6:" 4879 "ire_ihandle_lookup_offlink_v6 failed\n")); 4880 goto icmp_err_ret; 4881 } 4882 /* 4883 * Assume DL_UNITDATA_REQ is same for all physical 4884 * interfaces in the ifgrp. If it isn't, this code will 4885 * have to be seriously rewhacked to allow the 4886 * fastpath probing (such that I cache the link 4887 * header in the IRE_CACHE) to work over ifgrps. 4888 * We have what we need to build an IRE_CACHE. 4889 */ 4890 /* 4891 * Note: the new ire inherits RTF_SETSRC 4892 * and RTF_MULTIRT to propagate these flags from prefix 4893 * to cache. 4894 */ 4895 4896 /* 4897 * Check cached gateway IRE for any security 4898 * attributes; if found, associate the gateway 4899 * credentials group to the destination IRE. 
4900 */ 4901 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4902 mutex_enter(&attrp->igsa_lock); 4903 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4904 GCGRP_REFHOLD(gcgrp); 4905 mutex_exit(&attrp->igsa_lock); 4906 } 4907 4908 ire = ire_create_v6( 4909 v6dstp, /* dest address */ 4910 &ipv6_all_ones, /* mask */ 4911 &src_ipif->ipif_v6src_addr, /* source address */ 4912 &v6gw, /* gateway address */ 4913 &save_ire->ire_max_frag, 4914 NULL, /* Fast Path header */ 4915 dst_ill->ill_rq, /* recv-from queue */ 4916 dst_ill->ill_wq, /* send-to queue */ 4917 IRE_CACHE, 4918 NULL, 4919 src_ipif, 4920 &sire->ire_mask_v6, /* Parent mask */ 4921 sire->ire_phandle, /* Parent handle */ 4922 ipif_ire->ire_ihandle, /* Interface handle */ 4923 sire->ire_flags & /* flags if any */ 4924 (RTF_SETSRC | RTF_MULTIRT), 4925 &(sire->ire_uinfo), 4926 NULL, 4927 gcgrp); 4928 4929 if (ire == NULL) { 4930 if (gcgrp != NULL) { 4931 GCGRP_REFRELE(gcgrp); 4932 gcgrp = NULL; 4933 } 4934 ire_refrele(save_ire); 4935 ire_refrele(ipif_ire); 4936 break; 4937 } 4938 4939 /* reference now held by IRE */ 4940 gcgrp = NULL; 4941 4942 ire->ire_marks |= ire_marks; 4943 4944 /* 4945 * Prevent sire and ipif_ire from getting deleted. The 4946 * newly created ire is tied to both of them via the 4947 * phandle and ihandle respectively. 4948 */ 4949 IRB_REFHOLD(sire->ire_bucket); 4950 /* Has it been removed already ? */ 4951 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4952 IRB_REFRELE(sire->ire_bucket); 4953 ire_refrele(ipif_ire); 4954 ire_refrele(save_ire); 4955 break; 4956 } 4957 4958 IRB_REFHOLD(ipif_ire->ire_bucket); 4959 /* Has it been removed already ? 
*/ 4960 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4961 IRB_REFRELE(ipif_ire->ire_bucket); 4962 IRB_REFRELE(sire->ire_bucket); 4963 ire_refrele(ipif_ire); 4964 ire_refrele(save_ire); 4965 break; 4966 } 4967 4968 xmit_mp = first_mp; 4969 if (ire->ire_flags & RTF_MULTIRT) { 4970 copy_mp = copymsg(first_mp); 4971 if (copy_mp != NULL) { 4972 xmit_mp = copy_mp; 4973 MULTIRT_DEBUG_TAG(first_mp); 4974 } 4975 } 4976 ire_add_then_send(q, ire, xmit_mp); 4977 if (ip6_asp_table_held) { 4978 ip6_asp_table_refrele(); 4979 ip6_asp_table_held = B_FALSE; 4980 } 4981 ire_refrele(save_ire); 4982 4983 /* Assert that sire is not deleted yet. */ 4984 ASSERT(sire->ire_ptpn != NULL); 4985 IRB_REFRELE(sire->ire_bucket); 4986 4987 /* Assert that ipif_ire is not deleted yet. */ 4988 ASSERT(ipif_ire->ire_ptpn != NULL); 4989 IRB_REFRELE(ipif_ire->ire_bucket); 4990 ire_refrele(ipif_ire); 4991 4992 if (copy_mp != NULL) { 4993 /* 4994 * Search for the next unresolved 4995 * multirt route. 4996 */ 4997 copy_mp = NULL; 4998 ipif_ire = NULL; 4999 ire = NULL; 5000 /* re-enter the loop */ 5001 multirt_resolve_next = B_TRUE; 5002 continue; 5003 } 5004 ire_refrele(sire); 5005 ill_refrele(dst_ill); 5006 ipif_refrele(src_ipif); 5007 return; 5008 } 5009 case IRE_IF_NORESOLVER: 5010 /* 5011 * We have what we need to build an IRE_CACHE. 5012 * 5013 * Create a new dlureq_mp with the IPv6 gateway 5014 * address in destination address in the DLPI hdr 5015 * if the physical length is exactly 16 bytes. 
5016 */ 5017 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5018 const in6_addr_t *addr; 5019 5020 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5021 addr = &v6gw; 5022 else 5023 addr = v6dstp; 5024 5025 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 5026 dst_ill->ill_phys_addr_length, 5027 dst_ill->ill_sap, 5028 dst_ill->ill_sap_length); 5029 } else { 5030 dlureq_mp = ill_dlur_gen(NULL, 5031 dst_ill->ill_phys_addr_length, 5032 dst_ill->ill_sap, 5033 dst_ill->ill_sap_length); 5034 } 5035 if (dlureq_mp == NULL) 5036 break; 5037 /* 5038 * TSol note: We are creating the ire cache for the 5039 * destination 'dst'. If 'dst' is offlink, going 5040 * through the first hop 'gw', the security attributes 5041 * of 'dst' must be set to point to the gateway 5042 * credentials of gateway 'gw'. If 'dst' is onlink, it 5043 * is possible that 'dst' is a potential gateway that is 5044 * referenced by some route that has some security 5045 * attributes. Thus in the former case, we need to do a 5046 * gcgrp_lookup of 'gw' while in the latter case we 5047 * need to do gcgrp_lookup of 'dst' itself. 5048 */ 5049 ga.ga_af = AF_INET6; 5050 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5051 ga.ga_addr = v6gw; 5052 else 5053 ga.ga_addr = *v6dstp; 5054 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5055 5056 /* 5057 * Note: the new ire inherits sire flags RTF_SETSRC 5058 * and RTF_MULTIRT to propagate those rules from prefix 5059 * to cache. 5060 */ 5061 ire = ire_create_v6( 5062 v6dstp, /* dest address */ 5063 &ipv6_all_ones, /* mask */ 5064 &src_ipif->ipif_v6src_addr, /* source address */ 5065 &v6gw, /* gateway address */ 5066 &save_ire->ire_max_frag, 5067 NULL, /* Fast Path header */ 5068 dst_ill->ill_rq, /* recv-from queue */ 5069 dst_ill->ill_wq, /* send-to queue */ 5070 IRE_CACHE, 5071 dlureq_mp, 5072 src_ipif, 5073 &save_ire->ire_mask_v6, /* Parent mask */ 5074 (sire != NULL) ? /* Parent handle */ 5075 sire->ire_phandle : 0, 5076 save_ire->ire_ihandle, /* Interface handle */ 5077 (sire != NULL) ? 
/* flags if any */ 5078 sire->ire_flags & 5079 (RTF_SETSRC | RTF_MULTIRT) : 0, 5080 &(save_ire->ire_uinfo), 5081 NULL, 5082 gcgrp); 5083 5084 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 5085 freeb(dlureq_mp); 5086 5087 if (ire == NULL) { 5088 if (gcgrp != NULL) { 5089 GCGRP_REFRELE(gcgrp); 5090 gcgrp = NULL; 5091 } 5092 ire_refrele(save_ire); 5093 break; 5094 } 5095 5096 /* reference now held by IRE */ 5097 gcgrp = NULL; 5098 5099 ire->ire_marks |= ire_marks; 5100 5101 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5102 dst = v6gw; 5103 else 5104 dst = *v6dstp; 5105 err = ndp_noresolver(dst_ill, &dst); 5106 if (err != 0) { 5107 ire_refrele(save_ire); 5108 break; 5109 } 5110 5111 /* Prevent save_ire from getting deleted */ 5112 IRB_REFHOLD(save_ire->ire_bucket); 5113 /* Has it been removed already ? */ 5114 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5115 IRB_REFRELE(save_ire->ire_bucket); 5116 ire_refrele(save_ire); 5117 break; 5118 } 5119 5120 xmit_mp = first_mp; 5121 /* 5122 * In case of MULTIRT, a copy of the current packet 5123 * to send is made to further re-enter the 5124 * loop and attempt another route resolution 5125 */ 5126 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5127 copy_mp = copymsg(first_mp); 5128 if (copy_mp != NULL) { 5129 xmit_mp = copy_mp; 5130 MULTIRT_DEBUG_TAG(first_mp); 5131 } 5132 } 5133 ire_add_then_send(q, ire, xmit_mp); 5134 if (ip6_asp_table_held) { 5135 ip6_asp_table_refrele(); 5136 ip6_asp_table_held = B_FALSE; 5137 } 5138 5139 /* Assert that it is not deleted yet. */ 5140 ASSERT(save_ire->ire_ptpn != NULL); 5141 IRB_REFRELE(save_ire->ire_bucket); 5142 ire_refrele(save_ire); 5143 5144 if (copy_mp != NULL) { 5145 /* 5146 * If we found a (no)resolver, we ignore any 5147 * trailing top priority IRE_CACHE in 5148 * further loops. This ensures that we do not 5149 * omit any (no)resolver despite the priority 5150 * in this call. 
5151 * IRE_CACHE, if any, will be processed 5152 * by another thread entering ip_newroute(), 5153 * (on resolver response, for example). 5154 * We use this to force multiple parallel 5155 * resolution as soon as a packet needs to be 5156 * sent. The result is, after one packet 5157 * emission all reachable routes are generally 5158 * resolved. 5159 * Otherwise, complete resolution of MULTIRT 5160 * routes would require several emissions as 5161 * side effect. 5162 */ 5163 multirt_flags &= ~MULTIRT_CACHEGW; 5164 5165 /* 5166 * Search for the next unresolved multirt 5167 * route. 5168 */ 5169 copy_mp = NULL; 5170 save_ire = NULL; 5171 ire = NULL; 5172 /* re-enter the loop */ 5173 multirt_resolve_next = B_TRUE; 5174 continue; 5175 } 5176 5177 /* Don't need sire anymore */ 5178 if (sire != NULL) 5179 ire_refrele(sire); 5180 ill_refrele(dst_ill); 5181 ipif_refrele(src_ipif); 5182 return; 5183 5184 case IRE_IF_RESOLVER: 5185 /* 5186 * We can't build an IRE_CACHE yet, but at least we 5187 * found a resolver that can help. 5188 */ 5189 dst = *v6dstp; 5190 5191 /* 5192 * To be at this point in the code with a non-zero gw 5193 * means that dst is reachable through a gateway that 5194 * we have never resolved. By changing dst to the gw 5195 * addr we resolve the gateway first. When 5196 * ire_add_then_send() tries to put the IP dg to dst, 5197 * it will reenter ip_newroute() at which time we will 5198 * find the IRE_CACHE for the gw and create another 5199 * IRE_CACHE above (for dst itself). 5200 */ 5201 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5202 save_dst = dst; 5203 dst = v6gw; 5204 v6gw = ipv6_all_zeros; 5205 } 5206 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5207 /* 5208 * Ask the external resolver to do its thing. 
5209 * Make an mblk chain in the following form: 5210 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5211 */ 5212 mblk_t *ire_mp; 5213 mblk_t *areq_mp; 5214 areq_t *areq; 5215 in6_addr_t *addrp; 5216 5217 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5218 if (ip6_asp_table_held) { 5219 ip6_asp_table_refrele(); 5220 ip6_asp_table_held = B_FALSE; 5221 } 5222 ire = ire_create_mp_v6( 5223 &dst, /* dest address */ 5224 &ipv6_all_ones, /* mask */ 5225 &src_ipif->ipif_v6src_addr, 5226 /* source address */ 5227 &v6gw, /* gateway address */ 5228 NULL, /* Fast Path header */ 5229 dst_ill->ill_rq, /* recv-from queue */ 5230 dst_ill->ill_wq, /* send-to queue */ 5231 IRE_CACHE, 5232 NULL, 5233 src_ipif, 5234 &save_ire->ire_mask_v6, 5235 /* Parent mask */ 5236 0, 5237 save_ire->ire_ihandle, 5238 /* Interface handle */ 5239 0, /* flags if any */ 5240 &(save_ire->ire_uinfo), 5241 NULL, 5242 NULL); 5243 5244 ire_refrele(save_ire); 5245 if (ire == NULL) { 5246 ip1dbg(("ip_newroute_v6:" 5247 "ire is NULL\n")); 5248 break; 5249 } 5250 5251 if ((sire != NULL) && 5252 (sire->ire_flags & RTF_MULTIRT)) { 5253 /* 5254 * processing a copy of the packet to 5255 * send for further resolution loops 5256 */ 5257 copy_mp = copymsg(first_mp); 5258 if (copy_mp != NULL) 5259 MULTIRT_DEBUG_TAG(copy_mp); 5260 } 5261 ire->ire_marks |= ire_marks; 5262 ire_mp = ire->ire_mp; 5263 /* 5264 * Now create or find an nce for this interface. 5265 * The hw addr will need to to be set from 5266 * the reply to the AR_ENTRY_QUERY that 5267 * we're about to send. This will be done in 5268 * ire_add_v6(). 5269 */ 5270 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5271 switch (err) { 5272 case 0: 5273 /* 5274 * New cache entry created. 5275 * Break, then ask the external 5276 * resolver. 5277 */ 5278 break; 5279 case EINPROGRESS: 5280 /* 5281 * Resolution in progress; 5282 * packet has been queued by 5283 * ndp_resolver(). 
5284 */ 5285 ire_delete(ire); 5286 ire = NULL; 5287 /* 5288 * Check if another multirt 5289 * route must be resolved. 5290 */ 5291 if (copy_mp != NULL) { 5292 /* 5293 * If we found a resolver, we 5294 * ignore any trailing top 5295 * priority IRE_CACHE in 5296 * further loops. The reason is 5297 * the same as for noresolver. 5298 */ 5299 multirt_flags &= 5300 ~MULTIRT_CACHEGW; 5301 /* 5302 * Search for the next 5303 * unresolved multirt route. 5304 */ 5305 first_mp = copy_mp; 5306 copy_mp = NULL; 5307 mp = first_mp; 5308 if (mp->b_datap->db_type == 5309 M_CTL) { 5310 mp = mp->b_cont; 5311 } 5312 ASSERT(sire != NULL); 5313 dst = save_dst; 5314 /* 5315 * re-enter the loop 5316 */ 5317 multirt_resolve_next = 5318 B_TRUE; 5319 continue; 5320 } 5321 5322 if (sire != NULL) 5323 ire_refrele(sire); 5324 ill_refrele(dst_ill); 5325 ipif_refrele(src_ipif); 5326 return; 5327 default: 5328 /* 5329 * Transient error; packet will be 5330 * freed. 5331 */ 5332 ire_delete(ire); 5333 ire = NULL; 5334 break; 5335 } 5336 if (err != 0) 5337 break; 5338 /* 5339 * Now set up the AR_ENTRY_QUERY and send it. 5340 */ 5341 areq_mp = ill_arp_alloc(dst_ill, 5342 (uchar_t *)&ipv6_areq_template, 5343 (caddr_t)&dst); 5344 if (areq_mp == NULL) { 5345 ip1dbg(("ip_newroute_v6:" 5346 "areq_mp is NULL\n")); 5347 freemsg(ire_mp); 5348 break; 5349 } 5350 areq = (areq_t *)areq_mp->b_rptr; 5351 addrp = (in6_addr_t *)((char *)areq + 5352 areq->areq_target_addr_offset); 5353 *addrp = dst; 5354 addrp = (in6_addr_t *)((char *)areq + 5355 areq->areq_sender_addr_offset); 5356 *addrp = src_ipif->ipif_v6src_addr; 5357 /* 5358 * link the chain, then send up to the resolver. 5359 */ 5360 linkb(areq_mp, ire_mp); 5361 linkb(areq_mp, mp); 5362 ip1dbg(("ip_newroute_v6:" 5363 "putnext to resolver\n")); 5364 putnext(dst_ill->ill_rq, areq_mp); 5365 /* 5366 * Check if another multirt route 5367 * must be resolved. 
5368 */ 5369 ire = NULL; 5370 if (copy_mp != NULL) { 5371 /* 5372 * If we find a resolver, we ignore any 5373 * trailing top priority IRE_CACHE in 5374 * further loops. The reason is the 5375 * same as for noresolver. 5376 */ 5377 multirt_flags &= ~MULTIRT_CACHEGW; 5378 /* 5379 * Search for the next unresolved 5380 * multirt route. 5381 */ 5382 first_mp = copy_mp; 5383 copy_mp = NULL; 5384 mp = first_mp; 5385 if (mp->b_datap->db_type == M_CTL) { 5386 mp = mp->b_cont; 5387 } 5388 ASSERT(sire != NULL); 5389 dst = save_dst; 5390 /* 5391 * re-enter the loop 5392 */ 5393 multirt_resolve_next = B_TRUE; 5394 continue; 5395 } 5396 5397 if (sire != NULL) 5398 ire_refrele(sire); 5399 ill_refrele(dst_ill); 5400 ipif_refrele(src_ipif); 5401 return; 5402 } 5403 /* 5404 * Non-external resolver case. 5405 * 5406 * TSol note: Please see the note above the 5407 * IRE_IF_NORESOLVER case. 5408 */ 5409 ga.ga_af = AF_INET6; 5410 ga.ga_addr = dst; 5411 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5412 5413 ire = ire_create_v6( 5414 &dst, /* dest address */ 5415 &ipv6_all_ones, /* mask */ 5416 &src_ipif->ipif_v6src_addr, /* source address */ 5417 &v6gw, /* gateway address */ 5418 &save_ire->ire_max_frag, 5419 NULL, /* Fast Path header */ 5420 dst_ill->ill_rq, /* recv-from queue */ 5421 dst_ill->ill_wq, /* send-to queue */ 5422 IRE_CACHE, 5423 NULL, 5424 src_ipif, 5425 &save_ire->ire_mask_v6, /* Parent mask */ 5426 0, 5427 save_ire->ire_ihandle, /* Interface handle */ 5428 0, /* flags if any */ 5429 &(save_ire->ire_uinfo), 5430 NULL, 5431 gcgrp); 5432 5433 if (ire == NULL) { 5434 if (gcgrp != NULL) { 5435 GCGRP_REFRELE(gcgrp); 5436 gcgrp = NULL; 5437 } 5438 ire_refrele(save_ire); 5439 break; 5440 } 5441 5442 /* reference now held by IRE */ 5443 gcgrp = NULL; 5444 5445 if ((sire != NULL) && 5446 (sire->ire_flags & RTF_MULTIRT)) { 5447 copy_mp = copymsg(first_mp); 5448 if (copy_mp != NULL) 5449 MULTIRT_DEBUG_TAG(copy_mp); 5450 } 5451 5452 ire->ire_marks |= ire_marks; 5453 err = ndp_resolver(dst_ill, 
&dst, first_mp, zoneid); 5454 switch (err) { 5455 case 0: 5456 /* Prevent save_ire from getting deleted */ 5457 IRB_REFHOLD(save_ire->ire_bucket); 5458 /* Has it been removed already ? */ 5459 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5460 IRB_REFRELE(save_ire->ire_bucket); 5461 ire_refrele(save_ire); 5462 break; 5463 } 5464 5465 /* 5466 * We have a resolved cache entry, 5467 * add in the IRE. 5468 */ 5469 ire_add_then_send(q, ire, first_mp); 5470 if (ip6_asp_table_held) { 5471 ip6_asp_table_refrele(); 5472 ip6_asp_table_held = B_FALSE; 5473 } 5474 5475 /* Assert that it is not deleted yet. */ 5476 ASSERT(save_ire->ire_ptpn != NULL); 5477 IRB_REFRELE(save_ire->ire_bucket); 5478 ire_refrele(save_ire); 5479 /* 5480 * Check if another multirt route 5481 * must be resolved. 5482 */ 5483 ire = NULL; 5484 if (copy_mp != NULL) { 5485 /* 5486 * If we find a resolver, we ignore any 5487 * trailing top priority IRE_CACHE in 5488 * further loops. The reason is the 5489 * same as for noresolver. 5490 */ 5491 multirt_flags &= ~MULTIRT_CACHEGW; 5492 /* 5493 * Search for the next unresolved 5494 * multirt route. 5495 */ 5496 first_mp = copy_mp; 5497 copy_mp = NULL; 5498 mp = first_mp; 5499 if (mp->b_datap->db_type == M_CTL) { 5500 mp = mp->b_cont; 5501 } 5502 ASSERT(sire != NULL); 5503 dst = save_dst; 5504 /* 5505 * re-enter the loop 5506 */ 5507 multirt_resolve_next = B_TRUE; 5508 continue; 5509 } 5510 5511 if (sire != NULL) 5512 ire_refrele(sire); 5513 ill_refrele(dst_ill); 5514 ipif_refrele(src_ipif); 5515 return; 5516 5517 case EINPROGRESS: 5518 /* 5519 * mp was consumed - presumably queued. 5520 * No need for ire, presumably resolution is 5521 * in progress, and ire will be added when the 5522 * address is resolved. 
5523 */ 5524 if (ip6_asp_table_held) { 5525 ip6_asp_table_refrele(); 5526 ip6_asp_table_held = B_FALSE; 5527 } 5528 ASSERT(ire->ire_nce == NULL); 5529 ire_delete(ire); 5530 ire_refrele(save_ire); 5531 /* 5532 * Check if another multirt route 5533 * must be resolved. 5534 */ 5535 ire = NULL; 5536 if (copy_mp != NULL) { 5537 /* 5538 * If we find a resolver, we ignore any 5539 * trailing top priority IRE_CACHE in 5540 * further loops. The reason is the 5541 * same as for noresolver. 5542 */ 5543 multirt_flags &= ~MULTIRT_CACHEGW; 5544 /* 5545 * Search for the next unresolved 5546 * multirt route. 5547 */ 5548 first_mp = copy_mp; 5549 copy_mp = NULL; 5550 mp = first_mp; 5551 if (mp->b_datap->db_type == M_CTL) { 5552 mp = mp->b_cont; 5553 } 5554 ASSERT(sire != NULL); 5555 dst = save_dst; 5556 /* 5557 * re-enter the loop 5558 */ 5559 multirt_resolve_next = B_TRUE; 5560 continue; 5561 } 5562 if (sire != NULL) 5563 ire_refrele(sire); 5564 ill_refrele(dst_ill); 5565 ipif_refrele(src_ipif); 5566 return; 5567 default: 5568 /* Some transient error */ 5569 ASSERT(ire->ire_nce == NULL); 5570 ire_refrele(save_ire); 5571 break; 5572 } 5573 break; 5574 default: 5575 break; 5576 } 5577 if (ip6_asp_table_held) { 5578 ip6_asp_table_refrele(); 5579 ip6_asp_table_held = B_FALSE; 5580 } 5581 } while (multirt_resolve_next); 5582 5583 err_ret: 5584 ip1dbg(("ip_newroute_v6: dropped\n")); 5585 if (src_ipif != NULL) 5586 ipif_refrele(src_ipif); 5587 if (dst_ill != NULL) { 5588 need_rele = B_TRUE; 5589 ill = dst_ill; 5590 } 5591 if (ill != NULL) { 5592 if (mp->b_prev != NULL) { 5593 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 5594 } else { 5595 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 5596 } 5597 5598 if (need_rele) 5599 ill_refrele(ill); 5600 } else { 5601 if (mp->b_prev != NULL) { 5602 BUMP_MIB(&ip6_mib, ipv6InDiscards); 5603 } else { 5604 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 5605 } 5606 } 5607 /* Did this packet originate externally? 
*/ 5608 if (mp->b_prev) { 5609 mp->b_next = NULL; 5610 mp->b_prev = NULL; 5611 } 5612 if (copy_mp != NULL) { 5613 MULTIRT_DEBUG_UNTAG(copy_mp); 5614 freemsg(copy_mp); 5615 } 5616 MULTIRT_DEBUG_UNTAG(first_mp); 5617 freemsg(first_mp); 5618 if (ire != NULL) 5619 ire_refrele(ire); 5620 if (sire != NULL) 5621 ire_refrele(sire); 5622 return; 5623 5624 icmp_err_ret: 5625 if (ip6_asp_table_held) 5626 ip6_asp_table_refrele(); 5627 if (src_ipif != NULL) 5628 ipif_refrele(src_ipif); 5629 if (dst_ill != NULL) { 5630 need_rele = B_TRUE; 5631 ill = dst_ill; 5632 } 5633 ip1dbg(("ip_newroute_v6: no route\n")); 5634 if (sire != NULL) 5635 ire_refrele(sire); 5636 /* 5637 * We need to set sire to NULL to avoid double freeing if we 5638 * ever goto err_ret from below. 5639 */ 5640 sire = NULL; 5641 ip6h = (ip6_t *)mp->b_rptr; 5642 /* Skip ip6i_t header if present */ 5643 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5644 /* Make sure the IPv6 header is present */ 5645 if ((mp->b_wptr - (uchar_t *)ip6h) < 5646 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5647 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5648 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5649 goto err_ret; 5650 } 5651 } 5652 mp->b_rptr += sizeof (ip6i_t); 5653 ip6h = (ip6_t *)mp->b_rptr; 5654 } 5655 /* Did this packet originate externally? 
*/ 5656 if (mp->b_prev) { 5657 if (ill != NULL) { 5658 BUMP_MIB(ill->ill_ip6_mib, ipv6InNoRoutes); 5659 } else { 5660 BUMP_MIB(&ip6_mib, ipv6InNoRoutes); 5661 } 5662 mp->b_next = NULL; 5663 mp->b_prev = NULL; 5664 q = WR(q); 5665 } else { 5666 if (ill != NULL) { 5667 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 5668 } else { 5669 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 5670 } 5671 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5672 /* Failed */ 5673 if (copy_mp != NULL) { 5674 MULTIRT_DEBUG_UNTAG(copy_mp); 5675 freemsg(copy_mp); 5676 } 5677 MULTIRT_DEBUG_UNTAG(first_mp); 5678 freemsg(first_mp); 5679 if (ire != NULL) 5680 ire_refrele(ire); 5681 if (need_rele) 5682 ill_refrele(ill); 5683 return; 5684 } 5685 } 5686 5687 if (need_rele) 5688 ill_refrele(ill); 5689 5690 /* 5691 * At this point we will have ire only if RTF_BLACKHOLE 5692 * or RTF_REJECT flags are set on the IRE. It will not 5693 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5694 */ 5695 if (ire != NULL) { 5696 if (ire->ire_flags & RTF_BLACKHOLE) { 5697 ire_refrele(ire); 5698 if (copy_mp != NULL) { 5699 MULTIRT_DEBUG_UNTAG(copy_mp); 5700 freemsg(copy_mp); 5701 } 5702 MULTIRT_DEBUG_UNTAG(first_mp); 5703 freemsg(first_mp); 5704 return; 5705 } 5706 ire_refrele(ire); 5707 } 5708 if (ip_debug > 3) { 5709 /* ip2dbg */ 5710 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5711 AF_INET6, v6dstp); 5712 } 5713 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5714 B_FALSE, B_FALSE); 5715 } 5716 5717 /* 5718 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5719 * we need to send out a packet to a destination address for which we do not 5720 * have specific routing information. It is only used for multicast packets. 5721 * 5722 * If unspec_src we allow creating an IRE with source address zero. 5723 * ire_send_v6() will delete it after the packet is sent. 
5724 */ 5725 void 5726 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5727 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5728 { 5729 ire_t *ire = NULL; 5730 ipif_t *src_ipif = NULL; 5731 int err = 0; 5732 ill_t *dst_ill = NULL; 5733 ire_t *save_ire; 5734 ushort_t ire_marks = 0; 5735 ipsec_out_t *io; 5736 ill_t *attach_ill = NULL; 5737 ill_t *ill; 5738 ip6_t *ip6h; 5739 mblk_t *first_mp; 5740 boolean_t ip6i_present; 5741 ire_t *fire = NULL; 5742 mblk_t *copy_mp = NULL; 5743 boolean_t multirt_resolve_next; 5744 in6_addr_t *v6dstp = &v6dst; 5745 boolean_t ipif_held = B_FALSE; 5746 boolean_t ill_held = B_FALSE; 5747 boolean_t ip6_asp_table_held = B_FALSE; 5748 5749 /* 5750 * This loop is run only once in most cases. 5751 * We loop to resolve further routes only when the destination 5752 * can be reached through multiple RTF_MULTIRT-flagged ires. 5753 */ 5754 do { 5755 multirt_resolve_next = B_FALSE; 5756 if (dst_ill != NULL) { 5757 ill_refrele(dst_ill); 5758 dst_ill = NULL; 5759 } 5760 5761 if (src_ipif != NULL) { 5762 ipif_refrele(src_ipif); 5763 src_ipif = NULL; 5764 } 5765 ASSERT(ipif != NULL); 5766 ill = ipif->ipif_ill; 5767 5768 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5769 if (ip_debug > 2) { 5770 /* ip1dbg */ 5771 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5772 AF_INET6, v6dstp); 5773 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5774 ill->ill_name, ipif->ipif_isv6); 5775 } 5776 5777 first_mp = mp; 5778 if (mp->b_datap->db_type == M_CTL) { 5779 mp = mp->b_cont; 5780 io = (ipsec_out_t *)first_mp->b_rptr; 5781 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5782 } else { 5783 io = NULL; 5784 } 5785 5786 /* 5787 * If the interface is a pt-pt interface we look for an 5788 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5789 * local_address and the pt-pt destination address. 5790 * Otherwise we just match the local address. 
5791 */ 5792 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5793 goto err_ret; 5794 } 5795 /* 5796 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5797 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5798 * as it could be NULL. 5799 * 5800 * This information can appear either in an ip6i_t or an 5801 * IPSEC_OUT message. 5802 */ 5803 ip6h = (ip6_t *)mp->b_rptr; 5804 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5805 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5806 if (!ip6i_present || 5807 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5808 attach_ill = ip_grab_attach_ill(ill, first_mp, 5809 (ip6i_present ? 5810 ((ip6i_t *)ip6h)->ip6i_ifindex : 5811 io->ipsec_out_ill_index), B_TRUE); 5812 /* Failure case frees things for us. */ 5813 if (attach_ill == NULL) 5814 return; 5815 5816 /* 5817 * Check if we need an ire that will not be 5818 * looked up by anybody else i.e. HIDDEN. 5819 */ 5820 if (ill_is_probeonly(attach_ill)) 5821 ire_marks = IRE_MARK_HIDDEN; 5822 } 5823 } 5824 5825 /* 5826 * We check if an IRE_OFFSUBNET for the addr that goes through 5827 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5828 * RTF_MULTIRT flags must be honored. 5829 */ 5830 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5831 ip2dbg(("ip_newroute_ipif_v6: " 5832 "ipif_lookup_multi_ire_v6(" 5833 "ipif %p, dst %08x) = fire %p\n", 5834 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5835 (void *)fire)); 5836 5837 /* 5838 * If the application specified the ill (ifindex), we still 5839 * load spread. Only if the packets needs to go out specifically 5840 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5841 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5842 * multirouting, then we don't try to use a different ill for 5843 * load spreading. 
5844 */ 5845 if (attach_ill == NULL) { 5846 /* 5847 * If the interface belongs to an interface group, 5848 * make sure the next possible interface in the group 5849 * is used. This encourages load spreading among peers 5850 * in an interface group. 5851 * 5852 * Note: While we pick a dst_ill we are really only 5853 * interested in the ill for load spreading. The source 5854 * ipif is determined by source address selection below. 5855 */ 5856 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5857 dst_ill = ipif->ipif_ill; 5858 /* For uniformity do a refhold */ 5859 ill_refhold(dst_ill); 5860 } else { 5861 /* refheld by ip_newroute_get_dst_ill_v6 */ 5862 dst_ill = 5863 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5864 } 5865 if (dst_ill == NULL) { 5866 if (ip_debug > 2) { 5867 pr_addr_dbg("ip_newroute_ipif_v6: " 5868 "no dst ill for dst %s\n", 5869 AF_INET6, v6dstp); 5870 } 5871 goto err_ret; 5872 } 5873 } else { 5874 dst_ill = ipif->ipif_ill; 5875 /* 5876 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5877 * and IPV6_BOUND_PIF case. 5878 */ 5879 ASSERT(dst_ill == attach_ill); 5880 /* attach_ill is already refheld */ 5881 } 5882 /* 5883 * Pick a source address which matches the scope of the 5884 * destination address. 5885 * For RTF_SETSRC routes, the source address is imposed by the 5886 * parent ire (fire). 5887 */ 5888 ASSERT(src_ipif == NULL); 5889 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5890 /* 5891 * Check that the ipif matching the requested source 5892 * address still exists. 
5893 */ 5894 src_ipif = 5895 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5896 NULL, zoneid, NULL, NULL, NULL, NULL); 5897 } 5898 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5899 ip6_asp_table_held = B_TRUE; 5900 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5901 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5902 } 5903 5904 if (src_ipif == NULL) { 5905 if (!unspec_src) { 5906 if (ip_debug > 2) { 5907 /* ip1dbg */ 5908 pr_addr_dbg("ip_newroute_ipif_v6: " 5909 "no src for dst %s\n,", 5910 AF_INET6, v6dstp); 5911 printf(" through interface %s\n", 5912 dst_ill->ill_name); 5913 } 5914 goto err_ret; 5915 } 5916 src_ipif = ipif; 5917 ipif_refhold(src_ipif); 5918 } 5919 ire = ipif_to_ire_v6(ipif); 5920 if (ire == NULL) { 5921 if (ip_debug > 2) { 5922 /* ip1dbg */ 5923 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5924 AF_INET6, &ipif->ipif_v6lcl_addr); 5925 printf("ip_newroute_ipif_v6: " 5926 "if %s\n", dst_ill->ill_name); 5927 } 5928 goto err_ret; 5929 } 5930 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5931 goto err_ret; 5932 5933 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5934 5935 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5936 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5937 if (ip_debug > 2) { 5938 /* ip1dbg */ 5939 pr_addr_dbg(" address %s\n", 5940 AF_INET6, &ire->ire_src_addr_v6); 5941 } 5942 save_ire = ire; 5943 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5944 (void *)ire, (void *)ipif)); 5945 5946 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5947 /* 5948 * an IRE_OFFSUBET was looked up 5949 * on that interface. 5950 * this ire has RTF_MULTIRT flag, 5951 * so the resolution loop 5952 * will be re-entered to resolve 5953 * additional routes on other 5954 * interfaces. For that purpose, 5955 * a copy of the packet is 5956 * made at this point. 
5957 */ 5958 fire->ire_last_used_time = lbolt; 5959 copy_mp = copymsg(first_mp); 5960 if (copy_mp) { 5961 MULTIRT_DEBUG_TAG(copy_mp); 5962 } 5963 } 5964 5965 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5966 switch (ire->ire_type) { 5967 case IRE_IF_NORESOLVER: { 5968 /* We have what we need to build an IRE_CACHE. */ 5969 mblk_t *dlureq_mp; 5970 5971 /* 5972 * Create a new dlureq_mp with the 5973 * IPv6 gateway address in destination address in the 5974 * DLPI hdr if the physical length is exactly 16 bytes. 5975 */ 5976 ASSERT(dst_ill->ill_isv6); 5977 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5978 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5979 dst_ill->ill_phys_addr_length, 5980 dst_ill->ill_sap, 5981 dst_ill->ill_sap_length); 5982 } else { 5983 dlureq_mp = ill_dlur_gen(NULL, 5984 dst_ill->ill_phys_addr_length, 5985 dst_ill->ill_sap, 5986 dst_ill->ill_sap_length); 5987 } 5988 5989 if (dlureq_mp == NULL) 5990 break; 5991 /* 5992 * The newly created ire will inherit the flags of the 5993 * parent ire, if any. 5994 */ 5995 ire = ire_create_v6( 5996 v6dstp, /* dest address */ 5997 &ipv6_all_ones, /* mask */ 5998 &src_ipif->ipif_v6src_addr, /* source address */ 5999 NULL, /* gateway address */ 6000 &save_ire->ire_max_frag, 6001 NULL, /* Fast Path header */ 6002 dst_ill->ill_rq, /* recv-from queue */ 6003 dst_ill->ill_wq, /* send-to queue */ 6004 IRE_CACHE, 6005 dlureq_mp, 6006 src_ipif, 6007 NULL, 6008 (fire != NULL) ? /* Parent handle */ 6009 fire->ire_phandle : 0, 6010 save_ire->ire_ihandle, /* Interface handle */ 6011 (fire != NULL) ? 
6012 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6013 0, 6014 &ire_uinfo_null, 6015 NULL, 6016 NULL); 6017 6018 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) 6019 freeb(dlureq_mp); 6020 6021 if (ire == NULL) { 6022 ire_refrele(save_ire); 6023 break; 6024 } 6025 6026 ire->ire_marks |= ire_marks; 6027 6028 err = ndp_noresolver(dst_ill, v6dstp); 6029 if (err != 0) { 6030 ire_refrele(save_ire); 6031 break; 6032 } 6033 6034 /* Prevent save_ire from getting deleted */ 6035 IRB_REFHOLD(save_ire->ire_bucket); 6036 /* Has it been removed already ? */ 6037 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6038 IRB_REFRELE(save_ire->ire_bucket); 6039 ire_refrele(save_ire); 6040 break; 6041 } 6042 6043 ire_add_then_send(q, ire, first_mp); 6044 if (ip6_asp_table_held) { 6045 ip6_asp_table_refrele(); 6046 ip6_asp_table_held = B_FALSE; 6047 } 6048 6049 /* Assert that it is not deleted yet. */ 6050 ASSERT(save_ire->ire_ptpn != NULL); 6051 IRB_REFRELE(save_ire->ire_bucket); 6052 ire_refrele(save_ire); 6053 if (fire != NULL) { 6054 ire_refrele(fire); 6055 fire = NULL; 6056 } 6057 6058 /* 6059 * The resolution loop is re-entered if we 6060 * actually are in a multirouting case. 6061 */ 6062 if (copy_mp != NULL) { 6063 boolean_t need_resolve = 6064 ire_multirt_need_resolve_v6(v6dstp, 6065 MBLK_GETLABEL(copy_mp)); 6066 if (!need_resolve) { 6067 MULTIRT_DEBUG_UNTAG(copy_mp); 6068 freemsg(copy_mp); 6069 copy_mp = NULL; 6070 } else { 6071 /* 6072 * ipif_lookup_group_v6() calls 6073 * ire_lookup_multi_v6() that uses 6074 * ire_ftable_lookup_v6() to find 6075 * an IRE_INTERFACE for the group. 6076 * In the multirt case, 6077 * ire_lookup_multi_v6() then invokes 6078 * ire_multirt_lookup_v6() to find 6079 * the next resolvable ire. 6080 * As a result, we obtain a new 6081 * interface, derived from the 6082 * next ire. 
6083 */ 6084 if (ipif_held) { 6085 ipif_refrele(ipif); 6086 ipif_held = B_FALSE; 6087 } 6088 ipif = ipif_lookup_group_v6(v6dstp, 6089 zoneid); 6090 ip2dbg(("ip_newroute_ipif: " 6091 "multirt dst %08x, ipif %p\n", 6092 ntohl(V4_PART_OF_V6((*v6dstp))), 6093 (void *)ipif)); 6094 if (ipif != NULL) { 6095 ipif_held = B_TRUE; 6096 mp = copy_mp; 6097 copy_mp = NULL; 6098 multirt_resolve_next = 6099 B_TRUE; 6100 continue; 6101 } else { 6102 freemsg(copy_mp); 6103 } 6104 } 6105 } 6106 ill_refrele(dst_ill); 6107 if (ipif_held) { 6108 ipif_refrele(ipif); 6109 ipif_held = B_FALSE; 6110 } 6111 if (src_ipif != NULL) 6112 ipif_refrele(src_ipif); 6113 return; 6114 } 6115 case IRE_IF_RESOLVER: { 6116 6117 ASSERT(dst_ill->ill_isv6); 6118 6119 /* 6120 * We obtain a partial IRE_CACHE which we will pass 6121 * along with the resolver query. When the response 6122 * comes back it will be there ready for us to add. 6123 */ 6124 /* 6125 * the newly created ire will inherit the flags of the 6126 * parent ire, if any. 6127 */ 6128 ire = ire_create_v6( 6129 v6dstp, /* dest address */ 6130 &ipv6_all_ones, /* mask */ 6131 &src_ipif->ipif_v6src_addr, /* source address */ 6132 NULL, /* gateway address */ 6133 &save_ire->ire_max_frag, 6134 NULL, /* Fast Path header */ 6135 dst_ill->ill_rq, /* recv-from queue */ 6136 dst_ill->ill_wq, /* send-to queue */ 6137 IRE_CACHE, 6138 NULL, 6139 src_ipif, 6140 NULL, 6141 (fire != NULL) ? /* Parent handle */ 6142 fire->ire_phandle : 0, 6143 save_ire->ire_ihandle, /* Interface handle */ 6144 (fire != NULL) ? 
6145 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6146 0, 6147 &ire_uinfo_null, 6148 NULL, 6149 NULL); 6150 6151 if (ire == NULL) { 6152 ire_refrele(save_ire); 6153 break; 6154 } 6155 6156 ire->ire_marks |= ire_marks; 6157 6158 /* Resolve and add ire to the ctable */ 6159 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6160 switch (err) { 6161 case 0: 6162 /* Prevent save_ire from getting deleted */ 6163 IRB_REFHOLD(save_ire->ire_bucket); 6164 /* Has it been removed already ? */ 6165 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6166 IRB_REFRELE(save_ire->ire_bucket); 6167 ire_refrele(save_ire); 6168 break; 6169 } 6170 /* 6171 * We have a resolved cache entry, 6172 * add in the IRE. 6173 */ 6174 ire_add_then_send(q, ire, first_mp); 6175 if (ip6_asp_table_held) { 6176 ip6_asp_table_refrele(); 6177 ip6_asp_table_held = B_FALSE; 6178 } 6179 6180 /* Assert that it is not deleted yet. */ 6181 ASSERT(save_ire->ire_ptpn != NULL); 6182 IRB_REFRELE(save_ire->ire_bucket); 6183 ire_refrele(save_ire); 6184 if (fire != NULL) { 6185 ire_refrele(fire); 6186 fire = NULL; 6187 } 6188 6189 /* 6190 * The resolution loop is re-entered if we 6191 * actually are in a multirouting case. 6192 */ 6193 if (copy_mp != NULL) { 6194 boolean_t need_resolve = 6195 ire_multirt_need_resolve_v6(v6dstp, 6196 MBLK_GETLABEL(copy_mp)); 6197 if (!need_resolve) { 6198 MULTIRT_DEBUG_UNTAG(copy_mp); 6199 freemsg(copy_mp); 6200 copy_mp = NULL; 6201 } else { 6202 /* 6203 * ipif_lookup_group_v6() calls 6204 * ire_lookup_multi_v6() that 6205 * uses ire_ftable_lookup_v6() 6206 * to find an IRE_INTERFACE for 6207 * the group. In the multirt 6208 * case, ire_lookup_multi_v6() 6209 * then invokes 6210 * ire_multirt_lookup_v6() to 6211 * find the next resolvable ire. 6212 * As a result, we obtain a new 6213 * interface, derived from the 6214 * next ire. 
6215 */ 6216 if (ipif_held) { 6217 ipif_refrele(ipif); 6218 ipif_held = B_FALSE; 6219 } 6220 ipif = ipif_lookup_group_v6( 6221 v6dstp, zoneid); 6222 ip2dbg(("ip_newroute_ipif: " 6223 "multirt dst %08x, " 6224 "ipif %p\n", 6225 ntohl(V4_PART_OF_V6( 6226 (*v6dstp))), 6227 (void *)ipif)); 6228 if (ipif != NULL) { 6229 ipif_held = B_TRUE; 6230 mp = copy_mp; 6231 copy_mp = NULL; 6232 multirt_resolve_next = 6233 B_TRUE; 6234 continue; 6235 } else { 6236 freemsg(copy_mp); 6237 } 6238 } 6239 } 6240 ill_refrele(dst_ill); 6241 if (ipif_held) { 6242 ipif_refrele(ipif); 6243 ipif_held = B_FALSE; 6244 } 6245 if (src_ipif != NULL) 6246 ipif_refrele(src_ipif); 6247 return; 6248 6249 case EINPROGRESS: 6250 /* 6251 * mp was consumed - presumably queued. 6252 * No need for ire, presumably resolution is 6253 * in progress, and ire will be added when the 6254 * address is resolved. 6255 */ 6256 if (ip6_asp_table_held) { 6257 ip6_asp_table_refrele(); 6258 ip6_asp_table_held = B_FALSE; 6259 } 6260 ire_delete(ire); 6261 ire_refrele(save_ire); 6262 if (fire != NULL) { 6263 ire_refrele(fire); 6264 fire = NULL; 6265 } 6266 6267 /* 6268 * The resolution loop is re-entered if we 6269 * actually are in a multirouting case. 6270 */ 6271 if (copy_mp != NULL) { 6272 boolean_t need_resolve = 6273 ire_multirt_need_resolve_v6(v6dstp, 6274 MBLK_GETLABEL(copy_mp)); 6275 if (!need_resolve) { 6276 MULTIRT_DEBUG_UNTAG(copy_mp); 6277 freemsg(copy_mp); 6278 copy_mp = NULL; 6279 } else { 6280 /* 6281 * ipif_lookup_group_v6() calls 6282 * ire_lookup_multi_v6() that 6283 * uses ire_ftable_lookup_v6() 6284 * to find an IRE_INTERFACE for 6285 * the group. In the multirt 6286 * case, ire_lookup_multi_v6() 6287 * then invokes 6288 * ire_multirt_lookup_v6() to 6289 * find the next resolvable ire. 6290 * As a result, we obtain a new 6291 * interface, derived from the 6292 * next ire. 
6293 */ 6294 if (ipif_held) { 6295 ipif_refrele(ipif); 6296 ipif_held = B_FALSE; 6297 } 6298 ipif = ipif_lookup_group_v6( 6299 v6dstp, zoneid); 6300 ip2dbg(("ip_newroute_ipif: " 6301 "multirt dst %08x, " 6302 "ipif %p\n", 6303 ntohl(V4_PART_OF_V6( 6304 (*v6dstp))), 6305 (void *)ipif)); 6306 if (ipif != NULL) { 6307 ipif_held = B_TRUE; 6308 mp = copy_mp; 6309 copy_mp = NULL; 6310 multirt_resolve_next = 6311 B_TRUE; 6312 continue; 6313 } else { 6314 freemsg(copy_mp); 6315 } 6316 } 6317 } 6318 ill_refrele(dst_ill); 6319 if (ipif_held) { 6320 ipif_refrele(ipif); 6321 ipif_held = B_FALSE; 6322 } 6323 if (src_ipif != NULL) 6324 ipif_refrele(src_ipif); 6325 return; 6326 default: 6327 /* Some transient error */ 6328 ire_refrele(save_ire); 6329 break; 6330 } 6331 break; 6332 } 6333 default: 6334 break; 6335 } 6336 if (ip6_asp_table_held) { 6337 ip6_asp_table_refrele(); 6338 ip6_asp_table_held = B_FALSE; 6339 } 6340 } while (multirt_resolve_next); 6341 6342 err_ret: 6343 if (ip6_asp_table_held) 6344 ip6_asp_table_refrele(); 6345 if (ire != NULL) 6346 ire_refrele(ire); 6347 if (fire != NULL) 6348 ire_refrele(fire); 6349 if (ipif != NULL && ipif_held) 6350 ipif_refrele(ipif); 6351 if (src_ipif != NULL) 6352 ipif_refrele(src_ipif); 6353 /* Multicast - no point in trying to generate ICMP error */ 6354 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6355 if (dst_ill != NULL) { 6356 ill = dst_ill; 6357 ill_held = B_TRUE; 6358 } 6359 if (mp->b_prev || mp->b_next) { 6360 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6361 } else { 6362 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 6363 } 6364 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6365 mp->b_next = NULL; 6366 mp->b_prev = NULL; 6367 freemsg(first_mp); 6368 if (ill_held) 6369 ill_refrele(ill); 6370 } 6371 6372 /* 6373 * Parse and process any hop-by-hop or destination options. 6374 * 6375 * Assumes that q is an ill read queue so that ICMP errors for link-local 6376 * destinations are sent out the correct interface. 
6377 * 6378 * Returns -1 if there was an error and mp has been consumed. 6379 * Returns 0 if no special action is needed. 6380 * Returns 1 if the packet contained a router alert option for this node 6381 * which is verified to be "interesting/known" for our implementation. 6382 * 6383 * XXX Note: In future as more hbh or dest options are defined, 6384 * it may be better to have different routines for hbh and dest 6385 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6386 * may have same value in different namespaces. Or is it same namespace ?? 6387 * Current code checks for each opt_type (other than pads) if it is in 6388 * the expected nexthdr (hbh or dest) 6389 */ 6390 static int 6391 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6392 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6393 { 6394 uint8_t opt_type; 6395 uint_t optused; 6396 int ret = 0; 6397 mblk_t *first_mp; 6398 const char *errtype; 6399 6400 first_mp = mp; 6401 if (mp->b_datap->db_type == M_CTL) { 6402 mp = mp->b_cont; 6403 } 6404 6405 while (optlen != 0) { 6406 opt_type = *optptr; 6407 if (opt_type == IP6OPT_PAD1) { 6408 optused = 1; 6409 } else { 6410 if (optlen < 2) 6411 goto bad_opt; 6412 errtype = "malformed"; 6413 if (opt_type == ip6opt_ls) { 6414 optused = 2 + optptr[1]; 6415 if (optused > optlen) 6416 goto bad_opt; 6417 } else switch (opt_type) { 6418 case IP6OPT_PADN: 6419 /* 6420 * Note:We don't verify that (N-2) pad octets 6421 * are zero as required by spec. Adhere to 6422 * "be liberal in what you accept..." part of 6423 * implementation philosophy (RFC791,RFC1122) 6424 */ 6425 optused = 2 + optptr[1]; 6426 if (optused > optlen) 6427 goto bad_opt; 6428 break; 6429 6430 case IP6OPT_JUMBO: 6431 if (hdr_type != IPPROTO_HOPOPTS) 6432 goto opt_error; 6433 goto opt_error; /* XXX Not implemented! 
*/ 6434 6435 case IP6OPT_ROUTER_ALERT: { 6436 struct ip6_opt_router *or; 6437 6438 if (hdr_type != IPPROTO_HOPOPTS) 6439 goto opt_error; 6440 optused = 2 + optptr[1]; 6441 if (optused > optlen) 6442 goto bad_opt; 6443 or = (struct ip6_opt_router *)optptr; 6444 /* Check total length and alignment */ 6445 if (optused != sizeof (*or) || 6446 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6447 goto opt_error; 6448 /* Check value */ 6449 switch (*((uint16_t *)or->ip6or_value)) { 6450 case IP6_ALERT_MLD: 6451 case IP6_ALERT_RSVP: 6452 ret = 1; 6453 } 6454 break; 6455 } 6456 case IP6OPT_HOME_ADDRESS: { 6457 /* 6458 * Minimal support for the home address option 6459 * (which is required by all IPv6 nodes). 6460 * Implement by just swapping the home address 6461 * and source address. 6462 * XXX Note: this has IPsec implications since 6463 * AH needs to take this into account. 6464 * Also, when IPsec is used we need to ensure 6465 * that this is only processed once 6466 * in the received packet (to avoid swapping 6467 * back and forth). 6468 * NOTE:This option processing is considered 6469 * to be unsafe and prone to a denial of 6470 * service attack. 6471 * The current processing is not safe even with 6472 * IPsec secured IP packets. Since the home 6473 * address option processing requirement still 6474 * is in the IETF draft and in the process of 6475 * being redefined for its usage, it has been 6476 * decided to turn off the option by default. 6477 * If this section of code needs to be executed, 6478 * ndd variable ip6_ignore_home_address_opt 6479 * should be set to 0 at the user's own risk. 6480 */ 6481 struct ip6_opt_home_address *oh; 6482 in6_addr_t tmp; 6483 6484 if (ipv6_ignore_home_address_opt) 6485 goto opt_error; 6486 6487 if (hdr_type != IPPROTO_DSTOPTS) 6488 goto opt_error; 6489 optused = 2 + optptr[1]; 6490 if (optused > optlen) 6491 goto bad_opt; 6492 6493 /* 6494 * We did this dest. opt the first time 6495 * around (i.e. before AH processing). 
6496 * If we've done AH... stop now. 6497 */ 6498 if (first_mp != mp) { 6499 ipsec_in_t *ii; 6500 6501 ii = (ipsec_in_t *)first_mp->b_rptr; 6502 if (ii->ipsec_in_ah_sa != NULL) 6503 break; 6504 } 6505 6506 oh = (struct ip6_opt_home_address *)optptr; 6507 /* Check total length and alignment */ 6508 if (optused < sizeof (*oh) || 6509 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6510 goto opt_error; 6511 /* Swap ip6_src and the home address */ 6512 tmp = ip6h->ip6_src; 6513 /* XXX Note: only 8 byte alignment option */ 6514 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6515 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6516 break; 6517 } 6518 6519 case IP6OPT_TUNNEL_LIMIT: 6520 if (hdr_type != IPPROTO_DSTOPTS) { 6521 goto opt_error; 6522 } 6523 optused = 2 + optptr[1]; 6524 if (optused > optlen) { 6525 goto bad_opt; 6526 } 6527 if (optused != 3) { 6528 goto opt_error; 6529 } 6530 break; 6531 6532 default: 6533 errtype = "unknown"; 6534 /* FALLTHROUGH */ 6535 opt_error: 6536 switch (IP6OPT_TYPE(opt_type)) { 6537 case IP6OPT_TYPE_SKIP: 6538 optused = 2 + optptr[1]; 6539 if (optused > optlen) 6540 goto bad_opt; 6541 ip1dbg(("ip_process_options_v6: %s " 6542 "opt 0x%x skipped\n", 6543 errtype, opt_type)); 6544 break; 6545 case IP6OPT_TYPE_DISCARD: 6546 ip1dbg(("ip_process_options_v6: %s " 6547 "opt 0x%x; packet dropped\n", 6548 errtype, opt_type)); 6549 freemsg(first_mp); 6550 return (-1); 6551 case IP6OPT_TYPE_ICMP: 6552 icmp_param_problem_v6(WR(q), first_mp, 6553 ICMP6_PARAMPROB_OPTION, 6554 (uint32_t)(optptr - 6555 (uint8_t *)ip6h), 6556 B_FALSE, B_FALSE); 6557 return (-1); 6558 case IP6OPT_TYPE_FORCEICMP: 6559 icmp_param_problem_v6(WR(q), first_mp, 6560 ICMP6_PARAMPROB_OPTION, 6561 (uint32_t)(optptr - 6562 (uint8_t *)ip6h), 6563 B_FALSE, B_TRUE); 6564 return (-1); 6565 default: 6566 ASSERT(0); 6567 } 6568 } 6569 } 6570 optlen -= optused; 6571 optptr += optused; 6572 } 6573 return (ret); 6574 6575 bad_opt: 6576 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6577 
(uint32_t)(optptr - (uint8_t *)ip6h), 6578 B_FALSE, B_FALSE); 6579 return (-1); 6580 } 6581 6582 /* 6583 * Process a routing header that is not yet empty. 6584 * Only handles type 0 routing headers. 6585 */ 6586 static void 6587 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6588 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6589 { 6590 ip6_rthdr0_t *rthdr; 6591 uint_t ehdrlen; 6592 uint_t numaddr; 6593 in6_addr_t *addrptr; 6594 in6_addr_t tmp; 6595 6596 ASSERT(rth->ip6r_segleft != 0); 6597 6598 if (!ipv6_forward_src_routed) { 6599 /* XXX Check for source routed out same interface? */ 6600 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 6601 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 6602 freemsg(hada_mp); 6603 freemsg(mp); 6604 return; 6605 } 6606 6607 if (rth->ip6r_type != 0) { 6608 if (hada_mp != NULL) 6609 goto hada_drop; 6610 icmp_param_problem_v6(WR(q), mp, 6611 ICMP6_PARAMPROB_HEADER, 6612 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6613 B_FALSE, B_FALSE); 6614 return; 6615 } 6616 rthdr = (ip6_rthdr0_t *)rth; 6617 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6618 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6619 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6620 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6621 if (rthdr->ip6r0_len & 0x1) { 6622 /* An odd length is impossible */ 6623 if (hada_mp != NULL) 6624 goto hada_drop; 6625 icmp_param_problem_v6(WR(q), mp, 6626 ICMP6_PARAMPROB_HEADER, 6627 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6628 B_FALSE, B_FALSE); 6629 return; 6630 } 6631 numaddr = rthdr->ip6r0_len / 2; 6632 if (rthdr->ip6r0_segleft > numaddr) { 6633 /* segleft exceeds number of addresses in routing header */ 6634 if (hada_mp != NULL) 6635 goto hada_drop; 6636 icmp_param_problem_v6(WR(q), mp, 6637 ICMP6_PARAMPROB_HEADER, 6638 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6639 (uchar_t *)ip6h), 6640 B_FALSE, B_FALSE); 6641 return; 6642 } 6643 
addrptr += (numaddr - rthdr->ip6r0_segleft); 6644 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6645 IN6_IS_ADDR_MULTICAST(addrptr)) { 6646 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6647 freemsg(hada_mp); 6648 freemsg(mp); 6649 return; 6650 } 6651 /* Swap */ 6652 tmp = *addrptr; 6653 *addrptr = ip6h->ip6_dst; 6654 ip6h->ip6_dst = tmp; 6655 rthdr->ip6r0_segleft--; 6656 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6657 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6658 if (hada_mp != NULL) 6659 goto hada_drop; 6660 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6661 B_FALSE, B_FALSE); 6662 return; 6663 } 6664 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6665 return; 6666 hada_drop: 6667 /* IPsec kstats: bean counter? */ 6668 freemsg(hada_mp); 6669 freemsg(mp); 6670 } 6671 6672 /* 6673 * Read side put procedure for IPv6 module. 6674 */ 6675 static void 6676 ip_rput_v6(queue_t *q, mblk_t *mp) 6677 { 6678 mblk_t *first_mp; 6679 mblk_t *hada_mp = NULL; 6680 ip6_t *ip6h; 6681 boolean_t ll_multicast = B_FALSE; 6682 boolean_t mctl_present = B_FALSE; 6683 ill_t *ill; 6684 struct iocblk *iocp; 6685 uint_t flags = 0; 6686 mblk_t *dl_mp; 6687 6688 ill = (ill_t *)q->q_ptr; 6689 if (ill->ill_state_flags & ILL_CONDEMNED) { 6690 union DL_primitives *dl; 6691 6692 dl = (union DL_primitives *)mp->b_rptr; 6693 /* 6694 * Things are opening or closing - only accept DLPI 6695 * ack messages. If the stream is closing and ip_wsrv 6696 * has completed, ip_close is out of the qwait, but has 6697 * not yet completed qprocsoff. Don't proceed any further 6698 * because the ill has been cleaned up and things hanging 6699 * off the ill have been freed. 
6700 */ 6701 if ((mp->b_datap->db_type != M_PCPROTO) || 6702 (dl->dl_primitive == DL_UNITDATA_IND)) { 6703 inet_freemsg(mp); 6704 return; 6705 } 6706 } 6707 6708 dl_mp = NULL; 6709 switch (mp->b_datap->db_type) { 6710 case M_DATA: { 6711 int hlen; 6712 uchar_t *ucp; 6713 struct ether_header *eh; 6714 dl_unitdata_ind_t *dui; 6715 6716 /* 6717 * This is a work-around for CR 6451644, a bug in Nemo. It 6718 * should be removed when that problem is fixed. 6719 */ 6720 if (ill->ill_mactype == DL_ETHER && 6721 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6722 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6723 ucp[-2] == (IP6_DL_SAP >> 8)) { 6724 if (hlen >= sizeof (struct ether_vlan_header) && 6725 ucp[-5] == 0 && ucp[-6] == 0x81) 6726 ucp -= sizeof (struct ether_vlan_header); 6727 else 6728 ucp -= sizeof (struct ether_header); 6729 /* 6730 * If it's a group address, then fabricate a 6731 * DL_UNITDATA_IND message. 6732 */ 6733 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6734 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6735 BPRI_HI)) != NULL) { 6736 eh = (struct ether_header *)ucp; 6737 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6738 DB_TYPE(dl_mp) = M_PROTO; 6739 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6740 dui->dl_primitive = DL_UNITDATA_IND; 6741 dui->dl_dest_addr_length = 8; 6742 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6743 dui->dl_src_addr_length = 8; 6744 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6745 8; 6746 dui->dl_group_address = 1; 6747 ucp = (uchar_t *)(dui + 1); 6748 if (ill->ill_sap_length > 0) 6749 ucp += ill->ill_sap_length; 6750 bcopy(&eh->ether_dhost, ucp, 6); 6751 bcopy(&eh->ether_shost, ucp + 8, 6); 6752 ucp = (uchar_t *)(dui + 1); 6753 if (ill->ill_sap_length < 0) 6754 ucp += 8 + ill->ill_sap_length; 6755 bcopy(&eh->ether_type, ucp, 2); 6756 bcopy(&eh->ether_type, ucp + 8, 2); 6757 } 6758 } 6759 break; 6760 } 6761 6762 case M_PROTO: 6763 case M_PCPROTO: 6764 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6765 
DL_UNITDATA_IND) { 6766 /* Go handle anything other than data elsewhere. */ 6767 ip_rput_dlpi(q, mp); 6768 return; 6769 } 6770 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6771 ll_multicast = dlur->dl_group_address; 6772 #undef dlur 6773 /* Save the DLPI header. */ 6774 dl_mp = mp; 6775 mp = mp->b_cont; 6776 dl_mp->b_cont = NULL; 6777 break; 6778 case M_BREAK: 6779 panic("ip_rput_v6: got an M_BREAK"); 6780 /*NOTREACHED*/ 6781 case M_IOCACK: 6782 iocp = (struct iocblk *)mp->b_rptr; 6783 switch (iocp->ioc_cmd) { 6784 case DL_IOC_HDR_INFO: 6785 ill = (ill_t *)q->q_ptr; 6786 ill_fastpath_ack(ill, mp); 6787 return; 6788 case SIOCSTUNPARAM: 6789 case SIOCGTUNPARAM: 6790 case OSIOCSTUNPARAM: 6791 case OSIOCGTUNPARAM: 6792 /* Go through qwriter */ 6793 break; 6794 default: 6795 putnext(q, mp); 6796 return; 6797 } 6798 /* FALLTHRU */ 6799 case M_ERROR: 6800 case M_HANGUP: 6801 mutex_enter(&ill->ill_lock); 6802 if (ill->ill_state_flags & ILL_CONDEMNED) { 6803 mutex_exit(&ill->ill_lock); 6804 freemsg(mp); 6805 return; 6806 } 6807 ill_refhold_locked(ill); 6808 mutex_exit(&ill->ill_lock); 6809 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6810 return; 6811 case M_CTL: 6812 if ((MBLKL(mp) > sizeof (int)) && 6813 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6814 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6815 mctl_present = B_TRUE; 6816 break; 6817 } 6818 putnext(q, mp); 6819 return; 6820 case M_IOCNAK: 6821 iocp = (struct iocblk *)mp->b_rptr; 6822 switch (iocp->ioc_cmd) { 6823 case DL_IOC_HDR_INFO: 6824 case SIOCSTUNPARAM: 6825 case SIOCGTUNPARAM: 6826 case OSIOCSTUNPARAM: 6827 case OSIOCGTUNPARAM: 6828 mutex_enter(&ill->ill_lock); 6829 if (ill->ill_state_flags & ILL_CONDEMNED) { 6830 mutex_exit(&ill->ill_lock); 6831 freemsg(mp); 6832 return; 6833 } 6834 ill_refhold_locked(ill); 6835 mutex_exit(&ill->ill_lock); 6836 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6837 B_FALSE); 6838 return; 6839 default: 6840 break; 6841 } 6842 /* FALLTHRU */ 6843 
default: 6844 putnext(q, mp); 6845 return; 6846 } 6847 6848 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 6849 /* 6850 * if db_ref > 1 then copymsg and free original. Packet may be 6851 * changed and do not want other entity who has a reference to this 6852 * message to trip over the changes. This is a blind change because 6853 * trying to catch all places that might change packet is too 6854 * difficult (since it may be a module above this one). 6855 */ 6856 if (mp->b_datap->db_ref > 1) { 6857 mblk_t *mp1; 6858 6859 mp1 = copymsg(mp); 6860 freemsg(mp); 6861 if (mp1 == NULL) { 6862 first_mp = NULL; 6863 goto discard; 6864 } 6865 mp = mp1; 6866 } 6867 first_mp = mp; 6868 if (mctl_present) { 6869 hada_mp = first_mp; 6870 mp = first_mp->b_cont; 6871 } 6872 6873 ip6h = (ip6_t *)mp->b_rptr; 6874 6875 /* check for alignment and full IPv6 header */ 6876 if (!OK_32PTR((uchar_t *)ip6h) || 6877 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6878 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6879 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6880 goto discard; 6881 } 6882 ip6h = (ip6_t *)mp->b_rptr; 6883 } 6884 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6885 IPV6_DEFAULT_VERS_AND_FLOW) { 6886 /* 6887 * It may be a bit too expensive to do this mapped address 6888 * check here, but in the interest of robustness, it seems 6889 * like the correct place. 6890 * TODO: Avoid this check for e.g. connected TCP sockets 6891 */ 6892 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6893 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6894 goto discard; 6895 } 6896 6897 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6898 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6899 goto discard; 6900 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6901 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6902 goto discard; 6903 } 6904 6905 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6906 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6907 } else { 6908 BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); 6909 goto discard; 6910 } 6911 freemsg(dl_mp); 6912 return; 6913 6914 discard: 6915 if (dl_mp != NULL) 6916 freeb(dl_mp); 6917 freemsg(first_mp); 6918 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6919 } 6920 6921 /* 6922 * Walk through the IPv6 packet in mp and see if there's an AH header 6923 * in it. See if the AH header needs to get done before other headers in 6924 * the packet. (Worker function for ipsec_early_ah_v6().) 6925 */ 6926 #define IPSEC_HDR_DONT_PROCESS 0 6927 #define IPSEC_HDR_PROCESS 1 6928 #define IPSEC_MEMORY_ERROR 2 6929 static int 6930 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6931 { 6932 uint_t length; 6933 uint_t ehdrlen; 6934 uint8_t *whereptr; 6935 uint8_t *endptr; 6936 uint8_t *nexthdrp; 6937 ip6_dest_t *desthdr; 6938 ip6_rthdr_t *rthdr; 6939 ip6_t *ip6h; 6940 6941 /* 6942 * For now just pullup everything. In general, the less pullups, 6943 * the better, but there's so much squirrelling through anyway, 6944 * it's just easier this way. 6945 */ 6946 if (!pullupmsg(mp, -1)) { 6947 return (IPSEC_MEMORY_ERROR); 6948 } 6949 6950 ip6h = (ip6_t *)mp->b_rptr; 6951 length = IPV6_HDR_LEN; 6952 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6953 endptr = mp->b_wptr; 6954 6955 /* 6956 * We can't just use the argument nexthdr in the place 6957 * of nexthdrp becaue we don't dereference nexthdrp 6958 * till we confirm whether it is a valid address. 6959 */ 6960 nexthdrp = &ip6h->ip6_nxt; 6961 while (whereptr < endptr) { 6962 /* Is there enough left for len + nexthdr? 
*/ 6963 if (whereptr + MIN_EHDR_LEN > endptr) 6964 return (IPSEC_MEMORY_ERROR); 6965 6966 switch (*nexthdrp) { 6967 case IPPROTO_HOPOPTS: 6968 case IPPROTO_DSTOPTS: 6969 /* Assumes the headers are identical for hbh and dst */ 6970 desthdr = (ip6_dest_t *)whereptr; 6971 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6972 if ((uchar_t *)desthdr + ehdrlen > endptr) 6973 return (IPSEC_MEMORY_ERROR); 6974 /* 6975 * Return DONT_PROCESS because of potential Mobile IPv6 6976 * cruft for destination options. 6977 */ 6978 if (*nexthdrp == IPPROTO_DSTOPTS) 6979 return (IPSEC_HDR_DONT_PROCESS); 6980 nexthdrp = &desthdr->ip6d_nxt; 6981 break; 6982 case IPPROTO_ROUTING: 6983 rthdr = (ip6_rthdr_t *)whereptr; 6984 6985 /* 6986 * If there's more hops left on the routing header, 6987 * return now with DON'T PROCESS. 6988 */ 6989 if (rthdr->ip6r_segleft > 0) 6990 return (IPSEC_HDR_DONT_PROCESS); 6991 6992 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6993 if ((uchar_t *)rthdr + ehdrlen > endptr) 6994 return (IPSEC_MEMORY_ERROR); 6995 nexthdrp = &rthdr->ip6r_nxt; 6996 break; 6997 case IPPROTO_FRAGMENT: 6998 /* Wait for reassembly */ 6999 return (IPSEC_HDR_DONT_PROCESS); 7000 case IPPROTO_AH: 7001 *nexthdr = IPPROTO_AH; 7002 return (IPSEC_HDR_PROCESS); 7003 case IPPROTO_NONE: 7004 /* No next header means we're finished */ 7005 default: 7006 return (IPSEC_HDR_DONT_PROCESS); 7007 } 7008 length += ehdrlen; 7009 whereptr += ehdrlen; 7010 } 7011 panic("ipsec_needs_processing_v6"); 7012 /*NOTREACHED*/ 7013 } 7014 7015 /* 7016 * Path for AH if options are present. If this is the first time we are 7017 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7018 * Otherwise, just fanout. Return value answers the boolean question: 7019 * "Did I consume the mblk you sent me?" 7020 * 7021 * Sometimes AH needs to be done before other IPv6 headers for security 7022 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 7023 * indicates if that is so, and fans out to the appropriate IPsec protocol 7024 * for the datagram passed in. 7025 */ 7026 static boolean_t 7027 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7028 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 7029 { 7030 mblk_t *mp; 7031 uint8_t nexthdr; 7032 ipsec_in_t *ii = NULL; 7033 ah_t *ah; 7034 ipsec_status_t ipsec_rc; 7035 7036 ASSERT((hada_mp == NULL) || (!mctl_present)); 7037 7038 switch (ipsec_needs_processing_v6( 7039 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 7040 case IPSEC_MEMORY_ERROR: 7041 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7042 freemsg(hada_mp); 7043 freemsg(first_mp); 7044 return (B_TRUE); 7045 case IPSEC_HDR_DONT_PROCESS: 7046 return (B_FALSE); 7047 } 7048 7049 /* Default means send it to AH! */ 7050 ASSERT(nexthdr == IPPROTO_AH); 7051 if (!mctl_present) { 7052 mp = first_mp; 7053 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 7054 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7055 "allocation failure.\n")); 7056 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7057 freemsg(hada_mp); 7058 freemsg(mp); 7059 return (B_TRUE); 7060 } 7061 /* 7062 * Store the ill_index so that when we come back 7063 * from IPSEC we ride on the same queue. 7064 */ 7065 ii = (ipsec_in_t *)first_mp->b_rptr; 7066 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7067 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7068 first_mp->b_cont = mp; 7069 } 7070 /* 7071 * Cache hardware acceleration info. 
7072 */ 7073 if (hada_mp != NULL) { 7074 ASSERT(ii != NULL); 7075 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7076 "caching data attr.\n")); 7077 ii->ipsec_in_accelerated = B_TRUE; 7078 ii->ipsec_in_da = hada_mp; 7079 } 7080 7081 if (!ipsec_loaded()) { 7082 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 7083 return (B_TRUE); 7084 } 7085 7086 ah = ipsec_inbound_ah_sa(first_mp); 7087 if (ah == NULL) 7088 return (B_TRUE); 7089 ASSERT(ii->ipsec_in_ah_sa != NULL); 7090 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7091 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7092 7093 switch (ipsec_rc) { 7094 case IPSEC_STATUS_SUCCESS: 7095 /* we're done with IPsec processing, send it up */ 7096 ip_fanout_proto_again(first_mp, ill, ill, ire); 7097 break; 7098 case IPSEC_STATUS_FAILED: 7099 BUMP_MIB(&ip6_mib, ipv6InDiscards); 7100 break; 7101 case IPSEC_STATUS_PENDING: 7102 /* no action needed */ 7103 break; 7104 } 7105 return (B_TRUE); 7106 } 7107 7108 /* 7109 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7110 * ip_rput_v6 has already verified alignment, the min length, the version, 7111 * and db_ref = 1. 7112 * 7113 * The ill passed in (the arg named inill) is the ill that the packet 7114 * actually arrived on. We need to remember this when saving the 7115 * input interface index into potential IPV6_PKTINFO data in 7116 * ip_add_info_v6(). 7117 * 7118 * This routine doesn't free dl_mp; that's the caller's responsibility on 7119 * return. (Note that the callers are complex enough that there's no tail 7120 * recursion here anyway.) 
7121 */ 7122 void 7123 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7124 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7125 { 7126 ire_t *ire = NULL; 7127 queue_t *rq; 7128 ill_t *ill = inill; 7129 ipif_t *ipif; 7130 uint8_t *whereptr; 7131 uint8_t nexthdr; 7132 uint16_t remlen; 7133 uint_t prev_nexthdr_offset; 7134 uint_t used; 7135 size_t pkt_len; 7136 uint16_t ip6_len; 7137 uint_t hdr_len; 7138 boolean_t mctl_present; 7139 mblk_t *first_mp; 7140 mblk_t *first_mp1; 7141 boolean_t no_forward; 7142 ip6_hbh_t *hbhhdr; 7143 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7144 conn_t *connp; 7145 ilm_t *ilm; 7146 uint32_t ports; 7147 uint_t ipif_id = 0; 7148 zoneid_t zoneid = GLOBAL_ZONEID; 7149 uint16_t hck_flags, reass_hck_flags; 7150 uint32_t reass_sum; 7151 boolean_t cksum_err; 7152 mblk_t *mp1; 7153 7154 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7155 7156 if (hada_mp != NULL) { 7157 /* 7158 * It's an IPsec accelerated packet. 7159 * Keep a pointer to the data attributes around until 7160 * we allocate the ipsecinfo structure. 7161 */ 7162 IPSECHW_DEBUG(IPSECHW_PKT, 7163 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7164 hada_mp->b_cont = NULL; 7165 /* 7166 * Since it is accelerated, it came directly from 7167 * the ill. 7168 */ 7169 ASSERT(mctl_present == B_FALSE); 7170 ASSERT(mp->b_datap->db_type != M_CTL); 7171 } 7172 7173 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7174 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7175 7176 if (mp->b_cont == NULL) 7177 pkt_len = mp->b_wptr - mp->b_rptr; 7178 else 7179 pkt_len = msgdsize(mp); 7180 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7181 7182 /* 7183 * Check for bogus (too short packet) and packet which 7184 * was padded by the link layer. 
7185 */ 7186 if (ip6_len != pkt_len) { 7187 ssize_t diff; 7188 7189 if (ip6_len > pkt_len) { 7190 ip1dbg(("ip_rput_data_v6: packet too short %d %lu\n", 7191 ip6_len, pkt_len)); 7192 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 7193 freemsg(hada_mp); 7194 freemsg(first_mp); 7195 return; 7196 } 7197 diff = (ssize_t)(pkt_len - ip6_len); 7198 7199 if (!adjmsg(mp, -diff)) { 7200 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7201 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7202 freemsg(hada_mp); 7203 freemsg(first_mp); 7204 return; 7205 } 7206 pkt_len -= diff; 7207 } 7208 7209 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7210 hck_flags = DB_CKSUMFLAGS(mp); 7211 else 7212 hck_flags = 0; 7213 7214 /* Clear checksum flags in case we need to forward */ 7215 DB_CKSUMFLAGS(mp) = 0; 7216 reass_sum = reass_hck_flags = 0; 7217 7218 nexthdr = ip6h->ip6_nxt; 7219 7220 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7221 (uchar_t *)ip6h); 7222 whereptr = (uint8_t *)&ip6h[1]; 7223 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7224 7225 /* Process hop by hop header options */ 7226 if (nexthdr == IPPROTO_HOPOPTS) { 7227 uint_t ehdrlen; 7228 uint8_t *optptr; 7229 7230 if (remlen < MIN_EHDR_LEN) 7231 goto pkt_too_short; 7232 if (mp->b_cont != NULL && 7233 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7234 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7235 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7236 freemsg(hada_mp); 7237 freemsg(first_mp); 7238 return; 7239 } 7240 ip6h = (ip6_t *)mp->b_rptr; 7241 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7242 } 7243 hbhhdr = (ip6_hbh_t *)whereptr; 7244 nexthdr = hbhhdr->ip6h_nxt; 7245 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7246 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7247 7248 if (remlen < ehdrlen) 7249 goto pkt_too_short; 7250 if (mp->b_cont != NULL && 7251 whereptr + ehdrlen > mp->b_wptr) { 7252 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7253 BUMP_MIB(ill->ill_ip6_mib, 
ipv6InDiscards); 7254 freemsg(hada_mp); 7255 freemsg(first_mp); 7256 return; 7257 } 7258 ip6h = (ip6_t *)mp->b_rptr; 7259 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7260 hbhhdr = (ip6_hbh_t *)whereptr; 7261 } 7262 7263 optptr = whereptr + 2; 7264 whereptr += ehdrlen; 7265 remlen -= ehdrlen; 7266 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7267 ehdrlen - 2, IPPROTO_HOPOPTS)) { 7268 case -1: 7269 /* 7270 * Packet has been consumed and any 7271 * needed ICMP messages sent. 7272 */ 7273 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7274 freemsg(hada_mp); 7275 return; 7276 case 0: 7277 /* no action needed */ 7278 break; 7279 case 1: 7280 /* Known router alert */ 7281 goto ipv6forus; 7282 } 7283 } 7284 7285 /* 7286 * Attach any necessary label information to this packet. 7287 */ 7288 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7289 if (ip6opt_ls != 0) 7290 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7291 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7292 freemsg(hada_mp); 7293 freemsg(first_mp); 7294 return; 7295 } 7296 7297 /* 7298 * On incoming v6 multicast packets we will bypass the ire table, 7299 * and assume that the read queue corresponds to the targetted 7300 * interface. 7301 * 7302 * The effect of this is the same as the IPv4 original code, but is 7303 * much cleaner I think. See ip_rput for how that was done. 7304 */ 7305 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7306 BUMP_MIB(ill->ill_ip6_mib, ipv6InMcastPkts); 7307 /* 7308 * XXX TODO Give to mrouted to for multicast forwarding. 
7309 */ 7310 ILM_WALKER_HOLD(ill); 7311 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7312 ILM_WALKER_RELE(ill); 7313 if (ilm == NULL) { 7314 if (ip_debug > 3) { 7315 /* ip2dbg */ 7316 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7317 " which is not for us: %s\n", AF_INET6, 7318 &ip6h->ip6_dst); 7319 } 7320 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7321 freemsg(hada_mp); 7322 freemsg(first_mp); 7323 return; 7324 } 7325 if (ip_debug > 3) { 7326 /* ip2dbg */ 7327 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7328 AF_INET6, &ip6h->ip6_dst); 7329 } 7330 rq = ill->ill_rq; 7331 zoneid = GLOBAL_ZONEID; 7332 goto ipv6forus; 7333 } 7334 7335 ipif = ill->ill_ipif; 7336 7337 /* 7338 * If a packet was received on an interface that is a 6to4 tunnel, 7339 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7340 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7341 * the 6to4 prefix of the address configured on the receiving interface. 7342 * Otherwise, the packet was delivered to this interface in error and 7343 * the packet must be dropped. 7344 */ 7345 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7346 7347 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7348 &ip6h->ip6_dst)) { 7349 if (ip_debug > 2) { 7350 /* ip1dbg */ 7351 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7352 "addressed packet which is not for us: " 7353 "%s\n", AF_INET6, &ip6h->ip6_dst); 7354 } 7355 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7356 freemsg(first_mp); 7357 return; 7358 } 7359 } 7360 7361 /* 7362 * Find an ire that matches destination. For link-local addresses 7363 * we have to match the ill. 7364 * TBD for site local addresses. 
7365 */ 7366 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7367 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7368 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7369 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7370 } else { 7371 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7372 MBLK_GETLABEL(mp)); 7373 } 7374 if (ire == NULL) { 7375 /* 7376 * No matching IRE found. Mark this packet as having 7377 * originated externally. 7378 */ 7379 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7380 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7381 if (!(ill->ill_flags & ILLF_ROUTER)) 7382 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7383 freemsg(hada_mp); 7384 freemsg(first_mp); 7385 return; 7386 } 7387 if (ip6h->ip6_hops <= 1) { 7388 if (hada_mp != NULL) 7389 goto hada_drop; 7390 icmp_time_exceeded_v6(WR(q), first_mp, 7391 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7392 return; 7393 } 7394 /* 7395 * Per RFC 3513 section 2.5.2, we must not forward packets with 7396 * an unspecified source address. 7397 */ 7398 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7399 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7400 freemsg(hada_mp); 7401 freemsg(first_mp); 7402 return; 7403 } 7404 mp->b_prev = (mblk_t *)(uintptr_t) 7405 ill->ill_phyint->phyint_ifindex; 7406 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7407 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7408 ALL_ZONES); 7409 return; 7410 } 7411 ipif_id = ire->ire_ipif->ipif_seqid; 7412 /* we have a matching IRE */ 7413 if (ire->ire_stq != NULL) { 7414 ill_group_t *ill_group; 7415 ill_group_t *ire_group; 7416 7417 /* 7418 * To be quicker, we may wish not to chase pointers 7419 * (ire->ire_ipif->ipif_ill...) and instead store the 7420 * forwarding policy in the ire. An unfortunate side- 7421 * effect of this would be requiring an ire flush whenever 7422 * the ILLF_ROUTER flag changes. For now, chase pointers 7423 * once and store in the boolean no_forward. 
7424 * 7425 * This appears twice to keep it out of the non-forwarding, 7426 * yes-it's-for-us-on-the-right-interface case. 7427 */ 7428 no_forward = ((ill->ill_flags & 7429 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7430 7431 7432 ASSERT(first_mp == mp); 7433 /* 7434 * This ire has a send-to queue - forward the packet. 7435 */ 7436 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7437 freemsg(hada_mp); 7438 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7439 if (no_forward) 7440 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7441 freemsg(mp); 7442 ire_refrele(ire); 7443 return; 7444 } 7445 if (ip6h->ip6_hops <= 1) { 7446 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7447 icmp_time_exceeded_v6(WR(q), mp, 7448 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE); 7449 ire_refrele(ire); 7450 return; 7451 } 7452 /* 7453 * Per RFC 3513 section 2.5.2, we must not forward packets with 7454 * an unspecified source address. 7455 */ 7456 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7457 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7458 freemsg(mp); 7459 ire_refrele(ire); 7460 return; 7461 } 7462 7463 if (is_system_labeled()) { 7464 mblk_t *mp1; 7465 7466 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7467 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7468 freemsg(mp); 7469 ire_refrele(ire); 7470 return; 7471 } 7472 /* Size may have changed */ 7473 mp = mp1; 7474 ip6h = (ip6_t *)mp->b_rptr; 7475 pkt_len = msgdsize(mp); 7476 } 7477 7478 if (pkt_len > ire->ire_max_frag) { 7479 BUMP_MIB(ill->ill_ip6_mib, ipv6InTooBigErrors); 7480 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7481 ll_multicast, B_TRUE); 7482 ire_refrele(ire); 7483 return; 7484 } 7485 7486 /* 7487 * Check to see if we're forwarding the packet to a 7488 * different link from which it came. If so, check the 7489 * source and destination addresses since routers must not 7490 * forward any packets with link-local source or 7491 * destination addresses to other links. 
Otherwise (if 7492 * we're forwarding onto the same link), conditionally send 7493 * a redirect message. 7494 */ 7495 ill_group = ill->ill_group; 7496 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7497 if (ire->ire_rfq != q && (ill_group == NULL || 7498 ill_group != ire_group)) { 7499 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7500 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7501 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7502 freemsg(mp); 7503 ire_refrele(ire); 7504 return; 7505 } 7506 /* TBD add site-local check at site boundary? */ 7507 } else if (ipv6_send_redirects) { 7508 in6_addr_t *v6targ; 7509 in6_addr_t gw_addr_v6; 7510 ire_t *src_ire_v6 = NULL; 7511 7512 /* 7513 * Don't send a redirect when forwarding a source 7514 * routed packet. 7515 */ 7516 if (ip_source_routed_v6(ip6h, mp)) 7517 goto forward; 7518 7519 mutex_enter(&ire->ire_lock); 7520 gw_addr_v6 = ire->ire_gateway_addr_v6; 7521 mutex_exit(&ire->ire_lock); 7522 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7523 v6targ = &gw_addr_v6; 7524 /* 7525 * We won't send redirects to a router 7526 * that doesn't have a link local 7527 * address, but will forward. 7528 */ 7529 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7530 BUMP_MIB(ill->ill_ip6_mib, 7531 ipv6InAddrErrors); 7532 goto forward; 7533 } 7534 } else { 7535 v6targ = &ip6h->ip6_dst; 7536 } 7537 7538 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7539 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7540 ALL_ZONES, 0, NULL, 7541 MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7542 7543 if (src_ire_v6 != NULL) { 7544 /* 7545 * The source is directly connected. 
7546 */ 7547 mp1 = copymsg(mp); 7548 if (mp1 != NULL) { 7549 icmp_send_redirect_v6(WR(q), 7550 mp1, v6targ, &ip6h->ip6_dst, 7551 ill, B_FALSE); 7552 } 7553 ire_refrele(src_ire_v6); 7554 } 7555 } 7556 7557 forward: 7558 /* Hoplimit verified above */ 7559 ip6h->ip6_hops--; 7560 UPDATE_IB_PKT_COUNT(ire); 7561 ire->ire_last_used_time = lbolt; 7562 BUMP_MIB(ill->ill_ip6_mib, ipv6OutForwDatagrams); 7563 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7564 IRE_REFRELE(ire); 7565 return; 7566 } 7567 rq = ire->ire_rfq; 7568 7569 /* 7570 * Need to put on correct queue for reassembly to find it. 7571 * No need to use put() since reassembly has its own locks. 7572 * Note: multicast packets and packets destined to addresses 7573 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7574 * the arriving ill. 7575 */ 7576 if (rq != q) { 7577 boolean_t check_multi = B_TRUE; 7578 ill_group_t *ill_group = NULL; 7579 ill_group_t *ire_group = NULL; 7580 ill_t *ire_ill = NULL; 7581 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7582 7583 /* 7584 * To be quicker, we may wish not to chase pointers 7585 * (ire->ire_ipif->ipif_ill...) and instead store the 7586 * forwarding policy in the ire. An unfortunate side- 7587 * effect of this would be requiring an ire flush whenever 7588 * the ILLF_ROUTER flag changes. For now, chase pointers 7589 * once and store in the boolean no_forward. 7590 */ 7591 no_forward = ((ill->ill_flags & 7592 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7593 7594 ill_group = ill->ill_group; 7595 if (rq != NULL) { 7596 ire_ill = (ill_t *)(rq->q_ptr); 7597 ire_group = ire_ill->ill_group; 7598 } 7599 7600 /* 7601 * If it's part of the same IPMP group, or if it's a legal 7602 * address on the 'usesrc' interface, then bypass strict 7603 * checks. 
7604 */ 7605 if (ill_group != NULL && ill_group == ire_group) { 7606 check_multi = B_FALSE; 7607 } else if (ill_ifindex != 0 && ire_ill != NULL && 7608 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7609 check_multi = B_FALSE; 7610 } 7611 7612 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7613 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7614 /* 7615 * This packet came in on an interface other than the 7616 * one associated with the destination address 7617 * and we are strict about matches. 7618 * 7619 * As long as the ills belong to the same group, 7620 * we don't consider them to arriving on the wrong 7621 * interface. Thus, when the switch is doing inbound 7622 * load spreading, we won't drop packets when we 7623 * are doing strict multihoming checks. 7624 */ 7625 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7626 freemsg(hada_mp); 7627 freemsg(first_mp); 7628 ire_refrele(ire); 7629 return; 7630 } 7631 7632 if (rq != NULL) 7633 q = rq; 7634 7635 ill = (ill_t *)q->q_ptr; 7636 ASSERT(ill); 7637 } 7638 7639 zoneid = ire->ire_zoneid; 7640 UPDATE_IB_PKT_COUNT(ire); 7641 ire->ire_last_used_time = lbolt; 7642 /* Don't use the ire after this point. */ 7643 ire_refrele(ire); 7644 ipv6forus: 7645 /* 7646 * Looks like this packet is for us one way or another. 7647 * This is where we'll process destination headers etc. 
7648 */ 7649 for (; ; ) { 7650 switch (nexthdr) { 7651 case IPPROTO_TCP: { 7652 uint16_t *up; 7653 uint32_t sum; 7654 int offset; 7655 7656 hdr_len = pkt_len - remlen; 7657 7658 if (hada_mp != NULL) { 7659 ip0dbg(("tcp hada drop\n")); 7660 goto hada_drop; 7661 } 7662 7663 7664 /* TCP needs all of the TCP header */ 7665 if (remlen < TCP_MIN_HEADER_LENGTH) 7666 goto pkt_too_short; 7667 if (mp->b_cont != NULL && 7668 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7669 if (!pullupmsg(mp, 7670 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7671 BUMP_MIB(ill->ill_ip6_mib, 7672 ipv6InDiscards); 7673 freemsg(first_mp); 7674 return; 7675 } 7676 hck_flags = 0; 7677 ip6h = (ip6_t *)mp->b_rptr; 7678 whereptr = (uint8_t *)ip6h + hdr_len; 7679 } 7680 /* 7681 * Extract the offset field from the TCP header. 7682 */ 7683 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7684 if (offset != 5) { 7685 if (offset < 5) { 7686 ip1dbg(("ip_rput_data_v6: short " 7687 "TCP data offset")); 7688 BUMP_MIB(ill->ill_ip6_mib, 7689 ipv6InDiscards); 7690 freemsg(first_mp); 7691 return; 7692 } 7693 /* 7694 * There must be TCP options. 7695 * Make sure we can grab them. 7696 */ 7697 offset <<= 2; 7698 if (remlen < offset) 7699 goto pkt_too_short; 7700 if (mp->b_cont != NULL && 7701 whereptr + offset > mp->b_wptr) { 7702 if (!pullupmsg(mp, 7703 hdr_len + offset)) { 7704 BUMP_MIB(ill->ill_ip6_mib, 7705 ipv6InDiscards); 7706 freemsg(first_mp); 7707 return; 7708 } 7709 hck_flags = 0; 7710 ip6h = (ip6_t *)mp->b_rptr; 7711 whereptr = (uint8_t *)ip6h + hdr_len; 7712 } 7713 } 7714 7715 up = (uint16_t *)&ip6h->ip6_src; 7716 /* 7717 * TCP checksum calculation. 
First sum up the 7718 * pseudo-header fields: 7719 * - Source IPv6 address 7720 * - Destination IPv6 address 7721 * - TCP payload length 7722 * - TCP protocol ID 7723 */ 7724 sum = htons(IPPROTO_TCP + remlen) + 7725 up[0] + up[1] + up[2] + up[3] + 7726 up[4] + up[5] + up[6] + up[7] + 7727 up[8] + up[9] + up[10] + up[11] + 7728 up[12] + up[13] + up[14] + up[15]; 7729 7730 /* Fold initial sum */ 7731 sum = (sum & 0xffff) + (sum >> 16); 7732 7733 mp1 = mp->b_cont; 7734 7735 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7736 IP6_STAT(ip6_in_sw_cksum); 7737 7738 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7739 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7740 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7741 mp, mp1, cksum_err); 7742 7743 if (cksum_err) { 7744 BUMP_MIB(&ip_mib, tcpInErrs); 7745 7746 if (hck_flags & HCK_FULLCKSUM) 7747 IP6_STAT(ip6_tcp_in_full_hw_cksum_err); 7748 else if (hck_flags & HCK_PARTIALCKSUM) 7749 IP6_STAT(ip6_tcp_in_part_hw_cksum_err); 7750 else 7751 IP6_STAT(ip6_tcp_in_sw_cksum_err); 7752 7753 freemsg(first_mp); 7754 return; 7755 } 7756 tcp_fanout: 7757 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7758 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7759 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7760 return; 7761 } 7762 case IPPROTO_SCTP: 7763 { 7764 sctp_hdr_t *sctph; 7765 uint32_t calcsum, pktsum; 7766 uint_t hdr_len = pkt_len - remlen; 7767 7768 /* SCTP needs all of the SCTP header */ 7769 if (remlen < sizeof (*sctph)) { 7770 goto pkt_too_short; 7771 } 7772 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7773 ASSERT(mp->b_cont != NULL); 7774 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7775 BUMP_MIB(ill->ill_ip6_mib, 7776 ipv6InDiscards); 7777 freemsg(mp); 7778 return; 7779 } 7780 ip6h = (ip6_t *)mp->b_rptr; 7781 whereptr = (uint8_t *)ip6h + hdr_len; 7782 } 7783 7784 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7785 /* checksum */ 7786 pktsum = sctph->sh_chksum; 7787 sctph->sh_chksum = 0; 7788 calcsum = sctp_cksum(mp, 
hdr_len); 7789 if (calcsum != pktsum) { 7790 BUMP_MIB(&sctp_mib, sctpChecksumError); 7791 freemsg(mp); 7792 return; 7793 } 7794 sctph->sh_chksum = pktsum; 7795 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7796 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7797 ports, ipif_id, zoneid, mp)) == NULL) { 7798 ip_fanout_sctp_raw(first_mp, ill, 7799 (ipha_t *)ip6h, B_FALSE, ports, 7800 mctl_present, 7801 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7802 B_TRUE, ipif_id, zoneid); 7803 return; 7804 } 7805 BUMP_MIB(&ip_mib, ipInDelivers); 7806 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7807 B_FALSE, mctl_present); 7808 return; 7809 } 7810 case IPPROTO_UDP: { 7811 uint16_t *up; 7812 uint32_t sum; 7813 7814 hdr_len = pkt_len - remlen; 7815 7816 if (hada_mp != NULL) { 7817 ip0dbg(("udp hada drop\n")); 7818 goto hada_drop; 7819 } 7820 7821 /* Verify that at least the ports are present */ 7822 if (remlen < UDPH_SIZE) 7823 goto pkt_too_short; 7824 if (mp->b_cont != NULL && 7825 whereptr + UDPH_SIZE > mp->b_wptr) { 7826 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7827 BUMP_MIB(ill->ill_ip6_mib, 7828 ipv6InDiscards); 7829 freemsg(first_mp); 7830 return; 7831 } 7832 hck_flags = 0; 7833 ip6h = (ip6_t *)mp->b_rptr; 7834 whereptr = (uint8_t *)ip6h + hdr_len; 7835 } 7836 7837 /* 7838 * Before going through the regular checksum 7839 * calculation, make sure the received checksum 7840 * is non-zero. RFC 2460 says, a 0x0000 checksum 7841 * in a UDP packet (within IPv6 packet) is invalid 7842 * and should be replaced by 0xffff. This makes 7843 * sense as regular checksum calculation will 7844 * pass for both the cases i.e. 0x0000 and 0xffff. 7845 * Removing one of the case makes error detection 7846 * stronger. 
7847 */ 7848 7849 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7850 /* 0x0000 checksum is invalid */ 7851 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7852 "checksum value 0x0000\n")); 7853 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7854 freemsg(first_mp); 7855 return; 7856 } 7857 7858 up = (uint16_t *)&ip6h->ip6_src; 7859 7860 /* 7861 * UDP checksum calculation. First sum up the 7862 * pseudo-header fields: 7863 * - Source IPv6 address 7864 * - Destination IPv6 address 7865 * - UDP payload length 7866 * - UDP protocol ID 7867 */ 7868 7869 sum = htons(IPPROTO_UDP + remlen) + 7870 up[0] + up[1] + up[2] + up[3] + 7871 up[4] + up[5] + up[6] + up[7] + 7872 up[8] + up[9] + up[10] + up[11] + 7873 up[12] + up[13] + up[14] + up[15]; 7874 7875 /* Fold initial sum */ 7876 sum = (sum & 0xffff) + (sum >> 16); 7877 7878 if (reass_hck_flags != 0) { 7879 hck_flags = reass_hck_flags; 7880 7881 IP_CKSUM_RECV_REASS(hck_flags, 7882 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7883 sum, reass_sum, cksum_err); 7884 } else { 7885 mp1 = mp->b_cont; 7886 7887 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7888 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7889 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7890 mp, mp1, cksum_err); 7891 } 7892 7893 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7894 IP6_STAT(ip6_in_sw_cksum); 7895 7896 if (cksum_err) { 7897 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7898 7899 if (hck_flags & HCK_FULLCKSUM) 7900 IP6_STAT(ip6_udp_in_full_hw_cksum_err); 7901 else if (hck_flags & HCK_PARTIALCKSUM) 7902 IP6_STAT(ip6_udp_in_part_hw_cksum_err); 7903 else 7904 IP6_STAT(ip6_udp_in_sw_cksum_err); 7905 7906 freemsg(first_mp); 7907 return; 7908 } 7909 goto udp_fanout; 7910 } 7911 case IPPROTO_ICMPV6: { 7912 uint16_t *up; 7913 uint32_t sum; 7914 uint_t hdr_len = pkt_len - remlen; 7915 7916 if (hada_mp != NULL) { 7917 ip0dbg(("icmp hada drop\n")); 7918 goto hada_drop; 7919 } 7920 7921 up = (uint16_t *)&ip6h->ip6_src; 7922 sum = htons(IPPROTO_ICMPV6 + remlen) + 7923 
up[0] + up[1] + up[2] + up[3] + 7924 up[4] + up[5] + up[6] + up[7] + 7925 up[8] + up[9] + up[10] + up[11] + 7926 up[12] + up[13] + up[14] + up[15]; 7927 sum = (sum & 0xffff) + (sum >> 16); 7928 sum = IP_CSUM(mp, hdr_len, sum); 7929 if (sum != 0) { 7930 /* IPv6 ICMP checksum failed */ 7931 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7932 "failed %x\n", 7933 sum)); 7934 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7935 BUMP_MIB(ill->ill_icmp6_mib, 7936 ipv6IfIcmpInErrors); 7937 freemsg(first_mp); 7938 return; 7939 } 7940 7941 icmp_fanout: 7942 /* Check variable for testing applications */ 7943 if (ipv6_drop_inbound_icmpv6) { 7944 freemsg(first_mp); 7945 return; 7946 } 7947 /* 7948 * Assume that there is always at least one conn for 7949 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7950 * where there is no conn. 7951 */ 7952 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7953 ASSERT(!(ill->ill_phyint->phyint_flags & 7954 PHYI_LOOPBACK)); 7955 /* 7956 * In the multicast case, applications may have 7957 * joined the group from different zones, so we 7958 * need to deliver the packet to each of them. 7959 * Loop through the multicast memberships 7960 * structures (ilm) on the receive ill and send 7961 * a copy of the packet up each matching one. 
7962 */ 7963 ILM_WALKER_HOLD(ill); 7964 for (ilm = ill->ill_ilm; ilm != NULL; 7965 ilm = ilm->ilm_next) { 7966 if (ilm->ilm_flags & ILM_DELETED) 7967 continue; 7968 if (!IN6_ARE_ADDR_EQUAL( 7969 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7970 continue; 7971 if (!ipif_lookup_zoneid(ill, 7972 ilm->ilm_zoneid, IPIF_UP, NULL)) 7973 continue; 7974 7975 first_mp1 = ip_copymsg(first_mp); 7976 if (first_mp1 == NULL) 7977 continue; 7978 icmp_inbound_v6(q, first_mp1, ill, 7979 hdr_len, mctl_present, 0, 7980 ilm->ilm_zoneid, dl_mp); 7981 } 7982 ILM_WALKER_RELE(ill); 7983 } else { 7984 first_mp1 = ip_copymsg(first_mp); 7985 if (first_mp1 != NULL) 7986 icmp_inbound_v6(q, first_mp1, ill, 7987 hdr_len, mctl_present, 0, zoneid, 7988 dl_mp); 7989 } 7990 } 7991 /* FALLTHRU */ 7992 default: { 7993 /* 7994 * Handle protocols with which IPv6 is less intimate. 7995 */ 7996 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 7997 7998 if (hada_mp != NULL) { 7999 ip0dbg(("default hada drop\n")); 8000 goto hada_drop; 8001 } 8002 8003 /* 8004 * Enable sending ICMP for "Unknown" nexthdr 8005 * case. i.e. where we did not FALLTHRU from 8006 * IPPROTO_ICMPV6 processing case above. 8007 * If we did FALLTHRU, then the packet has already been 8008 * processed for IPPF, don't process it again in 8009 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8010 * flags 8011 */ 8012 if (nexthdr != IPPROTO_ICMPV6) 8013 proto_flags |= IP_FF_SEND_ICMP; 8014 else 8015 proto_flags |= IP6_NO_IPPOLICY; 8016 8017 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8018 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8019 mctl_present, zoneid); 8020 return; 8021 } 8022 8023 case IPPROTO_DSTOPTS: { 8024 uint_t ehdrlen; 8025 uint8_t *optptr; 8026 ip6_dest_t *desthdr; 8027 8028 /* Check if AH is present. 
*/ 8029 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8030 ire, hada_mp, zoneid)) { 8031 ip0dbg(("dst early hada drop\n")); 8032 return; 8033 } 8034 8035 /* 8036 * Reinitialize pointers, as ipsec_early_ah_v6() does 8037 * complete pullups. We don't have to do more pullups 8038 * as a result. 8039 */ 8040 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8041 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8042 ip6h = (ip6_t *)mp->b_rptr; 8043 8044 if (remlen < MIN_EHDR_LEN) 8045 goto pkt_too_short; 8046 8047 desthdr = (ip6_dest_t *)whereptr; 8048 nexthdr = desthdr->ip6d_nxt; 8049 prev_nexthdr_offset = (uint_t)(whereptr - 8050 (uint8_t *)ip6h); 8051 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8052 if (remlen < ehdrlen) 8053 goto pkt_too_short; 8054 optptr = whereptr + 2; 8055 /* 8056 * Note: XXX This code does not seem to make 8057 * distinction between Destination Options Header 8058 * being before/after Routing Header which can 8059 * happen if we are at the end of source route. 8060 * This may become significant in future. 8061 * (No real significant Destination Options are 8062 * defined/implemented yet ). 8063 */ 8064 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8065 ehdrlen - 2, IPPROTO_DSTOPTS)) { 8066 case -1: 8067 /* 8068 * Packet has been consumed and any needed 8069 * ICMP errors sent. 
8070 */ 8071 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8072 freemsg(hada_mp); 8073 return; 8074 case 0: 8075 /* No action needed continue */ 8076 break; 8077 case 1: 8078 /* 8079 * Unnexpected return value 8080 * (Router alert is a Hop-by-Hop option) 8081 */ 8082 #ifdef DEBUG 8083 panic("ip_rput_data_v6: router " 8084 "alert hbh opt indication in dest opt"); 8085 /*NOTREACHED*/ 8086 #else 8087 freemsg(hada_mp); 8088 freemsg(first_mp); 8089 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8090 return; 8091 #endif 8092 } 8093 used = ehdrlen; 8094 break; 8095 } 8096 case IPPROTO_FRAGMENT: { 8097 ip6_frag_t *fraghdr; 8098 size_t no_frag_hdr_len; 8099 8100 if (hada_mp != NULL) { 8101 ip0dbg(("frag hada drop\n")); 8102 goto hada_drop; 8103 } 8104 8105 ASSERT(first_mp == mp); 8106 if (remlen < sizeof (ip6_frag_t)) 8107 goto pkt_too_short; 8108 8109 if (mp->b_cont != NULL && 8110 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8111 if (!pullupmsg(mp, 8112 pkt_len - remlen + sizeof (ip6_frag_t))) { 8113 BUMP_MIB(ill->ill_ip6_mib, 8114 ipv6InDiscards); 8115 freemsg(mp); 8116 return; 8117 } 8118 hck_flags = 0; 8119 ip6h = (ip6_t *)mp->b_rptr; 8120 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8121 } 8122 8123 fraghdr = (ip6_frag_t *)whereptr; 8124 used = (uint_t)sizeof (ip6_frag_t); 8125 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmReqds); 8126 8127 /* 8128 * Invoke the CGTP (multirouting) filtering module to 8129 * process the incoming packet. Packets identified as 8130 * duplicates must be discarded. Filtering is active 8131 * only if the the ip_cgtp_filter ndd variable is 8132 * non-zero. 
8133 */ 8134 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 8135 int cgtp_flt_pkt = 8136 ip_cgtp_filter_ops->cfo_filter_v6( 8137 inill->ill_rq, ip6h, fraghdr); 8138 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8139 freemsg(mp); 8140 return; 8141 } 8142 } 8143 8144 /* Restore the flags */ 8145 DB_CKSUMFLAGS(mp) = hck_flags; 8146 8147 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8148 remlen - used, &prev_nexthdr_offset, 8149 &reass_sum, &reass_hck_flags); 8150 if (mp == NULL) { 8151 /* Reassembly is still pending */ 8152 return; 8153 } 8154 /* The first mblk are the headers before the frag hdr */ 8155 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmOKs); 8156 8157 first_mp = mp; /* mp has most likely changed! */ 8158 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8159 ip6h = (ip6_t *)mp->b_rptr; 8160 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8161 whereptr = mp->b_rptr + no_frag_hdr_len; 8162 remlen = ntohs(ip6h->ip6_plen) + 8163 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8164 pkt_len = msgdsize(mp); 8165 used = 0; 8166 break; 8167 } 8168 case IPPROTO_HOPOPTS: 8169 if (hada_mp != NULL) { 8170 ip0dbg(("hop hada drop\n")); 8171 goto hada_drop; 8172 } 8173 /* 8174 * Illegal header sequence. 8175 * (Hop-by-hop headers are processed above 8176 * and required to immediately follow IPv6 header) 8177 */ 8178 icmp_param_problem_v6(WR(q), first_mp, 8179 ICMP6_PARAMPROB_NEXTHEADER, 8180 prev_nexthdr_offset, 8181 B_FALSE, B_FALSE); 8182 return; 8183 8184 case IPPROTO_ROUTING: { 8185 uint_t ehdrlen; 8186 ip6_rthdr_t *rthdr; 8187 8188 /* Check if AH is present. */ 8189 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8190 ire, hada_mp, zoneid)) { 8191 ip0dbg(("routing hada drop\n")); 8192 return; 8193 } 8194 8195 /* 8196 * Reinitialize pointers, as ipsec_early_ah_v6() does 8197 * complete pullups. We don't have to do more pullups 8198 * as a result. 
8199 */ 8200 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8201 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8202 ip6h = (ip6_t *)mp->b_rptr; 8203 8204 if (remlen < MIN_EHDR_LEN) 8205 goto pkt_too_short; 8206 rthdr = (ip6_rthdr_t *)whereptr; 8207 nexthdr = rthdr->ip6r_nxt; 8208 prev_nexthdr_offset = (uint_t)(whereptr - 8209 (uint8_t *)ip6h); 8210 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8211 if (remlen < ehdrlen) 8212 goto pkt_too_short; 8213 if (rthdr->ip6r_segleft != 0) { 8214 /* Not end of source route */ 8215 if (ll_multicast) { 8216 BUMP_MIB(ill->ill_ip6_mib, 8217 ipv6ForwProhibits); 8218 freemsg(hada_mp); 8219 freemsg(mp); 8220 return; 8221 } 8222 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8223 flags, hada_mp, dl_mp); 8224 return; 8225 } 8226 used = ehdrlen; 8227 break; 8228 } 8229 case IPPROTO_AH: 8230 case IPPROTO_ESP: { 8231 /* 8232 * Fast path for AH/ESP. If this is the first time 8233 * we are sending a datagram to AH/ESP, allocate 8234 * a IPSEC_IN message and prepend it. Otherwise, 8235 * just fanout. 8236 */ 8237 8238 ipsec_in_t *ii; 8239 int ipsec_rc; 8240 8241 if (!mctl_present) { 8242 ASSERT(first_mp == mp); 8243 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 8244 NULL) { 8245 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8246 "allocation failure.\n")); 8247 BUMP_MIB(ill->ill_ip6_mib, 8248 ipv6InDiscards); 8249 freemsg(mp); 8250 return; 8251 } 8252 /* 8253 * Store the ill_index so that when we come back 8254 * from IPSEC we ride on the same queue. 8255 */ 8256 ii = (ipsec_in_t *)first_mp->b_rptr; 8257 ii->ipsec_in_ill_index = 8258 ill->ill_phyint->phyint_ifindex; 8259 ii->ipsec_in_rill_index = 8260 ii->ipsec_in_ill_index; 8261 first_mp->b_cont = mp; 8262 /* 8263 * Cache hardware acceleration info. 
8264 */ 8265 if (hada_mp != NULL) { 8266 IPSECHW_DEBUG(IPSECHW_PKT, 8267 ("ip_rput_data_v6: " 8268 "caching data attr.\n")); 8269 ii->ipsec_in_accelerated = B_TRUE; 8270 ii->ipsec_in_da = hada_mp; 8271 hada_mp = NULL; 8272 } 8273 } else { 8274 ii = (ipsec_in_t *)first_mp->b_rptr; 8275 } 8276 8277 if (!ipsec_loaded()) { 8278 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8279 ire->ire_zoneid); 8280 return; 8281 } 8282 8283 /* select inbound SA and have IPsec process the pkt */ 8284 if (nexthdr == IPPROTO_ESP) { 8285 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 8286 if (esph == NULL) 8287 return; 8288 ASSERT(ii->ipsec_in_esp_sa != NULL); 8289 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8290 NULL); 8291 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8292 first_mp, esph); 8293 } else { 8294 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 8295 if (ah == NULL) 8296 return; 8297 ASSERT(ii->ipsec_in_ah_sa != NULL); 8298 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8299 NULL); 8300 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8301 first_mp, ah); 8302 } 8303 8304 switch (ipsec_rc) { 8305 case IPSEC_STATUS_SUCCESS: 8306 break; 8307 case IPSEC_STATUS_FAILED: 8308 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8309 /* FALLTHRU */ 8310 case IPSEC_STATUS_PENDING: 8311 return; 8312 } 8313 /* we're done with IPsec processing, send it up */ 8314 ip_fanout_proto_again(first_mp, ill, inill, ire); 8315 return; 8316 } 8317 case IPPROTO_NONE: 8318 /* All processing is done. Count as "delivered". 
*/ 8319 freemsg(hada_mp); 8320 freemsg(first_mp); 8321 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8322 return; 8323 } 8324 whereptr += used; 8325 ASSERT(remlen >= used); 8326 remlen -= used; 8327 } 8328 /* NOTREACHED */ 8329 8330 pkt_too_short: 8331 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8332 ip6_len, pkt_len, remlen)); 8333 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 8334 freemsg(hada_mp); 8335 freemsg(first_mp); 8336 return; 8337 udp_fanout: 8338 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8339 connp = NULL; 8340 } else { 8341 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8342 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8343 CONN_DEC_REF(connp); 8344 connp = NULL; 8345 } 8346 } 8347 8348 if (connp == NULL) { 8349 uint32_t ports; 8350 8351 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8352 UDP_PORTS_OFFSET); 8353 IP6_STAT(ip6_udp_slow_path); 8354 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8355 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8356 zoneid); 8357 return; 8358 } 8359 8360 if (CONN_UDP_FLOWCTLD(connp)) { 8361 freemsg(first_mp); 8362 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 8363 CONN_DEC_REF(connp); 8364 return; 8365 } 8366 8367 /* Initiate IPPF processing */ 8368 if (IP6_IN_IPP(flags)) { 8369 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8370 if (mp == NULL) { 8371 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8372 CONN_DEC_REF(connp); 8373 return; 8374 } 8375 } 8376 8377 if (connp->conn_ipv6_recvpktinfo || 8378 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8379 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8380 if (mp == NULL) { 8381 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8382 CONN_DEC_REF(connp); 8383 return; 8384 } 8385 } 8386 8387 IP6_STAT(ip6_udp_fast_path); 8388 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 8389 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8390 8391 /* Send it upstream */ 8392 CONN_UDP_RECV(connp, mp); 8393 8394 CONN_DEC_REF(connp); 
8395 freemsg(hada_mp); 8396 return; 8397 8398 hada_drop: 8399 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8400 /* IPsec kstats: bump counter here */ 8401 freemsg(hada_mp); 8402 freemsg(first_mp); 8403 } 8404 8405 /* 8406 * Reassemble fragment. 8407 * When it returns a completed message the first mblk will only contain 8408 * the headers prior to the fragment header. 8409 * 8410 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8411 * of the preceding header. This is needed to patch the previous header's 8412 * nexthdr field when reassembly completes. 8413 */ 8414 static mblk_t * 8415 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8416 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8417 uint32_t *cksum_val, uint16_t *cksum_flags) 8418 { 8419 ill_t *ill = (ill_t *)q->q_ptr; 8420 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8421 uint16_t offset; 8422 boolean_t more_frags; 8423 uint8_t nexthdr = fraghdr->ip6f_nxt; 8424 in6_addr_t *v6dst_ptr; 8425 in6_addr_t *v6src_ptr; 8426 uint_t end; 8427 uint_t hdr_length; 8428 size_t count; 8429 ipf_t *ipf; 8430 ipf_t **ipfp; 8431 ipfb_t *ipfb; 8432 mblk_t *mp1; 8433 uint8_t ecn_info = 0; 8434 size_t msg_len; 8435 mblk_t *tail_mp; 8436 mblk_t *t_mp; 8437 boolean_t pruned = B_FALSE; 8438 uint32_t sum_val; 8439 uint16_t sum_flags; 8440 8441 8442 if (cksum_val != NULL) 8443 *cksum_val = 0; 8444 if (cksum_flags != NULL) 8445 *cksum_flags = 0; 8446 8447 /* 8448 * We utilize hardware computed checksum info only for UDP since 8449 * IP fragmentation is a normal occurence for the protocol. In 8450 * addition, checksum offload support for IP fragments carrying 8451 * UDP payload is commonly implemented across network adapters. 
8452 */ 8453 ASSERT(ill != NULL); 8454 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8455 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8456 mblk_t *mp1 = mp->b_cont; 8457 int32_t len; 8458 8459 /* Record checksum information from the packet */ 8460 sum_val = (uint32_t)DB_CKSUM16(mp); 8461 sum_flags = DB_CKSUMFLAGS(mp); 8462 8463 /* fragmented payload offset from beginning of mblk */ 8464 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8465 8466 if ((sum_flags & HCK_PARTIALCKSUM) && 8467 (mp1 == NULL || mp1->b_cont == NULL) && 8468 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8469 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8470 uint32_t adj; 8471 /* 8472 * Partial checksum has been calculated by hardware 8473 * and attached to the packet; in addition, any 8474 * prepended extraneous data is even byte aligned. 8475 * If any such data exists, we adjust the checksum; 8476 * this would also handle any postpended data. 8477 */ 8478 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8479 mp, mp1, len, adj); 8480 8481 /* One's complement subtract extraneous checksum */ 8482 if (adj >= sum_val) 8483 sum_val = ~(adj - sum_val) & 0xFFFF; 8484 else 8485 sum_val -= adj; 8486 } 8487 } else { 8488 sum_val = 0; 8489 sum_flags = 0; 8490 } 8491 8492 /* Clear hardware checksumming flag */ 8493 DB_CKSUMFLAGS(mp) = 0; 8494 8495 /* 8496 * Note: Fragment offset in header is in 8-octet units. 8497 * Clearing least significant 3 bits not only extracts 8498 * it but also gets it in units of octets. 8499 */ 8500 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8501 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8502 8503 /* 8504 * Is the more frags flag on and the payload length not a multiple 8505 * of eight? 
8506 */ 8507 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8508 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8509 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8510 (uint32_t)((char *)&ip6h->ip6_plen - 8511 (char *)ip6h), B_FALSE, B_FALSE); 8512 return (NULL); 8513 } 8514 8515 v6src_ptr = &ip6h->ip6_src; 8516 v6dst_ptr = &ip6h->ip6_dst; 8517 end = remlen; 8518 8519 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8520 end += offset; 8521 8522 /* 8523 * Would fragment cause reassembled packet to have a payload length 8524 * greater than IP_MAXPACKET - the max payload size? 8525 */ 8526 if (end > IP_MAXPACKET) { 8527 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8528 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8529 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8530 (char *)ip6h), B_FALSE, B_FALSE); 8531 return (NULL); 8532 } 8533 8534 /* 8535 * This packet just has one fragment. Reassembly not 8536 * needed. 8537 */ 8538 if (!more_frags && offset == 0) { 8539 goto reass_done; 8540 } 8541 8542 /* 8543 * Drop the fragmented as early as possible, if 8544 * we don't have resource(s) to re-assemble. 8545 */ 8546 if (ip_reass_queue_bytes == 0) { 8547 freemsg(mp); 8548 return (NULL); 8549 } 8550 8551 /* Record the ECN field info. */ 8552 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8553 /* 8554 * If this is not the first fragment, dump the unfragmentable 8555 * portion of the packet. 8556 */ 8557 if (offset) 8558 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8559 8560 /* 8561 * Fragmentation reassembly. Each ILL has a hash table for 8562 * queueing packets undergoing reassembly for all IPIFs 8563 * associated with the ILL. The hash is based on the packet 8564 * IP ident field. The ILL frag hash table was allocated 8565 * as a timer block at the time the ILL was created. Whenever 8566 * there is anything on the reassembly queue, the timer will 8567 * be running. 
8568 */ 8569 msg_len = MBLKSIZE(mp); 8570 tail_mp = mp; 8571 while (tail_mp->b_cont != NULL) { 8572 tail_mp = tail_mp->b_cont; 8573 msg_len += MBLKSIZE(tail_mp); 8574 } 8575 /* 8576 * If the reassembly list for this ILL will get too big 8577 * prune it. 8578 */ 8579 8580 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8581 ip_reass_queue_bytes) { 8582 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8583 : (ip_reass_queue_bytes - msg_len)); 8584 pruned = B_TRUE; 8585 } 8586 8587 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8588 mutex_enter(&ipfb->ipfb_lock); 8589 8590 ipfp = &ipfb->ipfb_ipf; 8591 /* Try to find an existing fragment queue for this packet. */ 8592 for (;;) { 8593 ipf = ipfp[0]; 8594 if (ipf) { 8595 /* 8596 * It has to match on ident, source address, and 8597 * dest address. 8598 */ 8599 if (ipf->ipf_ident == ident && 8600 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8601 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8602 8603 /* 8604 * If we have received too many 8605 * duplicate fragments for this packet 8606 * free it. 8607 */ 8608 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8609 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8610 freemsg(mp); 8611 mutex_exit(&ipfb->ipfb_lock); 8612 return (NULL); 8613 } 8614 8615 break; 8616 } 8617 ipfp = &ipf->ipf_hash_next; 8618 continue; 8619 } 8620 8621 8622 /* 8623 * If we pruned the list, do we want to store this new 8624 * fragment?. We apply an optimization here based on the 8625 * fact that most fragments will be received in order. 8626 * So if the offset of this incoming fragment is zero, 8627 * it is the first fragment of a new packet. We will 8628 * keep it. Otherwise drop the fragment, as we have 8629 * probably pruned the packet already (since the 8630 * packet cannot be found). 8631 */ 8632 8633 if (pruned && offset != 0) { 8634 mutex_exit(&ipfb->ipfb_lock); 8635 freemsg(mp); 8636 return (NULL); 8637 } 8638 8639 /* New guy. Allocate a frag message. 
*/ 8640 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8641 if (!mp1) { 8642 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8643 freemsg(mp); 8644 partial_reass_done: 8645 mutex_exit(&ipfb->ipfb_lock); 8646 return (NULL); 8647 } 8648 8649 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8650 /* 8651 * Too many fragmented packets in this hash bucket. 8652 * Free the oldest. 8653 */ 8654 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8655 } 8656 8657 mp1->b_cont = mp; 8658 8659 /* Initialize the fragment header. */ 8660 ipf = (ipf_t *)mp1->b_rptr; 8661 ipf->ipf_mp = mp1; 8662 ipf->ipf_ptphn = ipfp; 8663 ipfp[0] = ipf; 8664 ipf->ipf_hash_next = NULL; 8665 ipf->ipf_ident = ident; 8666 ipf->ipf_v6src = *v6src_ptr; 8667 ipf->ipf_v6dst = *v6dst_ptr; 8668 /* Record reassembly start time. */ 8669 ipf->ipf_timestamp = gethrestime_sec(); 8670 /* Record ipf generation and account for frag header */ 8671 ipf->ipf_gen = ill->ill_ipf_gen++; 8672 ipf->ipf_count = MBLKSIZE(mp1); 8673 ipf->ipf_protocol = nexthdr; 8674 ipf->ipf_nf_hdr_len = 0; 8675 ipf->ipf_prev_nexthdr_offset = 0; 8676 ipf->ipf_last_frag_seen = B_FALSE; 8677 ipf->ipf_ecn = ecn_info; 8678 ipf->ipf_num_dups = 0; 8679 ipfb->ipfb_frag_pkts++; 8680 ipf->ipf_checksum = 0; 8681 ipf->ipf_checksum_flags = 0; 8682 8683 /* Store checksum value in fragment header */ 8684 if (sum_flags != 0) { 8685 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8686 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8687 ipf->ipf_checksum = sum_val; 8688 ipf->ipf_checksum_flags = sum_flags; 8689 } 8690 8691 /* 8692 * We handle reassembly two ways. In the easy case, 8693 * where all the fragments show up in order, we do 8694 * minimal bookkeeping, and just clip new pieces on 8695 * the end. If we ever see a hole, then we go off 8696 * to ip_reassemble which has to mark the pieces and 8697 * keep track of the number of holes, etc. Obviously, 8698 * the point of having both mechanisms is so we can 8699 * handle the easy case as efficiently as possible. 
8700 */ 8701 if (offset == 0) { 8702 /* Easy case, in-order reassembly so far. */ 8703 /* Update the byte count */ 8704 ipf->ipf_count += msg_len; 8705 ipf->ipf_tail_mp = tail_mp; 8706 /* 8707 * Keep track of next expected offset in 8708 * ipf_end. 8709 */ 8710 ipf->ipf_end = end; 8711 ipf->ipf_nf_hdr_len = hdr_length; 8712 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8713 } else { 8714 /* Hard case, hole at the beginning. */ 8715 ipf->ipf_tail_mp = NULL; 8716 /* 8717 * ipf_end == 0 means that we have given up 8718 * on easy reassembly. 8719 */ 8720 ipf->ipf_end = 0; 8721 8722 /* Forget checksum offload from now on */ 8723 ipf->ipf_checksum_flags = 0; 8724 8725 /* 8726 * ipf_hole_cnt is set by ip_reassemble. 8727 * ipf_count is updated by ip_reassemble. 8728 * No need to check for return value here 8729 * as we don't expect reassembly to complete or 8730 * fail for the first fragment itself. 8731 */ 8732 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8733 msg_len); 8734 } 8735 /* Update per ipfb and ill byte counts */ 8736 ipfb->ipfb_count += ipf->ipf_count; 8737 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8738 ill->ill_frag_count += ipf->ipf_count; 8739 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8740 /* If the frag timer wasn't already going, start it. */ 8741 mutex_enter(&ill->ill_lock); 8742 ill_frag_timer_start(ill); 8743 mutex_exit(&ill->ill_lock); 8744 goto partial_reass_done; 8745 } 8746 8747 /* 8748 * If the packet's flag has changed (it could be coming up 8749 * from an interface different than the previous, therefore 8750 * possibly different checksum capability), then forget about 8751 * any stored checksum states. Otherwise add the value to 8752 * the existing one stored in the fragment header. 
8753 */ 8754 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8755 sum_val += ipf->ipf_checksum; 8756 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8757 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8758 ipf->ipf_checksum = sum_val; 8759 } else if (ipf->ipf_checksum_flags != 0) { 8760 /* Forget checksum offload from now on */ 8761 ipf->ipf_checksum_flags = 0; 8762 } 8763 8764 /* 8765 * We have a new piece of a datagram which is already being 8766 * reassembled. Update the ECN info if all IP fragments 8767 * are ECN capable. If there is one which is not, clear 8768 * all the info. If there is at least one which has CE 8769 * code point, IP needs to report that up to transport. 8770 */ 8771 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8772 if (ecn_info == IPH_ECN_CE) 8773 ipf->ipf_ecn = IPH_ECN_CE; 8774 } else { 8775 ipf->ipf_ecn = IPH_ECN_NECT; 8776 } 8777 8778 if (offset && ipf->ipf_end == offset) { 8779 /* The new fragment fits at the end */ 8780 ipf->ipf_tail_mp->b_cont = mp; 8781 /* Update the byte count */ 8782 ipf->ipf_count += msg_len; 8783 /* Update per ipfb and ill byte counts */ 8784 ipfb->ipfb_count += msg_len; 8785 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8786 ill->ill_frag_count += msg_len; 8787 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8788 if (more_frags) { 8789 /* More to come. */ 8790 ipf->ipf_end = end; 8791 ipf->ipf_tail_mp = tail_mp; 8792 goto partial_reass_done; 8793 } 8794 } else { 8795 /* 8796 * Go do the hard cases. 8797 * Call ip_reassemble(). 
8798 */ 8799 int ret; 8800 8801 if (offset == 0) { 8802 if (ipf->ipf_prev_nexthdr_offset == 0) { 8803 ipf->ipf_nf_hdr_len = hdr_length; 8804 ipf->ipf_prev_nexthdr_offset = 8805 *prev_nexthdr_offset; 8806 } 8807 } 8808 /* Save current byte count */ 8809 count = ipf->ipf_count; 8810 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8811 8812 /* Count of bytes added and subtracted (freeb()ed) */ 8813 count = ipf->ipf_count - count; 8814 if (count) { 8815 /* Update per ipfb and ill byte counts */ 8816 ipfb->ipfb_count += count; 8817 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8818 ill->ill_frag_count += count; 8819 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8820 } 8821 if (ret == IP_REASS_PARTIAL) { 8822 goto partial_reass_done; 8823 } else if (ret == IP_REASS_FAILED) { 8824 /* Reassembly failed. Free up all resources */ 8825 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8826 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8827 IP_REASS_SET_START(t_mp, 0); 8828 IP_REASS_SET_END(t_mp, 0); 8829 } 8830 freemsg(mp); 8831 goto partial_reass_done; 8832 } 8833 8834 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8835 } 8836 /* 8837 * We have completed reassembly. Unhook the frag header from 8838 * the reassembly list. 8839 * 8840 * Grab the unfragmentable header length next header value out 8841 * of the first fragment 8842 */ 8843 ASSERT(ipf->ipf_nf_hdr_len != 0); 8844 hdr_length = ipf->ipf_nf_hdr_len; 8845 8846 /* 8847 * Before we free the frag header, record the ECN info 8848 * to report back to the transport. 
8849 */ 8850 ecn_info = ipf->ipf_ecn; 8851 8852 /* 8853 * Store the nextheader field in the header preceding the fragment 8854 * header 8855 */ 8856 nexthdr = ipf->ipf_protocol; 8857 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8858 ipfp = ipf->ipf_ptphn; 8859 8860 /* We need to supply these to caller */ 8861 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8862 sum_val = ipf->ipf_checksum; 8863 else 8864 sum_val = 0; 8865 8866 mp1 = ipf->ipf_mp; 8867 count = ipf->ipf_count; 8868 ipf = ipf->ipf_hash_next; 8869 if (ipf) 8870 ipf->ipf_ptphn = ipfp; 8871 ipfp[0] = ipf; 8872 ill->ill_frag_count -= count; 8873 ASSERT(ipfb->ipfb_count >= count); 8874 ipfb->ipfb_count -= count; 8875 ipfb->ipfb_frag_pkts--; 8876 mutex_exit(&ipfb->ipfb_lock); 8877 /* Ditch the frag header. */ 8878 mp = mp1->b_cont; 8879 freeb(mp1); 8880 8881 /* 8882 * Make sure the packet is good by doing some sanity 8883 * check. If bad we can silentely drop the packet. 8884 */ 8885 reass_done: 8886 if (hdr_length < sizeof (ip6_frag_t)) { 8887 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8888 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8889 freemsg(mp); 8890 return (NULL); 8891 } 8892 8893 /* 8894 * Remove the fragment header from the initial header by 8895 * splitting the mblk into the non-fragmentable header and 8896 * everthing after the fragment extension header. This has the 8897 * side effect of putting all the headers that need destination 8898 * processing into the b_cont block-- on return this fact is 8899 * used in order to avoid having to look at the extensions 8900 * already processed. 8901 * 8902 * Note that this code assumes that the unfragmentable portion 8903 * of the header is in the first mblk and increments 8904 * the read pointer past it. If this assumption is broken 8905 * this code fails badly. 
8906 */ 8907 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8908 mblk_t *nmp; 8909 8910 if (!(nmp = dupb(mp))) { 8911 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8912 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8913 freemsg(mp); 8914 return (NULL); 8915 } 8916 nmp->b_cont = mp->b_cont; 8917 mp->b_cont = nmp; 8918 nmp->b_rptr += hdr_length; 8919 } 8920 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8921 8922 ip6h = (ip6_t *)mp->b_rptr; 8923 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8924 8925 /* Restore original IP length in header. */ 8926 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8927 /* Record the ECN info. */ 8928 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8929 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8930 8931 /* Reassembly is successful; return checksum information if needed */ 8932 if (cksum_val != NULL) 8933 *cksum_val = sum_val; 8934 if (cksum_flags != NULL) 8935 *cksum_flags = sum_flags; 8936 8937 return (mp); 8938 } 8939 8940 /* 8941 * Walk through the options to see if there is a routing header. 8942 * If present get the destination which is the last address of 8943 * the option. 
8944 */ 8945 in6_addr_t 8946 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8947 { 8948 uint8_t nexthdr; 8949 uint8_t *whereptr; 8950 ip6_hbh_t *hbhhdr; 8951 ip6_dest_t *dsthdr; 8952 ip6_rthdr0_t *rthdr; 8953 ip6_frag_t *fraghdr; 8954 int ehdrlen; 8955 int left; 8956 in6_addr_t *ap, rv; 8957 8958 if (is_fragment != NULL) 8959 *is_fragment = B_FALSE; 8960 8961 rv = ip6h->ip6_dst; 8962 8963 nexthdr = ip6h->ip6_nxt; 8964 whereptr = (uint8_t *)&ip6h[1]; 8965 for (;;) { 8966 8967 ASSERT(nexthdr != IPPROTO_RAW); 8968 switch (nexthdr) { 8969 case IPPROTO_HOPOPTS: 8970 hbhhdr = (ip6_hbh_t *)whereptr; 8971 nexthdr = hbhhdr->ip6h_nxt; 8972 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8973 break; 8974 case IPPROTO_DSTOPTS: 8975 dsthdr = (ip6_dest_t *)whereptr; 8976 nexthdr = dsthdr->ip6d_nxt; 8977 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8978 break; 8979 case IPPROTO_ROUTING: 8980 rthdr = (ip6_rthdr0_t *)whereptr; 8981 nexthdr = rthdr->ip6r0_nxt; 8982 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 8983 8984 left = rthdr->ip6r0_segleft; 8985 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 8986 rv = *(ap + left - 1); 8987 /* 8988 * If the caller doesn't care whether the packet 8989 * is a fragment or not, we can stop here since 8990 * we have our destination. 8991 */ 8992 if (is_fragment == NULL) 8993 goto done; 8994 break; 8995 case IPPROTO_FRAGMENT: 8996 fraghdr = (ip6_frag_t *)whereptr; 8997 nexthdr = fraghdr->ip6f_nxt; 8998 ehdrlen = sizeof (ip6_frag_t); 8999 if (is_fragment != NULL) 9000 *is_fragment = B_TRUE; 9001 goto done; 9002 default : 9003 goto done; 9004 } 9005 whereptr += ehdrlen; 9006 } 9007 9008 done: 9009 return (rv); 9010 } 9011 9012 /* 9013 * ip_source_routed_v6: 9014 * This function is called by redirect code in ip_rput_data_v6 to 9015 * know whether this packet is source routed through this node i.e 9016 * whether this node (router) is part of the journey. 
This 9017 * function is called under two cases : 9018 * 9019 * case 1 : Routing header was processed by this node and 9020 * ip_process_rthdr replaced ip6_dst with the next hop 9021 * and we are forwarding the packet to the next hop. 9022 * 9023 * case 2 : Routing header was not processed by this node and we 9024 * are just forwarding the packet. 9025 * 9026 * For case (1) we don't want to send redirects. For case(2) we 9027 * want to send redirects. 9028 */ 9029 static boolean_t 9030 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 9031 { 9032 uint8_t nexthdr; 9033 in6_addr_t *addrptr; 9034 ip6_rthdr0_t *rthdr; 9035 uint8_t numaddr; 9036 ip6_hbh_t *hbhhdr; 9037 uint_t ehdrlen; 9038 uint8_t *byteptr; 9039 9040 ip2dbg(("ip_source_routed_v6\n")); 9041 nexthdr = ip6h->ip6_nxt; 9042 ehdrlen = IPV6_HDR_LEN; 9043 9044 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9045 while (nexthdr == IPPROTO_HOPOPTS || 9046 nexthdr == IPPROTO_DSTOPTS) { 9047 byteptr = (uint8_t *)ip6h + ehdrlen; 9048 /* 9049 * Check if we have already processed 9050 * packets or we are just a forwarding 9051 * router which only pulled up msgs up 9052 * to IPV6HDR and one HBH ext header 9053 */ 9054 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9055 ip2dbg(("ip_source_routed_v6: Extension" 9056 " headers not processed\n")); 9057 return (B_FALSE); 9058 } 9059 hbhhdr = (ip6_hbh_t *)byteptr; 9060 nexthdr = hbhhdr->ip6h_nxt; 9061 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9062 } 9063 switch (nexthdr) { 9064 case IPPROTO_ROUTING: 9065 byteptr = (uint8_t *)ip6h + ehdrlen; 9066 /* 9067 * If for some reason, we haven't pulled up 9068 * the routing hdr data mblk, then we must 9069 * not have processed it at all. So for sure 9070 * we are not part of the source routed journey. 
9071 */ 9072 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9073 ip2dbg(("ip_source_routed_v6: Routing" 9074 " header not processed\n")); 9075 return (B_FALSE); 9076 } 9077 rthdr = (ip6_rthdr0_t *)byteptr; 9078 /* 9079 * Either we are an intermediate router or the 9080 * last hop before destination and we have 9081 * already processed the routing header. 9082 * If segment_left is greater than or equal to zero, 9083 * then we must be the (numaddr - segleft) entry 9084 * of the routing header. Although ip6r0_segleft 9085 * is a unit8_t variable, we still check for zero 9086 * or greater value, if in case the data type 9087 * is changed someday in future. 9088 */ 9089 if (rthdr->ip6r0_segleft > 0 || 9090 rthdr->ip6r0_segleft == 0) { 9091 ire_t *ire = NULL; 9092 9093 numaddr = rthdr->ip6r0_len / 2; 9094 addrptr = (in6_addr_t *)((char *)rthdr + 9095 sizeof (*rthdr)); 9096 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9097 if (addrptr != NULL) { 9098 ire = ire_ctable_lookup_v6(addrptr, NULL, 9099 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9100 MATCH_IRE_TYPE); 9101 if (ire != NULL) { 9102 ire_refrele(ire); 9103 return (B_TRUE); 9104 } 9105 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9106 } 9107 } 9108 /* FALLTHRU */ 9109 default: 9110 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9111 return (B_FALSE); 9112 } 9113 } 9114 9115 /* 9116 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9117 * Assumes that the following set of headers appear in the first 9118 * mblk: 9119 * ip6i_t (if present) CAN also appear as a separate mblk. 9120 * ip6_t 9121 * Any extension headers 9122 * TCP/UDP/SCTP header (if present) 9123 * The routine can handle an ICMPv6 header that is not in the first mblk. 9124 * 9125 * The order to determine the outgoing interface is as follows: 9126 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9127 * 2. If conn_nofailover_ill is set then use that ill. 9128 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9129 * 4. If q is an ill queue and (link local or multicast destination) then 9130 * use that ill. 9131 * 5. If IPV6_BOUND_IF has been set use that ill. 9132 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9133 * look for the best IRE match for the unspecified group to determine 9134 * the ill. 9135 * 7. For unicast: Just do an IRE lookup for the best match. 9136 */ 9137 void 9138 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9139 { 9140 conn_t *connp = NULL; 9141 queue_t *q = (queue_t *)arg2; 9142 ire_t *ire = NULL; 9143 ire_t *sctp_ire = NULL; 9144 ip6_t *ip6h; 9145 in6_addr_t *v6dstp; 9146 ill_t *ill = NULL; 9147 ipif_t *ipif; 9148 ip6i_t *ip6i; 9149 int cksum_request; /* -1 => normal. */ 9150 /* 1 => Skip TCP/UDP/SCTP checksum */ 9151 /* Otherwise contains insert offset for checksum */ 9152 int unspec_src; 9153 boolean_t do_outrequests; /* Increment OutRequests? */ 9154 mib2_ipv6IfStatsEntry_t *mibptr; 9155 int match_flags = MATCH_IRE_ILL_GROUP; 9156 boolean_t attach_if = B_FALSE; 9157 mblk_t *first_mp; 9158 boolean_t mctl_present; 9159 ipsec_out_t *io; 9160 boolean_t drop_if_delayed = B_FALSE; 9161 boolean_t multirt_need_resolve = B_FALSE; 9162 mblk_t *copy_mp = NULL; 9163 int err; 9164 int ip6i_flags = 0; 9165 zoneid_t zoneid; 9166 ill_t *saved_ill = NULL; 9167 boolean_t conn_lock_held; 9168 boolean_t need_decref = B_FALSE; 9169 9170 /* 9171 * Highest bit in version field is Reachability Confirmation bit 9172 * used by NUD in ip_xmit_v6(). 9173 */ 9174 #ifdef _BIG_ENDIAN 9175 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9176 #else 9177 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9178 #endif 9179 9180 /* 9181 * M_CTL comes from 5 places 9182 * 9183 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9184 * both V4 and V6 datagrams. 9185 * 9186 * 2) AH/ESP sends down M_CTL after doing their job with both 9187 * V4 and V6 datagrams. 
9188 * 9189 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9190 * attached. 9191 * 9192 * 4) Notifications from an external resolver (for XRESOLV ifs) 9193 * 9194 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9195 * IPsec hardware acceleration support. 9196 * 9197 * We need to handle (1)'s IPv6 case and (3) here. For the 9198 * IPv4 case in (1), and (2), IPSEC processing has already 9199 * started. The code in ip_wput() already knows how to handle 9200 * continuing IPSEC processing (for IPv4 and IPv6). All other 9201 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9202 * for handling. 9203 */ 9204 first_mp = mp; 9205 mctl_present = B_FALSE; 9206 io = NULL; 9207 9208 /* Multidata transmit? */ 9209 if (DB_TYPE(mp) == M_MULTIDATA) { 9210 /* 9211 * We should never get here, since all Multidata messages 9212 * originating from tcp should have been directed over to 9213 * tcp_multisend() in the first place. 9214 */ 9215 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 9216 freemsg(mp); 9217 return; 9218 } else if (DB_TYPE(mp) == M_CTL) { 9219 uint32_t mctltype = 0; 9220 uint32_t mlen = MBLKL(first_mp); 9221 9222 mp = mp->b_cont; 9223 mctl_present = B_TRUE; 9224 io = (ipsec_out_t *)first_mp->b_rptr; 9225 9226 /* 9227 * Validate this M_CTL message. The only three types of 9228 * M_CTL messages we expect to see in this code path are 9229 * ipsec_out_t or ipsec_in_t structures (allocated as 9230 * ipsec_info_t unions), or ipsec_ctl_t structures. 9231 * The ipsec_out_type and ipsec_in_type overlap in the two 9232 * data structures, and they are either set to IPSEC_OUT 9233 * or IPSEC_IN depending on which data structure it is. 9234 * ipsec_ctl_t is an IPSEC_CTL. 9235 * 9236 * All other M_CTL messages are sent to ip_wput_nondata() 9237 * for handling. 
9238 */ 9239 if (mlen >= sizeof (io->ipsec_out_type)) 9240 mctltype = io->ipsec_out_type; 9241 9242 if ((mlen == sizeof (ipsec_ctl_t)) && 9243 (mctltype == IPSEC_CTL)) { 9244 ip_output(Q_TO_CONN(q), first_mp, q, caller); 9245 return; 9246 } 9247 9248 if ((mlen < sizeof (ipsec_info_t)) || 9249 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9250 mp == NULL) { 9251 ip_wput_nondata(NULL, q, first_mp, NULL); 9252 return; 9253 } 9254 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9255 if (q->q_next == NULL) { 9256 ip6h = (ip6_t *)mp->b_rptr; 9257 /* 9258 * For a freshly-generated TCP dgram that needs IPV6 9259 * processing, don't call ip_wput immediately. We can 9260 * tell this by the ipsec_out_proc_begin. In-progress 9261 * IPSEC_OUT messages have proc_begin set to TRUE, 9262 * and we want to send all IPSEC_IN messages to 9263 * ip_wput() for IPsec processing or finishing. 9264 */ 9265 if (mctltype == IPSEC_IN || 9266 IPVER(ip6h) != IPV6_VERSION || 9267 io->ipsec_out_proc_begin) { 9268 mibptr = &ip6_mib; 9269 goto notv6; 9270 } 9271 } 9272 } else if (DB_TYPE(mp) != M_DATA) { 9273 ip_wput_nondata(NULL, q, mp, NULL); 9274 return; 9275 } 9276 9277 ip6h = (ip6_t *)mp->b_rptr; 9278 9279 if (IPVER(ip6h) != IPV6_VERSION) { 9280 mibptr = &ip6_mib; 9281 goto notv6; 9282 } 9283 9284 if (q->q_next != NULL) { 9285 ill = (ill_t *)q->q_ptr; 9286 /* 9287 * We don't know if this ill will be used for IPv6 9288 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9289 * ipif_set_values() sets the ill_isv6 flag to true if 9290 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9291 * just drop the packet. 
9292 */ 9293 if (!ill->ill_isv6) { 9294 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9295 "ILLF_IPV6 was set\n")); 9296 freemsg(first_mp); 9297 return; 9298 } 9299 /* For uniformity do a refhold */ 9300 mutex_enter(&ill->ill_lock); 9301 if (!ILL_CAN_LOOKUP(ill)) { 9302 mutex_exit(&ill->ill_lock); 9303 freemsg(first_mp); 9304 return; 9305 } 9306 ill_refhold_locked(ill); 9307 mutex_exit(&ill->ill_lock); 9308 mibptr = ill->ill_ip6_mib; 9309 /* 9310 * ill_ip6_mib is allocated by ipif_set_values() when 9311 * ill_isv6 is set. Thus if ill_isv6 is true, 9312 * ill_ip6_mib had better not be NULL. 9313 */ 9314 ASSERT(mibptr != NULL); 9315 unspec_src = 0; 9316 BUMP_MIB(mibptr, ipv6OutRequests); 9317 do_outrequests = B_FALSE; 9318 } else { 9319 connp = (conn_t *)arg; 9320 ASSERT(connp != NULL); 9321 9322 /* is queue flow controlled? */ 9323 if ((q->q_first || connp->conn_draining) && 9324 (caller == IP_WPUT)) { 9325 /* 9326 * 1) TCP sends down M_CTL for detached connections. 9327 * 2) AH/ESP sends down M_CTL. 9328 * 9329 * We don't flow control either of the above. Only 9330 * UDP and others are flow controlled for which we 9331 * can't have a M_CTL. 9332 */ 9333 ASSERT(first_mp == mp); 9334 (void) putq(q, mp); 9335 return; 9336 } 9337 mibptr = &ip6_mib; 9338 unspec_src = connp->conn_unspec_src; 9339 do_outrequests = B_TRUE; 9340 if (mp->b_flag & MSGHASREF) { 9341 mp->b_flag &= ~MSGHASREF; 9342 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9343 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9344 need_decref = B_TRUE; 9345 } 9346 9347 /* 9348 * If there is a policy, try to attach an ipsec_out in 9349 * the front. At the end, first_mp either points to a 9350 * M_DATA message or IPSEC_OUT message linked to a 9351 * M_DATA message. We have to do it now as we might 9352 * lose the "conn" if we go through ip_newroute. 
9353 */ 9354 if (!mctl_present && 9355 (connp->conn_out_enforce_policy || 9356 connp->conn_latch != NULL)) { 9357 ASSERT(first_mp == mp); 9358 /* XXX Any better way to get the protocol fast ? */ 9359 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9360 connp->conn_ulp)) == NULL)) { 9361 if (need_decref) 9362 CONN_DEC_REF(connp); 9363 return; 9364 } else { 9365 ASSERT(mp->b_datap->db_type == M_CTL); 9366 first_mp = mp; 9367 mp = mp->b_cont; 9368 mctl_present = B_TRUE; 9369 io = (ipsec_out_t *)first_mp->b_rptr; 9370 } 9371 } 9372 } 9373 9374 /* check for alignment and full IPv6 header */ 9375 if (!OK_32PTR((uchar_t *)ip6h) || 9376 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9377 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9378 if (do_outrequests) 9379 BUMP_MIB(mibptr, ipv6OutRequests); 9380 BUMP_MIB(mibptr, ipv6OutDiscards); 9381 freemsg(first_mp); 9382 if (ill != NULL) 9383 ill_refrele(ill); 9384 if (need_decref) 9385 CONN_DEC_REF(connp); 9386 return; 9387 } 9388 v6dstp = &ip6h->ip6_dst; 9389 cksum_request = -1; 9390 ip6i = NULL; 9391 9392 /* 9393 * Once neighbor discovery has completed, ndp_process() will provide 9394 * locally generated packets for which processing can be reattempted. 9395 * In these cases, connp is NULL and the original zone is part of a 9396 * prepended ipsec_out_t. 9397 */ 9398 if (io != NULL) { 9399 zoneid = io->ipsec_out_zoneid; 9400 ASSERT(zoneid != ALL_ZONES); 9401 } else { 9402 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 9403 } 9404 9405 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9406 /* 9407 * This is an ip6i_t header followed by an ip6_hdr. 9408 * Check which fields are set. 9409 * 9410 * When the packet comes from a transport we should have 9411 * all needed headers in the first mblk. However, when 9412 * going through ip_newroute*_v6 the ip6i might be in 9413 * a separate mblk when we return here. 
In that case 9414 * we pullup everything to ensure that extension and transport 9415 * headers "stay" in the first mblk. 9416 */ 9417 ip6i = (ip6i_t *)ip6h; 9418 ip6i_flags = ip6i->ip6i_flags; 9419 9420 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9421 ((mp->b_wptr - (uchar_t *)ip6i) >= 9422 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9423 9424 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9425 if (!pullupmsg(mp, -1)) { 9426 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9427 if (do_outrequests) 9428 BUMP_MIB(mibptr, ipv6OutRequests); 9429 BUMP_MIB(mibptr, ipv6OutDiscards); 9430 freemsg(first_mp); 9431 if (ill != NULL) 9432 ill_refrele(ill); 9433 if (need_decref) 9434 CONN_DEC_REF(connp); 9435 return; 9436 } 9437 ip6h = (ip6_t *)mp->b_rptr; 9438 v6dstp = &ip6h->ip6_dst; 9439 ip6i = (ip6i_t *)ip6h; 9440 } 9441 ip6h = (ip6_t *)&ip6i[1]; 9442 9443 /* 9444 * Advance rptr past the ip6i_t to get ready for 9445 * transmitting the packet. However, if the packet gets 9446 * passed to ip_newroute*_v6 then rptr is moved back so 9447 * that the ip6i_t header can be inspected when the 9448 * packet comes back here after passing through 9449 * ire_add_then_send. 9450 */ 9451 mp->b_rptr = (uchar_t *)ip6h; 9452 9453 /* 9454 * IP6I_ATTACH_IF is set in this function when we had a 9455 * conn and it was either bound to the IPFF_NOFAILOVER address 9456 * or IPV6_BOUND_PIF was set. These options override other 9457 * options that set the ifindex. We come here with 9458 * IP6I_ATTACH_IF set when we can't find the ire and 9459 * ip_newroute_v6 is feeding the packet for second time. 
9460 */ 9461 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9462 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9463 ASSERT(ip6i->ip6i_ifindex != 0); 9464 if (ill != NULL) 9465 ill_refrele(ill); 9466 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9467 NULL, NULL, NULL, NULL); 9468 if (ill == NULL) { 9469 if (do_outrequests) 9470 BUMP_MIB(mibptr, ipv6OutRequests); 9471 BUMP_MIB(mibptr, ipv6OutDiscards); 9472 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9473 ip6i->ip6i_ifindex)); 9474 if (need_decref) 9475 CONN_DEC_REF(connp); 9476 freemsg(first_mp); 9477 return; 9478 } 9479 mibptr = ill->ill_ip6_mib; 9480 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9481 /* 9482 * Preserve the index so that when we return 9483 * from IPSEC processing, we know where to 9484 * send the packet. 9485 */ 9486 if (mctl_present) { 9487 ASSERT(io != NULL); 9488 io->ipsec_out_ill_index = 9489 ip6i->ip6i_ifindex; 9490 } 9491 } 9492 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9493 /* 9494 * This is a multipathing probe packet that has 9495 * been delayed in ND resolution. 
Drop the 9496 * packet for the reasons mentioned in 9497 * nce_queue_mp() 9498 */ 9499 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9500 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9501 freemsg(first_mp); 9502 ill_refrele(ill); 9503 if (need_decref) 9504 CONN_DEC_REF(connp); 9505 return; 9506 } 9507 } 9508 } 9509 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9510 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9511 9512 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9513 if (secpolicy_net_rawaccess(cr) != 0) { 9514 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9515 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9516 NULL, zoneid, NULL, 9517 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9518 if (ire == NULL) { 9519 if (do_outrequests) 9520 BUMP_MIB(mibptr, 9521 ipv6OutRequests); 9522 BUMP_MIB(mibptr, ipv6OutDiscards); 9523 ip1dbg(("ip_wput_v6: bad source " 9524 "addr\n")); 9525 freemsg(first_mp); 9526 if (ill != NULL) 9527 ill_refrele(ill); 9528 if (need_decref) 9529 CONN_DEC_REF(connp); 9530 return; 9531 } 9532 ire_refrele(ire); 9533 } 9534 /* No need to verify again when using ip_newroute */ 9535 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9536 } 9537 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9538 /* 9539 * Make sure they match since ip_newroute*_v6 etc might 9540 * (unknown to them) inspect ip6i_nexthop when 9541 * they think they access ip6_dst. 9542 */ 9543 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9544 } 9545 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9546 cksum_request = 1; 9547 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9548 cksum_request = ip6i->ip6i_checksum_off; 9549 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9550 unspec_src = 1; 9551 9552 if (do_outrequests && ill != NULL) { 9553 BUMP_MIB(mibptr, ipv6OutRequests); 9554 do_outrequests = B_FALSE; 9555 } 9556 /* 9557 * Store ip6i_t info that we need after we come back 9558 * from IPSEC processing. 
9559 */ 9560 if (mctl_present) { 9561 ASSERT(io != NULL); 9562 io->ipsec_out_unspec_src = unspec_src; 9563 } 9564 } 9565 if (connp != NULL && connp->conn_dontroute) 9566 ip6h->ip6_hops = 1; 9567 9568 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9569 goto ipv6multicast; 9570 9571 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9572 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9573 ill_t *conn_outgoing_pill; 9574 9575 conn_outgoing_pill = conn_get_held_ill(connp, 9576 &connp->conn_outgoing_pill, &err); 9577 if (err == ILL_LOOKUP_FAILED) { 9578 if (ill != NULL) 9579 ill_refrele(ill); 9580 if (need_decref) 9581 CONN_DEC_REF(connp); 9582 freemsg(first_mp); 9583 return; 9584 } 9585 if (conn_outgoing_pill != NULL) { 9586 if (ill != NULL) 9587 ill_refrele(ill); 9588 ill = conn_outgoing_pill; 9589 attach_if = B_TRUE; 9590 match_flags = MATCH_IRE_ILL; 9591 mibptr = ill->ill_ip6_mib; 9592 9593 /* 9594 * Check if we need an ire that will not be 9595 * looked up by anybody else i.e. HIDDEN. 9596 */ 9597 if (ill_is_probeonly(ill)) 9598 match_flags |= MATCH_IRE_MARK_HIDDEN; 9599 goto send_from_ill; 9600 } 9601 } 9602 9603 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9604 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9605 ill_t *conn_nofailover_ill; 9606 9607 conn_nofailover_ill = conn_get_held_ill(connp, 9608 &connp->conn_nofailover_ill, &err); 9609 if (err == ILL_LOOKUP_FAILED) { 9610 if (ill != NULL) 9611 ill_refrele(ill); 9612 if (need_decref) 9613 CONN_DEC_REF(connp); 9614 freemsg(first_mp); 9615 return; 9616 } 9617 if (conn_nofailover_ill != NULL) { 9618 if (ill != NULL) 9619 ill_refrele(ill); 9620 ill = conn_nofailover_ill; 9621 attach_if = B_TRUE; 9622 /* 9623 * Assumes that ipc_nofailover_ill is used only for 9624 * multipathing probe packets. These packets are better 9625 * dropped, if they are delayed in ND resolution, for 9626 * the reasons described in nce_queue_mp(). 
9627 * IP6I_DROP_IFDELAYED will be set later on in this 9628 * function for this packet. 9629 */ 9630 drop_if_delayed = B_TRUE; 9631 match_flags = MATCH_IRE_ILL; 9632 mibptr = ill->ill_ip6_mib; 9633 9634 /* 9635 * Check if we need an ire that will not be 9636 * looked up by anybody else i.e. HIDDEN. 9637 */ 9638 if (ill_is_probeonly(ill)) 9639 match_flags |= MATCH_IRE_MARK_HIDDEN; 9640 goto send_from_ill; 9641 } 9642 } 9643 9644 /* 9645 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9646 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9647 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9648 */ 9649 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9650 ASSERT(ip6i->ip6i_ifindex != 0); 9651 attach_if = B_TRUE; 9652 ASSERT(ill != NULL); 9653 match_flags = MATCH_IRE_ILL; 9654 9655 /* 9656 * Check if we need an ire that will not be 9657 * looked up by anybody else i.e. HIDDEN. 9658 */ 9659 if (ill_is_probeonly(ill)) 9660 match_flags |= MATCH_IRE_MARK_HIDDEN; 9661 goto send_from_ill; 9662 } 9663 9664 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9665 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9666 ASSERT(ill != NULL); 9667 goto send_from_ill; 9668 } 9669 9670 /* 9671 * 4. If q is an ill queue and (link local or multicast destination) 9672 * then use that ill. 9673 */ 9674 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9675 goto send_from_ill; 9676 } 9677 9678 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9679 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9680 ill_t *conn_outgoing_ill; 9681 9682 conn_outgoing_ill = conn_get_held_ill(connp, 9683 &connp->conn_outgoing_ill, &err); 9684 if (err == ILL_LOOKUP_FAILED) { 9685 if (ill != NULL) 9686 ill_refrele(ill); 9687 if (need_decref) 9688 CONN_DEC_REF(connp); 9689 freemsg(first_mp); 9690 return; 9691 } 9692 if (ill != NULL) 9693 ill_refrele(ill); 9694 ill = conn_outgoing_ill; 9695 mibptr = ill->ill_ip6_mib; 9696 goto send_from_ill; 9697 } 9698 9699 /* 9700 * 6. For unicast: Just do an IRE lookup for the best match. 9701 * If we get here for a link-local address it is rather random 9702 * what interface we pick on a multihomed host. 9703 * *If* there is an IRE_CACHE (and the link-local address 9704 * isn't duplicated on multi links) this will find the IRE_CACHE. 9705 * Otherwise it will use one of the matching IRE_INTERFACE routes 9706 * for the link-local prefix. Hence, applications 9707 * *should* be encouraged to specify an outgoing interface when sending 9708 * to a link local address. 9709 */ 9710 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9711 !connp->conn_fully_bound)) { 9712 /* 9713 * We cache IRE_CACHEs to avoid lookups. We don't do 9714 * this for the tcp global queue and listen end point 9715 * as it does not really have a real destination to 9716 * talk to. 9717 */ 9718 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp)); 9719 } else { 9720 /* 9721 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9722 * grab a lock here to check for CONDEMNED as it is okay 9723 * to send a packet or two with the IRE_CACHE that is going 9724 * away. 9725 */ 9726 mutex_enter(&connp->conn_lock); 9727 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9728 if (ire != NULL && 9729 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9730 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9731 9732 IRE_REFHOLD(ire); 9733 mutex_exit(&connp->conn_lock); 9734 9735 } else { 9736 boolean_t cached = B_FALSE; 9737 9738 connp->conn_ire_cache = NULL; 9739 mutex_exit(&connp->conn_lock); 9740 /* Release the old ire */ 9741 if (ire != NULL && sctp_ire == NULL) 9742 IRE_REFRELE_NOTR(ire); 9743 9744 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9745 MBLK_GETLABEL(mp)); 9746 if (ire != NULL) { 9747 IRE_REFHOLD_NOTR(ire); 9748 9749 mutex_enter(&connp->conn_lock); 9750 if (!(connp->conn_state_flags & CONN_CLOSING) && 9751 (connp->conn_ire_cache == NULL)) { 9752 rw_enter(&ire->ire_bucket->irb_lock, 9753 RW_READER); 9754 if (!(ire->ire_marks & 9755 IRE_MARK_CONDEMNED)) { 9756 connp->conn_ire_cache = ire; 9757 cached = B_TRUE; 9758 } 9759 rw_exit(&ire->ire_bucket->irb_lock); 9760 } 9761 mutex_exit(&connp->conn_lock); 9762 9763 /* 9764 * We can continue to use the ire but since it 9765 * was not cached, we should drop the extra 9766 * reference. 9767 */ 9768 if (!cached) 9769 IRE_REFRELE_NOTR(ire); 9770 } 9771 } 9772 } 9773 9774 if (ire != NULL) { 9775 if (do_outrequests) { 9776 /* Handle IRE_LOCAL's that might appear here */ 9777 if (ire->ire_type == IRE_CACHE) { 9778 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9779 ill_ip6_mib; 9780 } else { 9781 mibptr = ire->ire_ipif->ipif_ill->ill_ip6_mib; 9782 } 9783 BUMP_MIB(mibptr, ipv6OutRequests); 9784 } 9785 ASSERT(!attach_if); 9786 9787 /* 9788 * Check if the ire has the RTF_MULTIRT flag, inherited 9789 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9790 */ 9791 if (ire->ire_flags & RTF_MULTIRT) { 9792 /* 9793 * Force hop limit of multirouted packets if required. 9794 * The hop limit of such packets is bounded by the 9795 * ip_multirt_ttl ndd variable. 9796 * NDP packets must have a hop limit of 255; don't 9797 * change the hop limit in that case. 
9798 */ 9799 if ((ip_multirt_ttl > 0) && 9800 (ip6h->ip6_hops > ip_multirt_ttl) && 9801 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9802 if (ip_debug > 3) { 9803 ip2dbg(("ip_wput_v6: forcing multirt " 9804 "hop limit to %d (was %d) ", 9805 ip_multirt_ttl, ip6h->ip6_hops)); 9806 pr_addr_dbg("v6dst %s\n", AF_INET6, 9807 &ire->ire_addr_v6); 9808 } 9809 ip6h->ip6_hops = ip_multirt_ttl; 9810 } 9811 9812 /* 9813 * We look at this point if there are pending 9814 * unresolved routes. ire_multirt_need_resolve_v6() 9815 * checks in O(n) that all IRE_OFFSUBNET ire 9816 * entries for the packet's destination and 9817 * flagged RTF_MULTIRT are currently resolved. 9818 * If some remain unresolved, we do a copy 9819 * of the current message. It will be used 9820 * to initiate additional route resolutions. 9821 */ 9822 multirt_need_resolve = 9823 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9824 MBLK_GETLABEL(first_mp)); 9825 ip2dbg(("ip_wput_v6: ire %p, " 9826 "multirt_need_resolve %d, first_mp %p\n", 9827 (void *)ire, multirt_need_resolve, 9828 (void *)first_mp)); 9829 if (multirt_need_resolve) { 9830 copy_mp = copymsg(first_mp); 9831 if (copy_mp != NULL) { 9832 MULTIRT_DEBUG_TAG(copy_mp); 9833 } 9834 } 9835 } 9836 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9837 connp, caller, 0, ip6i_flags); 9838 if (need_decref) { 9839 CONN_DEC_REF(connp); 9840 connp = NULL; 9841 } 9842 IRE_REFRELE(ire); 9843 9844 /* 9845 * Try to resolve another multiroute if 9846 * ire_multirt_need_resolve_v6() deemed it necessary. 9847 * copy_mp will be consumed (sent or freed) by 9848 * ip_newroute_v6(). 9849 */ 9850 if (copy_mp != NULL) { 9851 if (mctl_present) { 9852 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9853 } else { 9854 ip6h = (ip6_t *)copy_mp->b_rptr; 9855 } 9856 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9857 &ip6h->ip6_src, NULL, zoneid); 9858 } 9859 if (ill != NULL) 9860 ill_refrele(ill); 9861 return; 9862 } 9863 9864 /* 9865 * No full IRE for this destination. 
Send it to 9866 * ip_newroute_v6 to see if anything else matches. 9867 * Mark this packet as having originated on this 9868 * machine. 9869 * Update rptr if there was an ip6i_t header. 9870 */ 9871 mp->b_prev = NULL; 9872 mp->b_next = NULL; 9873 if (ip6i != NULL) 9874 mp->b_rptr -= sizeof (ip6i_t); 9875 9876 if (unspec_src) { 9877 if (ip6i == NULL) { 9878 /* 9879 * Add ip6i_t header to carry unspec_src 9880 * until the packet comes back in ip_wput_v6. 9881 */ 9882 mp = ip_add_info_v6(mp, NULL, v6dstp); 9883 if (mp == NULL) { 9884 if (do_outrequests) 9885 BUMP_MIB(mibptr, ipv6OutRequests); 9886 BUMP_MIB(mibptr, ipv6OutDiscards); 9887 if (mctl_present) 9888 freeb(first_mp); 9889 if (ill != NULL) 9890 ill_refrele(ill); 9891 if (need_decref) 9892 CONN_DEC_REF(connp); 9893 return; 9894 } 9895 ip6i = (ip6i_t *)mp->b_rptr; 9896 9897 if (mctl_present) { 9898 ASSERT(first_mp != mp); 9899 first_mp->b_cont = mp; 9900 } else { 9901 first_mp = mp; 9902 } 9903 9904 if ((mp->b_wptr - (uchar_t *)ip6i) == 9905 sizeof (ip6i_t)) { 9906 /* 9907 * ndp_resolver called from ip_newroute_v6 9908 * expects pulled up message. 
9909 */ 9910 if (!pullupmsg(mp, -1)) { 9911 ip1dbg(("ip_wput_v6: pullupmsg" 9912 " failed\n")); 9913 if (do_outrequests) { 9914 BUMP_MIB(mibptr, 9915 ipv6OutRequests); 9916 } 9917 BUMP_MIB(mibptr, ipv6OutDiscards); 9918 freemsg(first_mp); 9919 if (ill != NULL) 9920 ill_refrele(ill); 9921 if (need_decref) 9922 CONN_DEC_REF(connp); 9923 return; 9924 } 9925 ip6i = (ip6i_t *)mp->b_rptr; 9926 } 9927 ip6h = (ip6_t *)&ip6i[1]; 9928 v6dstp = &ip6h->ip6_dst; 9929 } 9930 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9931 if (mctl_present) { 9932 ASSERT(io != NULL); 9933 io->ipsec_out_unspec_src = unspec_src; 9934 } 9935 } 9936 if (do_outrequests) 9937 BUMP_MIB(mibptr, ipv6OutRequests); 9938 if (need_decref) 9939 CONN_DEC_REF(connp); 9940 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 9941 if (ill != NULL) 9942 ill_refrele(ill); 9943 return; 9944 9945 9946 /* 9947 * Handle multicast packets with or without an conn. 9948 * Assumes that the transports set ip6_hops taking 9949 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9950 * into account. 9951 */ 9952 ipv6multicast: 9953 ip2dbg(("ip_wput_v6: multicast\n")); 9954 9955 /* 9956 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 9957 * 2. If conn_nofailover_ill is set then use that ill. 9958 * 9959 * Hold the conn_lock till we refhold the ill of interest that is 9960 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9961 * while holding any locks, postpone the refrele until after the 9962 * conn_lock is dropped. 
9963 */ 9964 if (connp != NULL) { 9965 mutex_enter(&connp->conn_lock); 9966 conn_lock_held = B_TRUE; 9967 } else { 9968 conn_lock_held = B_FALSE; 9969 } 9970 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9971 err = ill_check_and_refhold(connp->conn_outgoing_pill); 9972 if (err == ILL_LOOKUP_FAILED) { 9973 ip1dbg(("ip_output_v6: multicast" 9974 " conn_outgoing_pill no ipif\n")); 9975 multicast_discard: 9976 ASSERT(saved_ill == NULL); 9977 if (conn_lock_held) 9978 mutex_exit(&connp->conn_lock); 9979 if (ill != NULL) 9980 ill_refrele(ill); 9981 freemsg(first_mp); 9982 if (do_outrequests) 9983 BUMP_MIB(mibptr, ipv6OutDiscards); 9984 if (need_decref) 9985 CONN_DEC_REF(connp); 9986 return; 9987 } 9988 saved_ill = ill; 9989 ill = connp->conn_outgoing_pill; 9990 attach_if = B_TRUE; 9991 match_flags = MATCH_IRE_ILL; 9992 mibptr = ill->ill_ip6_mib; 9993 9994 /* 9995 * Check if we need an ire that will not be 9996 * looked up by anybody else i.e. HIDDEN. 9997 */ 9998 if (ill_is_probeonly(ill)) 9999 match_flags |= MATCH_IRE_MARK_HIDDEN; 10000 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10001 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10002 if (err == ILL_LOOKUP_FAILED) { 10003 ip1dbg(("ip_output_v6: multicast" 10004 " conn_nofailover_ill no ipif\n")); 10005 goto multicast_discard; 10006 } 10007 saved_ill = ill; 10008 ill = connp->conn_nofailover_ill; 10009 attach_if = B_TRUE; 10010 match_flags = MATCH_IRE_ILL; 10011 10012 /* 10013 * Check if we need an ire that will not be 10014 * looked up by anybody else i.e. HIDDEN. 10015 */ 10016 if (ill_is_probeonly(ill)) 10017 match_flags |= MATCH_IRE_MARK_HIDDEN; 10018 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10019 /* 10020 * Redo 1. If we did not find an IRE_CACHE the first time, 10021 * we should have an ip6i_t with IP6I_ATTACH_IF if 10022 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10023 * used on this endpoint. 
10024 */ 10025 ASSERT(ip6i->ip6i_ifindex != 0); 10026 attach_if = B_TRUE; 10027 ASSERT(ill != NULL); 10028 match_flags = MATCH_IRE_ILL; 10029 10030 /* 10031 * Check if we need an ire that will not be 10032 * looked up by anybody else i.e. HIDDEN. 10033 */ 10034 if (ill_is_probeonly(ill)) 10035 match_flags |= MATCH_IRE_MARK_HIDDEN; 10036 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10037 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10038 10039 ASSERT(ill != NULL); 10040 } else if (ill != NULL) { 10041 /* 10042 * 4. If q is an ill queue and (link local or multicast 10043 * destination) then use that ill. 10044 * We don't need the ipif initialization here. 10045 * This useless assert below is just to prevent lint from 10046 * reporting a null body if statement. 10047 */ 10048 ASSERT(ill != NULL); 10049 } else if (connp != NULL) { 10050 /* 10051 * 5. If IPV6_BOUND_IF has been set use that ill. 10052 * 10053 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10054 * Otherwise look for the best IRE match for the unspecified 10055 * group to determine the ill. 10056 * 10057 * conn_multicast_ill is used for only IPv6 packets. 10058 * conn_multicast_ipif is used for only IPv4 packets. 10059 * Thus a PF_INET6 socket send both IPv4 and IPv6 10060 * multicast packets using different IP*_MULTICAST_IF 10061 * interfaces. 
10062 */ 10063 if (connp->conn_outgoing_ill != NULL) { 10064 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10065 if (err == ILL_LOOKUP_FAILED) { 10066 ip1dbg(("ip_output_v6: multicast" 10067 " conn_outgoing_ill no ipif\n")); 10068 goto multicast_discard; 10069 } 10070 ill = connp->conn_outgoing_ill; 10071 } else if (connp->conn_multicast_ill != NULL) { 10072 err = ill_check_and_refhold(connp->conn_multicast_ill); 10073 if (err == ILL_LOOKUP_FAILED) { 10074 ip1dbg(("ip_output_v6: multicast" 10075 " conn_multicast_ill no ipif\n")); 10076 goto multicast_discard; 10077 } 10078 ill = connp->conn_multicast_ill; 10079 } else { 10080 mutex_exit(&connp->conn_lock); 10081 conn_lock_held = B_FALSE; 10082 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 10083 if (ipif == NULL) { 10084 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10085 goto multicast_discard; 10086 } 10087 /* 10088 * We have a ref to this ipif, so we can safely 10089 * access ipif_ill. 10090 */ 10091 ill = ipif->ipif_ill; 10092 mutex_enter(&ill->ill_lock); 10093 if (!ILL_CAN_LOOKUP(ill)) { 10094 mutex_exit(&ill->ill_lock); 10095 ipif_refrele(ipif); 10096 ill = NULL; 10097 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10098 goto multicast_discard; 10099 } 10100 ill_refhold_locked(ill); 10101 mutex_exit(&ill->ill_lock); 10102 ipif_refrele(ipif); 10103 /* 10104 * Save binding until IPV6_MULTICAST_IF 10105 * changes it 10106 */ 10107 mutex_enter(&connp->conn_lock); 10108 connp->conn_multicast_ill = ill; 10109 connp->conn_orig_multicast_ifindex = 10110 ill->ill_phyint->phyint_ifindex; 10111 mutex_exit(&connp->conn_lock); 10112 } 10113 } 10114 if (conn_lock_held) 10115 mutex_exit(&connp->conn_lock); 10116 10117 if (saved_ill != NULL) 10118 ill_refrele(saved_ill); 10119 10120 ASSERT(ill != NULL); 10121 /* 10122 * For multicast loopback interfaces replace the multicast address 10123 * with a unicast address for the ire lookup. 
10124 */ 10125 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 10126 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10127 10128 mibptr = ill->ill_ip6_mib; 10129 if (do_outrequests) { 10130 BUMP_MIB(mibptr, ipv6OutRequests); 10131 do_outrequests = B_FALSE; 10132 } 10133 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10134 10135 /* 10136 * As we may lose the conn by the time we reach ip_wput_ire_v6 10137 * we copy conn_multicast_loop and conn_dontroute on to an 10138 * ipsec_out. In case if this datagram goes out secure, 10139 * we need the ill_index also. Copy that also into the 10140 * ipsec_out. 10141 */ 10142 if (mctl_present) { 10143 io = (ipsec_out_t *)first_mp->b_rptr; 10144 ASSERT(first_mp->b_datap->db_type == M_CTL); 10145 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10146 } else { 10147 ASSERT(mp == first_mp); 10148 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 10149 BUMP_MIB(mibptr, ipv6OutDiscards); 10150 freemsg(mp); 10151 if (ill != NULL) 10152 ill_refrele(ill); 10153 if (need_decref) 10154 CONN_DEC_REF(connp); 10155 return; 10156 } 10157 io = (ipsec_out_t *)first_mp->b_rptr; 10158 /* This is not a secure packet */ 10159 io->ipsec_out_secure = B_FALSE; 10160 io->ipsec_out_use_global_policy = B_TRUE; 10161 io->ipsec_out_zoneid = 10162 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10163 first_mp->b_cont = mp; 10164 mctl_present = B_TRUE; 10165 } 10166 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10167 io->ipsec_out_unspec_src = unspec_src; 10168 if (connp != NULL) 10169 io->ipsec_out_dontroute = connp->conn_dontroute; 10170 10171 send_from_ill: 10172 ASSERT(ill != NULL); 10173 ASSERT(mibptr == ill->ill_ip6_mib); 10174 if (do_outrequests) { 10175 BUMP_MIB(mibptr, ipv6OutRequests); 10176 do_outrequests = B_FALSE; 10177 } 10178 10179 if (io != NULL) 10180 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10181 10182 /* 10183 * When a specific ill is specified (using IPV6_PKTINFO, 10184 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10185 * on routing entries (ftable and ctable) that have a matching 10186 * ire->ire_ipif->ipif_ill. Thus this can only be used 10187 * for destinations that are on-link for the specific ill 10188 * and that can appear on multiple links. Thus it is useful 10189 * for multicast destinations, link-local destinations, and 10190 * at some point perhaps for site-local destinations (if the 10191 * node sits at a site boundary). 10192 * We create the cache entries in the regular ctable since 10193 * it can not "confuse" things for other destinations. 10194 * table. 10195 * 10196 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10197 * It is used only when ire_cache_lookup is used above. 10198 */ 10199 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10200 zoneid, MBLK_GETLABEL(mp), match_flags); 10201 if (ire != NULL) { 10202 /* 10203 * Check if the ire has the RTF_MULTIRT flag, inherited 10204 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10205 */ 10206 if (ire->ire_flags & RTF_MULTIRT) { 10207 /* 10208 * Force hop limit of multirouted packets if required. 10209 * The hop limit of such packets is bounded by the 10210 * ip_multirt_ttl ndd variable. 
10211 * NDP packets must have a hop limit of 255; don't 10212 * change the hop limit in that case. 10213 */ 10214 if ((ip_multirt_ttl > 0) && 10215 (ip6h->ip6_hops > ip_multirt_ttl) && 10216 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10217 if (ip_debug > 3) { 10218 ip2dbg(("ip_wput_v6: forcing multirt " 10219 "hop limit to %d (was %d) ", 10220 ip_multirt_ttl, ip6h->ip6_hops)); 10221 pr_addr_dbg("v6dst %s\n", AF_INET6, 10222 &ire->ire_addr_v6); 10223 } 10224 ip6h->ip6_hops = ip_multirt_ttl; 10225 } 10226 10227 /* 10228 * We look at this point if there are pending 10229 * unresolved routes. ire_multirt_need_resolve_v6() 10230 * checks in O(n) that all IRE_OFFSUBNET ire 10231 * entries for the packet's destination and 10232 * flagged RTF_MULTIRT are currently resolved. 10233 * If some remain unresolved, we make a copy 10234 * of the current message. It will be used 10235 * to initiate additional route resolutions. 10236 */ 10237 multirt_need_resolve = 10238 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10239 MBLK_GETLABEL(first_mp)); 10240 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10241 "multirt_need_resolve %d, first_mp %p\n", 10242 (void *)ire, multirt_need_resolve, 10243 (void *)first_mp)); 10244 if (multirt_need_resolve) { 10245 copy_mp = copymsg(first_mp); 10246 if (copy_mp != NULL) { 10247 MULTIRT_DEBUG_TAG(copy_mp); 10248 } 10249 } 10250 } 10251 10252 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10253 ill->ill_name, (void *)ire, 10254 ill->ill_phyint->phyint_ifindex)); 10255 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10256 connp, caller, 10257 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10258 ip6i_flags); 10259 ire_refrele(ire); 10260 if (need_decref) { 10261 CONN_DEC_REF(connp); 10262 connp = NULL; 10263 } 10264 10265 /* 10266 * Try to resolve another multiroute if 10267 * ire_multirt_need_resolve_v6() deemed it necessary. 10268 * copy_mp will be consumed (sent or freed) by 10269 * ip_newroute_[ipif_]v6(). 
10270 */ 10271 if (copy_mp != NULL) { 10272 if (mctl_present) { 10273 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10274 } else { 10275 ip6h = (ip6_t *)copy_mp->b_rptr; 10276 } 10277 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10278 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10279 zoneid); 10280 if (ipif == NULL) { 10281 ip1dbg(("ip_wput_v6: No ipif for " 10282 "multicast\n")); 10283 MULTIRT_DEBUG_UNTAG(copy_mp); 10284 freemsg(copy_mp); 10285 return; 10286 } 10287 ip_newroute_ipif_v6(q, copy_mp, ipif, 10288 ip6h->ip6_dst, unspec_src, zoneid); 10289 ipif_refrele(ipif); 10290 } else { 10291 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10292 &ip6h->ip6_src, ill, zoneid); 10293 } 10294 } 10295 ill_refrele(ill); 10296 return; 10297 } 10298 if (need_decref) { 10299 CONN_DEC_REF(connp); 10300 connp = NULL; 10301 } 10302 10303 /* Update rptr if there was an ip6i_t header. */ 10304 if (ip6i != NULL) 10305 mp->b_rptr -= sizeof (ip6i_t); 10306 if (unspec_src || attach_if) { 10307 if (ip6i == NULL) { 10308 /* 10309 * Add ip6i_t header to carry unspec_src 10310 * or attach_if until the packet comes back in 10311 * ip_wput_v6. 10312 */ 10313 if (mctl_present) { 10314 first_mp->b_cont = 10315 ip_add_info_v6(mp, NULL, v6dstp); 10316 mp = first_mp->b_cont; 10317 if (mp == NULL) 10318 freeb(first_mp); 10319 } else { 10320 first_mp = mp = ip_add_info_v6(mp, NULL, 10321 v6dstp); 10322 } 10323 if (mp == NULL) { 10324 BUMP_MIB(mibptr, ipv6OutDiscards); 10325 ill_refrele(ill); 10326 return; 10327 } 10328 ip6i = (ip6i_t *)mp->b_rptr; 10329 if ((mp->b_wptr - (uchar_t *)ip6i) == 10330 sizeof (ip6i_t)) { 10331 /* 10332 * ndp_resolver called from ip_newroute_v6 10333 * expects a pulled up message. 
10334 */ 10335 if (!pullupmsg(mp, -1)) { 10336 ip1dbg(("ip_wput_v6: pullupmsg" 10337 " failed\n")); 10338 BUMP_MIB(mibptr, ipv6OutDiscards); 10339 freemsg(first_mp); 10340 return; 10341 } 10342 ip6i = (ip6i_t *)mp->b_rptr; 10343 } 10344 ip6h = (ip6_t *)&ip6i[1]; 10345 v6dstp = &ip6h->ip6_dst; 10346 } 10347 if (unspec_src) 10348 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10349 if (attach_if) { 10350 /* 10351 * Bind to nofailover/BOUND_PIF overrides ifindex. 10352 */ 10353 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10354 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10355 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10356 if (drop_if_delayed) { 10357 /* This is a multipathing probe packet */ 10358 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10359 } 10360 } 10361 if (mctl_present) { 10362 ASSERT(io != NULL); 10363 io->ipsec_out_unspec_src = unspec_src; 10364 } 10365 } 10366 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10367 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10368 unspec_src, zoneid); 10369 } else { 10370 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10371 zoneid); 10372 } 10373 ill_refrele(ill); 10374 return; 10375 10376 notv6: 10377 /* 10378 * XXX implement a IPv4 and IPv6 packet counter per conn and 10379 * switch when ratio exceeds e.g. 
10:1 10380 */ 10381 if (q->q_next == NULL) { 10382 connp = Q_TO_CONN(q); 10383 10384 if (IPCL_IS_TCP(connp)) { 10385 /* change conn_send for the tcp_v4_connections */ 10386 connp->conn_send = ip_output; 10387 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10388 /* The 'q' is the default SCTP queue */ 10389 connp = (conn_t *)arg; 10390 } else { 10391 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 10392 } 10393 } 10394 BUMP_MIB(mibptr, ipv6OutIPv4); 10395 (void) ip_output(connp, first_mp, q, caller); 10396 if (ill != NULL) 10397 ill_refrele(ill); 10398 } 10399 10400 static void 10401 ip_wput_v6(queue_t *q, mblk_t *mp) 10402 { 10403 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10404 } 10405 10406 static void 10407 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10408 { 10409 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10410 io->ipsec_out_attach_if = B_TRUE; 10411 io->ipsec_out_ill_index = attach_index; 10412 } 10413 10414 /* 10415 * NULL send-to queue - packet is to be delivered locally. 10416 */ 10417 void 10418 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10419 ire_t *ire, int fanout_flags) 10420 { 10421 uint32_t ports; 10422 mblk_t *mp = first_mp, *first_mp1; 10423 boolean_t mctl_present; 10424 uint8_t nexthdr; 10425 uint16_t hdr_length; 10426 ipsec_out_t *io; 10427 mib2_ipv6IfStatsEntry_t *mibptr; 10428 ilm_t *ilm; 10429 uint_t nexthdr_offset; 10430 10431 if (DB_TYPE(mp) == M_CTL) { 10432 io = (ipsec_out_t *)mp->b_rptr; 10433 if (!io->ipsec_out_secure) { 10434 mp = mp->b_cont; 10435 freeb(first_mp); 10436 first_mp = mp; 10437 mctl_present = B_FALSE; 10438 } else { 10439 mctl_present = B_TRUE; 10440 mp = first_mp->b_cont; 10441 ipsec_out_to_in(first_mp); 10442 } 10443 } else { 10444 mctl_present = B_FALSE; 10445 } 10446 10447 nexthdr = ip6h->ip6_nxt; 10448 mibptr = ill->ill_ip6_mib; 10449 10450 /* Fastpath */ 10451 switch (nexthdr) { 10452 case IPPROTO_TCP: 10453 case IPPROTO_UDP: 10454 case IPPROTO_ICMPV6: 10455 case IPPROTO_SCTP: 10456 
hdr_length = IPV6_HDR_LEN; 10457 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10458 (uchar_t *)ip6h); 10459 break; 10460 default: { 10461 uint8_t *nexthdrp; 10462 10463 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10464 &hdr_length, &nexthdrp)) { 10465 /* Malformed packet */ 10466 BUMP_MIB(mibptr, ipv6OutDiscards); 10467 freemsg(first_mp); 10468 return; 10469 } 10470 nexthdr = *nexthdrp; 10471 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10472 break; 10473 } 10474 } 10475 10476 10477 UPDATE_OB_PKT_COUNT(ire); 10478 ire->ire_last_used_time = lbolt; 10479 10480 /* 10481 * Remove reacability confirmation bit from version field 10482 * before looping back the packet. 10483 */ 10484 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10485 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10486 } 10487 10488 switch (nexthdr) { 10489 case IPPROTO_TCP: 10490 if (DB_TYPE(mp) == M_DATA) { 10491 /* 10492 * M_DATA mblk, so init mblk (chain) for 10493 * no struio(). 10494 */ 10495 mblk_t *mp1 = mp; 10496 10497 do { 10498 mp1->b_datap->db_struioflag = 0; 10499 } while ((mp1 = mp1->b_cont) != NULL); 10500 } 10501 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10502 TCP_PORTS_OFFSET); 10503 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10504 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10505 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10506 hdr_length, mctl_present, ire->ire_zoneid); 10507 return; 10508 10509 case IPPROTO_UDP: 10510 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10511 UDP_PORTS_OFFSET); 10512 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10513 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10514 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10515 return; 10516 10517 case IPPROTO_SCTP: 10518 { 10519 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10520 10521 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10522 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10523 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10524 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10525 
ire->ire_zoneid); 10526 return; 10527 } 10528 case IPPROTO_ICMPV6: { 10529 icmp6_t *icmp6; 10530 10531 /* check for full IPv6+ICMPv6 header */ 10532 if ((mp->b_wptr - mp->b_rptr) < 10533 (hdr_length + ICMP6_MINLEN)) { 10534 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10535 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10536 " failed\n")); 10537 BUMP_MIB(mibptr, ipv6OutDiscards); 10538 freemsg(first_mp); 10539 return; 10540 } 10541 ip6h = (ip6_t *)mp->b_rptr; 10542 } 10543 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10544 10545 /* Update output mib stats */ 10546 icmp_update_out_mib_v6(ill, icmp6); 10547 10548 /* Check variable for testing applications */ 10549 if (ipv6_drop_inbound_icmpv6) { 10550 freemsg(first_mp); 10551 return; 10552 } 10553 /* 10554 * Assume that there is always at least one conn for 10555 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10556 * where there is no conn. 10557 */ 10558 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10559 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10560 /* 10561 * In the multicast case, applications may have 10562 * joined the group from different zones, so we 10563 * need to deliver the packet to each of them. 10564 * Loop through the multicast memberships 10565 * structures (ilm) on the receive ill and send 10566 * a copy of the packet up each matching one. 10567 * However, we don't do this for multicasts sent 10568 * on the loopback interface (PHYI_LOOPBACK flag 10569 * set) as they must stay in the sender's zone. 
10570 */ 10571 ILM_WALKER_HOLD(ill); 10572 for (ilm = ill->ill_ilm; ilm != NULL; 10573 ilm = ilm->ilm_next) { 10574 if (ilm->ilm_flags & ILM_DELETED) 10575 continue; 10576 if (!IN6_ARE_ADDR_EQUAL( 10577 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10578 continue; 10579 if ((fanout_flags & 10580 IP_FF_NO_MCAST_LOOP) && 10581 ilm->ilm_zoneid == ire->ire_zoneid) 10582 continue; 10583 if (!ipif_lookup_zoneid(ill, 10584 ilm->ilm_zoneid, IPIF_UP, NULL)) 10585 continue; 10586 10587 first_mp1 = ip_copymsg(first_mp); 10588 if (first_mp1 == NULL) 10589 continue; 10590 icmp_inbound_v6(q, first_mp1, ill, 10591 hdr_length, mctl_present, 10592 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10593 NULL); 10594 } 10595 ILM_WALKER_RELE(ill); 10596 } else { 10597 first_mp1 = ip_copymsg(first_mp); 10598 if (first_mp1 != NULL) 10599 icmp_inbound_v6(q, first_mp1, ill, 10600 hdr_length, mctl_present, 10601 IP6_NO_IPPOLICY, ire->ire_zoneid, 10602 NULL); 10603 } 10604 } 10605 /* FALLTHRU */ 10606 default: { 10607 /* 10608 * Handle protocols with which IPv6 is less intimate. 10609 */ 10610 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10611 10612 /* 10613 * Enable sending ICMP for "Unknown" nexthdr 10614 * case. i.e. where we did not FALLTHRU from 10615 * IPPROTO_ICMPV6 processing case above. 10616 */ 10617 if (nexthdr != IPPROTO_ICMPV6) 10618 fanout_flags |= IP_FF_SEND_ICMP; 10619 /* 10620 * Note: There can be more than one stream bound 10621 * to a particular protocol. When this is the case, 10622 * each one gets a copy of any incoming packets. 10623 */ 10624 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10625 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10626 mctl_present, ire->ire_zoneid); 10627 return; 10628 } 10629 } 10630 } 10631 10632 /* 10633 * Send packet using IRE. 10634 * Checksumming is controlled by cksum_request: 10635 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 
10636 * 1 => Skip TCP/UDP/SCTP checksum 10637 * Otherwise => checksum_request contains insert offset for checksum 10638 * 10639 * Assumes that the following set of headers appear in the first 10640 * mblk: 10641 * ip6_t 10642 * Any extension headers 10643 * TCP/UDP/SCTP header (if present) 10644 * The routine can handle an ICMPv6 header that is not in the first mblk. 10645 * 10646 * NOTE : This function does not ire_refrele the ire passed in as the 10647 * argument unlike ip_wput_ire where the REFRELE is done. 10648 * Refer to ip_wput_ire for more on this. 10649 */ 10650 static void 10651 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10652 int cksum_request, conn_t *connp, int caller, int attach_index, int flags) 10653 { 10654 ip6_t *ip6h; 10655 uint8_t nexthdr; 10656 uint16_t hdr_length; 10657 uint_t reachable = 0x0; 10658 ill_t *ill; 10659 mib2_ipv6IfStatsEntry_t *mibptr; 10660 mblk_t *first_mp; 10661 boolean_t mctl_present; 10662 ipsec_out_t *io; 10663 boolean_t conn_dontroute; /* conn value for multicast */ 10664 boolean_t conn_multicast_loop; /* conn value for multicast */ 10665 boolean_t multicast_forward; /* Should we forward ? */ 10666 int max_frag; 10667 zoneid_t zoneid; 10668 10669 zoneid = (connp != NULL ? connp->conn_zoneid : ALL_ZONES); 10670 ill = ire_to_ill(ire); 10671 first_mp = mp; 10672 multicast_forward = B_FALSE; 10673 10674 if (mp->b_datap->db_type != M_CTL) { 10675 ip6h = (ip6_t *)first_mp->b_rptr; 10676 } else { 10677 io = (ipsec_out_t *)first_mp->b_rptr; 10678 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10679 /* 10680 * Grab the zone id now because the M_CTL can be discarded by 10681 * ip_wput_ire_parse_ipsec_out() below. 10682 */ 10683 zoneid = io->ipsec_out_zoneid; 10684 ASSERT(zoneid != ALL_ZONES); 10685 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10686 /* 10687 * For the multicast case, ipsec_out carries conn_dontroute and 10688 * conn_multicast_loop as conn may not be available here. 
We 10689 * need this for multicast loopback and forwarding which is done 10690 * later in the code. 10691 */ 10692 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10693 conn_dontroute = io->ipsec_out_dontroute; 10694 conn_multicast_loop = io->ipsec_out_multicast_loop; 10695 /* 10696 * If conn_dontroute is not set or conn_multicast_loop 10697 * is set, we need to do forwarding/loopback. For 10698 * datagrams from ip_wput_multicast, conn_dontroute is 10699 * set to B_TRUE and conn_multicast_loop is set to 10700 * B_FALSE so that we neither do forwarding nor 10701 * loopback. 10702 */ 10703 if (!conn_dontroute || conn_multicast_loop) 10704 multicast_forward = B_TRUE; 10705 } 10706 } 10707 10708 /* 10709 * If the sender didn't supply the hop limit and there is a default 10710 * unicast hop limit associated with the output interface, we use 10711 * that if the packet is unicast. Interface specific unicast hop 10712 * limits as set via the SIOCSLIFLNKINFO ioctl. 10713 */ 10714 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10715 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10716 ip6h->ip6_hops = ill->ill_max_hops; 10717 } 10718 10719 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10720 ire->ire_zoneid != ALL_ZONES) { 10721 /* 10722 * When a zone sends a packet to another zone, we try to deliver 10723 * the packet under the same conditions as if the destination 10724 * was a real node on the network. To do so, we look for a 10725 * matching route in the forwarding table. 10726 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10727 * ip_newroute_v6() does. 
10728 */ 10729 ire_t *src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10730 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10731 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10732 if (src_ire != NULL && 10733 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 10734 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10735 !unspec_src) { 10736 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10737 } 10738 ire_refrele(src_ire); 10739 } else { 10740 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 10741 if (src_ire != NULL) { 10742 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10743 ire_refrele(src_ire); 10744 freemsg(first_mp); 10745 return; 10746 } 10747 ire_refrele(src_ire); 10748 } 10749 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10750 /* Failed */ 10751 freemsg(first_mp); 10752 return; 10753 } 10754 icmp_unreachable_v6(q, first_mp, 10755 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE); 10756 return; 10757 } 10758 } 10759 10760 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10761 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10762 connp, unspec_src); 10763 if (mp == NULL) { 10764 return; 10765 } 10766 } 10767 10768 first_mp = mp; 10769 if (mp->b_datap->db_type == M_CTL) { 10770 io = (ipsec_out_t *)mp->b_rptr; 10771 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10772 mp = mp->b_cont; 10773 mctl_present = B_TRUE; 10774 } else { 10775 mctl_present = B_FALSE; 10776 } 10777 10778 ip6h = (ip6_t *)mp->b_rptr; 10779 nexthdr = ip6h->ip6_nxt; 10780 mibptr = ill->ill_ip6_mib; 10781 10782 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10783 ipif_t *ipif; 10784 10785 /* 10786 * Select the source address using ipif_select_source_v6. 
10787 */ 10788 if (attach_index != 0) { 10789 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10790 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10791 } else { 10792 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10793 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10794 } 10795 if (ipif == NULL) { 10796 if (ip_debug > 2) { 10797 /* ip1dbg */ 10798 pr_addr_dbg("ip_wput_ire_v6: no src for " 10799 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10800 printf("ip_wput_ire_v6: interface name %s\n", 10801 ill->ill_name); 10802 } 10803 freemsg(first_mp); 10804 return; 10805 } 10806 ip6h->ip6_src = ipif->ipif_v6src_addr; 10807 ipif_refrele(ipif); 10808 } 10809 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10810 if ((connp != NULL && connp->conn_multicast_loop) || 10811 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10812 ilm_t *ilm; 10813 10814 ILM_WALKER_HOLD(ill); 10815 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10816 ILM_WALKER_RELE(ill); 10817 if (ilm != NULL) { 10818 mblk_t *nmp; 10819 int fanout_flags = 0; 10820 10821 if (connp != NULL && 10822 !connp->conn_multicast_loop) { 10823 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10824 } 10825 ip1dbg(("ip_wput_ire_v6: " 10826 "Loopback multicast\n")); 10827 nmp = ip_copymsg(first_mp); 10828 if (nmp != NULL) { 10829 ip6_t *nip6h; 10830 10831 if (mctl_present) { 10832 nip6h = (ip6_t *) 10833 nmp->b_cont->b_rptr; 10834 } else { 10835 nip6h = (ip6_t *)nmp->b_rptr; 10836 } 10837 /* 10838 * Deliver locally and to every local 10839 * zone, except the sending zone when 10840 * IPV6_MULTICAST_LOOP is disabled. 
10841 */ 10842 ip_wput_local_v6(RD(q), ill, nip6h, nmp, 10843 ire, fanout_flags); 10844 } else { 10845 BUMP_MIB(mibptr, ipv6OutDiscards); 10846 ip1dbg(("ip_wput_ire_v6: " 10847 "copymsg failed\n")); 10848 } 10849 } 10850 } 10851 if (ip6h->ip6_hops == 0 || 10852 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10853 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10854 /* 10855 * Local multicast or just loopback on loopback 10856 * interface. 10857 */ 10858 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10859 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10860 freemsg(first_mp); 10861 return; 10862 } 10863 } 10864 10865 if (ire->ire_stq != NULL) { 10866 uint32_t sum; 10867 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10868 ill_phyint->phyint_ifindex; 10869 queue_t *dev_q = ire->ire_stq->q_next; 10870 10871 /* 10872 * non-NULL send-to queue - packet is to be sent 10873 * out an interface. 10874 */ 10875 10876 /* Driver is flow-controlling? */ 10877 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10878 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 10879 /* 10880 * Queue packet if we have an conn to give back 10881 * pressure. We can't queue packets intended for 10882 * hardware acceleration since we've tossed that 10883 * state already. If the packet is being fed back 10884 * from ire_send_v6, we don't know the position in 10885 * the queue to enqueue the packet and we discard 10886 * the packet. 10887 */ 10888 if (ip_output_queue && connp != NULL && 10889 !mctl_present && caller != IRE_SEND) { 10890 if (caller == IP_WSRV) { 10891 connp->conn_did_putbq = 1; 10892 (void) putbq(connp->conn_wq, mp); 10893 conn_drain_insert(connp); 10894 /* 10895 * caller == IP_WSRV implies we are 10896 * the service thread, and the 10897 * queue is already noenabled. 10898 * The check for canput and 10899 * the putbq is not atomic. 10900 * So we need to check again. 
10901 */ 10902 if (canput(dev_q)) 10903 connp->conn_did_putbq = 0; 10904 } else { 10905 (void) putq(connp->conn_wq, mp); 10906 } 10907 return; 10908 } 10909 BUMP_MIB(mibptr, ipv6OutDiscards); 10910 freemsg(first_mp); 10911 return; 10912 } 10913 10914 /* 10915 * Look for reachability confirmations from the transport. 10916 */ 10917 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10918 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10919 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10920 if (mctl_present) 10921 io->ipsec_out_reachable = B_TRUE; 10922 } 10923 /* Fastpath */ 10924 switch (nexthdr) { 10925 case IPPROTO_TCP: 10926 case IPPROTO_UDP: 10927 case IPPROTO_ICMPV6: 10928 case IPPROTO_SCTP: 10929 hdr_length = IPV6_HDR_LEN; 10930 break; 10931 default: { 10932 uint8_t *nexthdrp; 10933 10934 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10935 &hdr_length, &nexthdrp)) { 10936 /* Malformed packet */ 10937 BUMP_MIB(mibptr, ipv6OutDiscards); 10938 freemsg(first_mp); 10939 return; 10940 } 10941 nexthdr = *nexthdrp; 10942 break; 10943 } 10944 } 10945 10946 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10947 uint16_t *up; 10948 uint16_t *insp; 10949 10950 /* 10951 * The packet header is processed once for all, even 10952 * in the multirouting case. We disable hardware 10953 * checksum if the packet is multirouted, as it will be 10954 * replicated via several interfaces, and not all of 10955 * them may have this capability. 10956 */ 10957 if (cksum_request == 1 && 10958 !(ire->ire_flags & RTF_MULTIRT)) { 10959 /* Skip the transport checksum */ 10960 goto cksum_done; 10961 } 10962 /* 10963 * Do user-configured raw checksum. 
10964 * Compute checksum and insert at offset "cksum_request" 10965 */ 10966 10967 /* check for enough headers for checksum */ 10968 cksum_request += hdr_length; /* offset from rptr */ 10969 if ((mp->b_wptr - mp->b_rptr) < 10970 (cksum_request + sizeof (int16_t))) { 10971 if (!pullupmsg(mp, 10972 cksum_request + sizeof (int16_t))) { 10973 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10974 " failed\n")); 10975 BUMP_MIB(mibptr, ipv6OutDiscards); 10976 freemsg(first_mp); 10977 return; 10978 } 10979 ip6h = (ip6_t *)mp->b_rptr; 10980 } 10981 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10982 ASSERT(((uintptr_t)insp & 0x1) == 0); 10983 up = (uint16_t *)&ip6h->ip6_src; 10984 /* 10985 * icmp has placed length and routing 10986 * header adjustment in *insp. 10987 */ 10988 sum = htons(nexthdr) + 10989 up[0] + up[1] + up[2] + up[3] + 10990 up[4] + up[5] + up[6] + up[7] + 10991 up[8] + up[9] + up[10] + up[11] + 10992 up[12] + up[13] + up[14] + up[15]; 10993 sum = (sum & 0xffff) + (sum >> 16); 10994 *insp = IP_CSUM(mp, hdr_length, sum); 10995 if (*insp == 0) 10996 *insp = 0xFFFF; 10997 } else if (nexthdr == IPPROTO_TCP) { 10998 uint16_t *up; 10999 11000 /* 11001 * Check for full IPv6 header + enough TCP header 11002 * to get at the checksum field. 11003 */ 11004 if ((mp->b_wptr - mp->b_rptr) < 11005 (hdr_length + TCP_CHECKSUM_OFFSET + 11006 TCP_CHECKSUM_SIZE)) { 11007 if (!pullupmsg(mp, hdr_length + 11008 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11009 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11010 " failed\n")); 11011 BUMP_MIB(mibptr, ipv6OutDiscards); 11012 freemsg(first_mp); 11013 return; 11014 } 11015 ip6h = (ip6_t *)mp->b_rptr; 11016 } 11017 11018 up = (uint16_t *)&ip6h->ip6_src; 11019 /* 11020 * Note: The TCP module has stored the length value 11021 * into the tcp checksum field, so we don't 11022 * need to explicitly sum it in here. 
11023 */ 11024 sum = up[0] + up[1] + up[2] + up[3] + 11025 up[4] + up[5] + up[6] + up[7] + 11026 up[8] + up[9] + up[10] + up[11] + 11027 up[12] + up[13] + up[14] + up[15]; 11028 11029 /* Fold the initial sum */ 11030 sum = (sum & 0xffff) + (sum >> 16); 11031 11032 up = (uint16_t *)(((uchar_t *)ip6h) + 11033 hdr_length + TCP_CHECKSUM_OFFSET); 11034 11035 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11036 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11037 ire->ire_max_frag, mctl_present, sum); 11038 11039 /* Software checksum? */ 11040 if (DB_CKSUMFLAGS(mp) == 0) { 11041 IP6_STAT(ip6_out_sw_cksum); 11042 IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, 11043 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11044 hdr_length); 11045 } 11046 } else if (nexthdr == IPPROTO_UDP) { 11047 uint16_t *up; 11048 11049 /* 11050 * check for full IPv6 header + enough UDP header 11051 * to get at the UDP checksum field 11052 */ 11053 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11054 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11055 if (!pullupmsg(mp, hdr_length + 11056 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11057 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11058 " failed\n")); 11059 BUMP_MIB(mibptr, ipv6OutDiscards); 11060 freemsg(first_mp); 11061 return; 11062 } 11063 ip6h = (ip6_t *)mp->b_rptr; 11064 } 11065 up = (uint16_t *)&ip6h->ip6_src; 11066 /* 11067 * Note: The UDP module has stored the length value 11068 * into the udp checksum field, so we don't 11069 * need to explicitly sum it in here. 
11070 */ 11071 sum = up[0] + up[1] + up[2] + up[3] + 11072 up[4] + up[5] + up[6] + up[7] + 11073 up[8] + up[9] + up[10] + up[11] + 11074 up[12] + up[13] + up[14] + up[15]; 11075 11076 /* Fold the initial sum */ 11077 sum = (sum & 0xffff) + (sum >> 16); 11078 11079 up = (uint16_t *)(((uchar_t *)ip6h) + 11080 hdr_length + UDP_CHECKSUM_OFFSET); 11081 11082 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11083 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11084 ire->ire_max_frag, mctl_present, sum); 11085 11086 /* Software checksum? */ 11087 if (DB_CKSUMFLAGS(mp) == 0) { 11088 IP6_STAT(ip6_out_sw_cksum); 11089 IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, 11090 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11091 hdr_length); 11092 } 11093 } else if (nexthdr == IPPROTO_ICMPV6) { 11094 uint16_t *up; 11095 icmp6_t *icmp6; 11096 11097 /* check for full IPv6+ICMPv6 header */ 11098 if ((mp->b_wptr - mp->b_rptr) < 11099 (hdr_length + ICMP6_MINLEN)) { 11100 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11101 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11102 " failed\n")); 11103 BUMP_MIB(mibptr, ipv6OutDiscards); 11104 freemsg(first_mp); 11105 return; 11106 } 11107 ip6h = (ip6_t *)mp->b_rptr; 11108 } 11109 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11110 up = (uint16_t *)&ip6h->ip6_src; 11111 /* 11112 * icmp has placed length and routing 11113 * header adjustment in icmp6_cksum. 
11114 */ 11115 sum = htons(IPPROTO_ICMPV6) + 11116 up[0] + up[1] + up[2] + up[3] + 11117 up[4] + up[5] + up[6] + up[7] + 11118 up[8] + up[9] + up[10] + up[11] + 11119 up[12] + up[13] + up[14] + up[15]; 11120 sum = (sum & 0xffff) + (sum >> 16); 11121 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11122 if (icmp6->icmp6_cksum == 0) 11123 icmp6->icmp6_cksum = 0xFFFF; 11124 11125 /* Update output mib stats */ 11126 icmp_update_out_mib_v6(ill, icmp6); 11127 } else if (nexthdr == IPPROTO_SCTP) { 11128 sctp_hdr_t *sctph; 11129 11130 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11131 if (!pullupmsg(mp, hdr_length + 11132 sizeof (*sctph))) { 11133 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11134 " failed\n")); 11135 BUMP_MIB(ill->ill_ip6_mib, 11136 ipv6OutDiscards); 11137 freemsg(mp); 11138 return; 11139 } 11140 ip6h = (ip6_t *)mp->b_rptr; 11141 } 11142 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11143 sctph->sh_chksum = 0; 11144 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11145 } 11146 11147 cksum_done: 11148 /* 11149 * We force the insertion of a fragment header using the 11150 * IPH_FRAG_HDR flag in two cases: 11151 * - after reception of an ICMPv6 "packet too big" message 11152 * with a MTU < 1280 (cf. RFC 2460 section 5) 11153 * - for multirouted IPv6 packets, so that the receiver can 11154 * discard duplicates according to their fragment identifier 11155 * 11156 * Two flags modifed from the API can modify this behavior. 11157 * The first is IPV6_USE_MIN_MTU. With this API the user 11158 * can specify how to manage PMTUD for unicast and multicast. 11159 * 11160 * IPV6_DONTFRAG disallows fragmentation. 
11161 */ 11162 max_frag = ire->ire_max_frag; 11163 switch (IP6I_USE_MIN_MTU_API(flags)) { 11164 case IPV6_USE_MIN_MTU_DEFAULT: 11165 case IPV6_USE_MIN_MTU_UNICAST: 11166 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11167 max_frag = IPV6_MIN_MTU; 11168 } 11169 break; 11170 11171 case IPV6_USE_MIN_MTU_NEVER: 11172 max_frag = IPV6_MIN_MTU; 11173 break; 11174 } 11175 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11176 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11177 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11178 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11179 max_frag, B_FALSE, B_TRUE); 11180 return; 11181 } 11182 11183 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11184 (mp->b_cont ? msgdsize(mp) : 11185 mp->b_wptr - (uchar_t *)ip6h)) { 11186 ip0dbg(("Packet length mismatch: %d, %ld\n", 11187 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11188 msgdsize(mp))); 11189 freemsg(first_mp); 11190 return; 11191 } 11192 /* Do IPSEC processing first */ 11193 if (mctl_present) { 11194 if (attach_index != 0) 11195 ipsec_out_attach_if(io, attach_index); 11196 ipsec_out_process(q, first_mp, ire, ill_index); 11197 return; 11198 } 11199 ASSERT(mp->b_prev == NULL); 11200 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11201 ntohs(ip6h->ip6_plen) + 11202 IPV6_HDR_LEN, max_frag)); 11203 ASSERT(mp == first_mp); 11204 /* Initiate IPPF processing */ 11205 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 11206 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11207 if (mp == NULL) { 11208 return; 11209 } 11210 } 11211 ip_wput_frag_v6(mp, ire, reachable, connp, 11212 caller, max_frag); 11213 return; 11214 } 11215 /* Do IPSEC processing first */ 11216 if (mctl_present) { 11217 int extra_len = ipsec_out_extra_length(first_mp); 11218 11219 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11220 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 11221 /* 11222 * IPsec headers will push the packet over the 11223 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11224 * message for this packet if the upper-layer 11225 * that issued this packet will be able to 11226 * react to the icmp_pkt2big_v6() that we'll 11227 * generate. 11228 */ 11229 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11230 max_frag, B_FALSE, B_TRUE); 11231 return; 11232 } 11233 if (attach_index != 0) 11234 ipsec_out_attach_if(io, attach_index); 11235 ipsec_out_process(q, first_mp, ire, ill_index); 11236 return; 11237 } 11238 /* 11239 * XXX multicast: add ip_mforward_v6() here. 11240 * Check conn_dontroute 11241 */ 11242 #ifdef lint 11243 /* 11244 * XXX The only purpose of this statement is to avoid lint 11245 * errors. See the above "XXX multicast". When that gets 11246 * fixed, remove this whole #ifdef lint section. 11247 */ 11248 ip3dbg(("multicast forward is %s.\n", 11249 (multicast_forward ? "TRUE" : "FALSE"))); 11250 #endif 11251 11252 UPDATE_OB_PKT_COUNT(ire); 11253 ire->ire_last_used_time = lbolt; 11254 ASSERT(mp == first_mp); 11255 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11256 } else { 11257 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11258 } 11259 } 11260 11261 /* 11262 * Outbound IPv6 fragmentation routine using MDT. 11263 */ 11264 static void 11265 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11266 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11267 { 11268 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11269 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11270 mblk_t *hdr_mp, *md_mp = NULL; 11271 int i1; 11272 multidata_t *mmd; 11273 unsigned char *hdr_ptr, *pld_ptr; 11274 ip_pdescinfo_t pdi; 11275 uint32_t ident; 11276 size_t len; 11277 uint16_t offset; 11278 queue_t *stq = ire->ire_stq; 11279 ill_t *ill = (ill_t *)stq->q_ptr; 11280 11281 ASSERT(DB_TYPE(mp) == M_DATA); 11282 ASSERT(MBLKL(mp) > unfragmentable_len); 11283 11284 /* 11285 * Move read ptr past unfragmentable portion, we don't want this part 11286 * of the data in our fragments. 
11287 */ 11288 mp->b_rptr += unfragmentable_len; 11289 11290 /* Calculate how many packets we will send out */ 11291 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11292 pkts = (i1 + max_chunk - 1) / max_chunk; 11293 ASSERT(pkts > 1); 11294 11295 /* Allocate a message block which will hold all the IP Headers. */ 11296 wroff = ip_wroff_extra; 11297 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11298 11299 i1 = pkts * hdr_chunk_len; 11300 /* 11301 * Create the header buffer, Multidata and destination address 11302 * and SAP attribute that should be associated with it. 11303 */ 11304 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11305 ((hdr_mp->b_wptr += i1), 11306 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11307 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11308 freemsg(mp); 11309 if (md_mp == NULL) { 11310 freemsg(hdr_mp); 11311 } else { 11312 free_mmd: IP6_STAT(ip6_frag_mdt_discarded); 11313 freemsg(md_mp); 11314 } 11315 IP6_STAT(ip6_frag_mdt_allocfail); 11316 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11317 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutDiscards, pkts); 11318 return; 11319 } 11320 IP6_STAT(ip6_frag_mdt_allocd); 11321 11322 /* 11323 * Add a payload buffer to the Multidata; this operation must not 11324 * fail, or otherwise our logic in this routine is broken. There 11325 * is no memory allocation done by the routine, so any returned 11326 * failure simply tells us that we've done something wrong. 11327 * 11328 * A failure tells us that either we're adding the same payload 11329 * buffer more than once, or we're trying to add more buffers than 11330 * allowed. None of the above cases should happen, and we panic 11331 * because either there's horrible heap corruption, and/or 11332 * programming mistake. 
11333 */ 11334 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11335 goto pbuf_panic; 11336 } 11337 11338 hdr_ptr = hdr_mp->b_rptr; 11339 pld_ptr = mp->b_rptr; 11340 11341 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11342 11343 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11344 11345 /* 11346 * len is the total length of the fragmentable data in this 11347 * datagram. For each fragment sent, we will decrement len 11348 * by the amount of fragmentable data sent in that fragment 11349 * until len reaches zero. 11350 */ 11351 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11352 11353 offset = 0; 11354 prev_nexthdr_offset += wroff; 11355 11356 while (len != 0) { 11357 size_t mlen; 11358 ip6_t *fip6h; 11359 ip6_frag_t *fraghdr; 11360 int error; 11361 11362 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11363 mlen = MIN(len, max_chunk); 11364 len -= mlen; 11365 11366 fip6h = (ip6_t *)(hdr_ptr + wroff); 11367 ASSERT(OK_32PTR(fip6h)); 11368 bcopy(ip6h, fip6h, unfragmentable_len); 11369 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11370 11371 fip6h->ip6_plen = htons((uint16_t)(mlen + 11372 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11373 11374 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11375 unfragmentable_len); 11376 fraghdr->ip6f_nxt = nexthdr; 11377 fraghdr->ip6f_reserved = 0; 11378 fraghdr->ip6f_offlg = htons(offset) | 11379 ((len != 0) ? IP6F_MORE_FRAG : 0); 11380 fraghdr->ip6f_ident = ident; 11381 11382 /* 11383 * Record offset and size of header and data of the next packet 11384 * in the multidata message. 
11385 */ 11386 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11387 unfragmentable_len + sizeof (ip6_frag_t), 0); 11388 PDESC_PLD_INIT(&pdi); 11389 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11390 ASSERT(i1 > 0); 11391 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11392 if (i1 == mlen) { 11393 pld_ptr += mlen; 11394 } else { 11395 i1 = mlen - i1; 11396 mp = mp->b_cont; 11397 ASSERT(mp != NULL); 11398 ASSERT(MBLKL(mp) >= i1); 11399 /* 11400 * Attach the next payload message block to the 11401 * multidata message. 11402 */ 11403 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11404 goto pbuf_panic; 11405 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11406 pld_ptr = mp->b_rptr + i1; 11407 } 11408 11409 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11410 KM_NOSLEEP)) == NULL) { 11411 /* 11412 * Any failure other than ENOMEM indicates that we 11413 * have passed in invalid pdesc info or parameters 11414 * to mmd_addpdesc, which must not happen. 11415 * 11416 * EINVAL is a result of failure on boundary checks 11417 * against the pdesc info contents. It should not 11418 * happen, and we panic because either there's 11419 * horrible heap corruption, and/or programming 11420 * mistake. 11421 */ 11422 if (error != ENOMEM) { 11423 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11424 "pdesc logic error detected for " 11425 "mmd %p pinfo %p (%d)\n", 11426 (void *)mmd, (void *)&pdi, error); 11427 /* NOTREACHED */ 11428 } 11429 IP6_STAT(ip6_frag_mdt_addpdescfail); 11430 /* Free unattached payload message blocks as well */ 11431 md_mp->b_cont = mp->b_cont; 11432 goto free_mmd; 11433 } 11434 11435 /* Advance fragment offset. */ 11436 offset += mlen; 11437 11438 /* Advance to location for next header in the buffer. */ 11439 hdr_ptr += hdr_chunk_len; 11440 11441 /* Did we reach the next payload message block? */ 11442 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11443 mp = mp->b_cont; 11444 /* 11445 * Attach the next message block with payload 11446 * data to the multidata message. 
11447 */ 11448 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11449 goto pbuf_panic; 11450 pld_ptr = mp->b_rptr; 11451 } 11452 } 11453 11454 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11455 ASSERT(mp->b_wptr == pld_ptr); 11456 11457 /* Update IP statistics */ 11458 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutFragCreates, pkts); 11459 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11460 IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); 11461 11462 ire->ire_ob_pkt_count += pkts; 11463 if (ire->ire_ipif != NULL) 11464 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11465 11466 ire->ire_last_used_time = lbolt; 11467 /* Send it down */ 11468 putnext(stq, md_mp); 11469 return; 11470 11471 pbuf_panic: 11472 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11473 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11474 pbuf_idx); 11475 /* NOTREACHED */ 11476 } 11477 11478 /* 11479 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11480 * We have not optimized this in terms of number of mblks 11481 * allocated. For instance, for each fragment sent we always allocate a 11482 * mblk to hold the IPv6 header and fragment header. 11483 * 11484 * Assumes that all the extension headers are contained in the first mblk. 11485 * 11486 * The fragment header is inserted after an hop-by-hop options header 11487 * and after [an optional destinations header followed by] a routing header. 11488 * 11489 * NOTE : This function does not ire_refrele the ire passed in as 11490 * the argument. 
11491 */ 11492 void 11493 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11494 int caller, int max_frag) 11495 { 11496 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11497 ip6_t *fip6h; 11498 mblk_t *hmp; 11499 mblk_t *hmp0; 11500 mblk_t *dmp; 11501 ip6_frag_t *fraghdr; 11502 size_t unfragmentable_len; 11503 size_t len; 11504 size_t mlen; 11505 size_t max_chunk; 11506 uint32_t ident; 11507 uint16_t off_flags; 11508 uint16_t offset = 0; 11509 ill_t *ill; 11510 uint8_t nexthdr; 11511 uint_t prev_nexthdr_offset; 11512 uint8_t *ptr; 11513 11514 ASSERT(ire->ire_type == IRE_CACHE); 11515 ill = (ill_t *)ire->ire_stq->q_ptr; 11516 11517 /* 11518 * Determine the length of the unfragmentable portion of this 11519 * datagram. This consists of the IPv6 header, a potential 11520 * hop-by-hop options header, a potential pre-routing-header 11521 * destination options header, and a potential routing header. 11522 */ 11523 nexthdr = ip6h->ip6_nxt; 11524 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11525 ptr = (uint8_t *)&ip6h[1]; 11526 11527 if (nexthdr == IPPROTO_HOPOPTS) { 11528 ip6_hbh_t *hbh_hdr; 11529 uint_t hdr_len; 11530 11531 hbh_hdr = (ip6_hbh_t *)ptr; 11532 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11533 nexthdr = hbh_hdr->ip6h_nxt; 11534 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11535 - (uint8_t *)ip6h; 11536 ptr += hdr_len; 11537 } 11538 if (nexthdr == IPPROTO_DSTOPTS) { 11539 ip6_dest_t *dest_hdr; 11540 uint_t hdr_len; 11541 11542 dest_hdr = (ip6_dest_t *)ptr; 11543 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11544 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11545 nexthdr = dest_hdr->ip6d_nxt; 11546 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11547 - (uint8_t *)ip6h; 11548 ptr += hdr_len; 11549 } 11550 } 11551 if (nexthdr == IPPROTO_ROUTING) { 11552 ip6_rthdr_t *rthdr; 11553 uint_t hdr_len; 11554 11555 rthdr = (ip6_rthdr_t *)ptr; 11556 nexthdr = rthdr->ip6r_nxt; 11557 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 
11558 - (uint8_t *)ip6h; 11559 hdr_len = 8 * (rthdr->ip6r_len + 1); 11560 ptr += hdr_len; 11561 } 11562 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11563 11564 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11565 sizeof (ip6_frag_t)) & ~7; 11566 11567 /* Check if we can use MDT to send out the frags. */ 11568 ASSERT(!IRE_IS_LOCAL(ire)); 11569 if (ip_multidata_outbound && reachable == 0 && 11570 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11571 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11572 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11573 nexthdr, prev_nexthdr_offset); 11574 return; 11575 } 11576 11577 /* 11578 * Allocate an mblk with enough room for the link-layer 11579 * header, the unfragmentable part of the datagram, and the 11580 * fragment header. This (or a copy) will be used as the 11581 * first mblk for each fragment we send. 11582 */ 11583 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 11584 BPRI_HI); 11585 if (hmp == NULL) { 11586 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11587 freemsg(mp); 11588 return; 11589 } 11590 hmp->b_rptr += ip_wroff_extra; 11591 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11592 11593 fip6h = (ip6_t *)hmp->b_rptr; 11594 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11595 11596 bcopy(ip6h, fip6h, unfragmentable_len); 11597 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11598 11599 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11600 11601 fraghdr->ip6f_nxt = nexthdr; 11602 fraghdr->ip6f_reserved = 0; 11603 fraghdr->ip6f_offlg = 0; 11604 fraghdr->ip6f_ident = htonl(ident); 11605 11606 /* 11607 * len is the total length of the fragmentable data in this 11608 * datagram. For each fragment sent, we will decrement len 11609 * by the amount of fragmentable data sent in that fragment 11610 * until len reaches zero. 
11611 */ 11612 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11613 11614 /* 11615 * Move read ptr past unfragmentable portion, we don't want this part 11616 * of the data in our fragments. 11617 */ 11618 mp->b_rptr += unfragmentable_len; 11619 11620 while (len != 0) { 11621 mlen = MIN(len, max_chunk); 11622 len -= mlen; 11623 if (len != 0) { 11624 /* Not last */ 11625 hmp0 = copyb(hmp); 11626 if (hmp0 == NULL) { 11627 freeb(hmp); 11628 freemsg(mp); 11629 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11630 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11631 return; 11632 } 11633 off_flags = IP6F_MORE_FRAG; 11634 } else { 11635 /* Last fragment */ 11636 hmp0 = hmp; 11637 hmp = NULL; 11638 off_flags = 0; 11639 } 11640 fip6h = (ip6_t *)(hmp0->b_rptr); 11641 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11642 11643 fip6h->ip6_plen = htons((uint16_t)(mlen + 11644 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11645 /* 11646 * Note: Optimization alert. 11647 * In IPv6 (and IPv4) protocol header, Fragment Offset 11648 * ("offset") is 13 bits wide and in 8-octet units. 11649 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11650 * it occupies the most significant 13 bits. 11651 * (least significant 13 bits in IPv4). 11652 * We do not do any shifts here. Not shifting is same effect 11653 * as taking offset value in octet units, dividing by 8 and 11654 * then shifting 3 bits left to line it up in place in proper 11655 * place protocol header. 
11656 */ 11657 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11658 11659 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11660 /* mp has already been freed by ip_carve_mp() */ 11661 if (hmp != NULL) 11662 freeb(hmp); 11663 freeb(hmp0); 11664 ip1dbg(("ip_carve_mp: failed\n")); 11665 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11666 return; 11667 } 11668 hmp0->b_cont = dmp; 11669 /* Get the priority marking, if any */ 11670 hmp0->b_band = dmp->b_band; 11671 UPDATE_OB_PKT_COUNT(ire); 11672 ire->ire_last_used_time = lbolt; 11673 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11674 caller, NULL); 11675 reachable = 0; /* No need to redo state machine in loop */ 11676 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates); 11677 offset += mlen; 11678 } 11679 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11680 } 11681 11682 /* 11683 * Determine if the ill and multicast aspects of that packets 11684 * "matches" the conn. 11685 */ 11686 boolean_t 11687 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11688 zoneid_t zoneid) 11689 { 11690 ill_t *in_ill; 11691 boolean_t wantpacket = B_TRUE; 11692 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11693 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11694 11695 /* 11696 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11697 * unicast and multicast reception to conn_incoming_ill. 11698 * conn_wantpacket_v6 is called both for unicast and 11699 * multicast. 11700 * 11701 * 1) The unicast copy of the packet can come anywhere in 11702 * the ill group if it is part of the group. Thus, we 11703 * need to check to see whether the ill group matches 11704 * if in_ill is part of a group. 11705 * 11706 * 2) ip_rput does not suppress duplicate multicast packets. 
11707 * If there are two interfaces in a ill group and we have 11708 * 2 applications (conns) joined a multicast group G on 11709 * both the interfaces, ilm_lookup_ill filter in ip_rput 11710 * will give us two packets because we join G on both the 11711 * interfaces rather than nominating just one interface 11712 * for receiving multicast like broadcast above. So, 11713 * we have to call ilg_lookup_ill to filter out duplicate 11714 * copies, if ill is part of a group, to supress duplicates. 11715 */ 11716 in_ill = connp->conn_incoming_ill; 11717 if (in_ill != NULL) { 11718 mutex_enter(&connp->conn_lock); 11719 in_ill = connp->conn_incoming_ill; 11720 mutex_enter(&ill->ill_lock); 11721 /* 11722 * No IPMP, and the packet did not arrive on conn_incoming_ill 11723 * OR, IPMP in use and the packet arrived on an IPMP group 11724 * different from the conn_incoming_ill's IPMP group. 11725 * Reject the packet. 11726 */ 11727 if ((in_ill->ill_group == NULL && in_ill != ill) || 11728 (in_ill->ill_group != NULL && 11729 in_ill->ill_group != ill->ill_group)) { 11730 wantpacket = B_FALSE; 11731 } 11732 mutex_exit(&ill->ill_lock); 11733 mutex_exit(&connp->conn_lock); 11734 if (!wantpacket) 11735 return (B_FALSE); 11736 } 11737 11738 if (connp->conn_multi_router) 11739 return (B_TRUE); 11740 11741 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11742 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11743 /* 11744 * Unicast case: we match the conn only if it's in the specified 11745 * zone. 11746 */ 11747 return (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES); 11748 } 11749 11750 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11751 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11752 /* 11753 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11754 * disabled, therefore we don't dispatch the multicast packet to 11755 * the sending zone. 
11756 */ 11757 return (B_FALSE); 11758 } 11759 11760 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11761 connp->conn_zoneid != zoneid && zoneid != ALL_ZONES) { 11762 /* 11763 * Multicast packet on the loopback interface: we only match 11764 * conns who joined the group in the specified zone. 11765 */ 11766 return (B_FALSE); 11767 } 11768 11769 mutex_enter(&connp->conn_lock); 11770 wantpacket = 11771 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11772 mutex_exit(&connp->conn_lock); 11773 11774 return (wantpacket); 11775 } 11776 11777 11778 /* 11779 * Transmit a packet and update any NUD state based on the flags 11780 * XXX need to "recover" any ip6i_t when doing putq! 11781 * 11782 * NOTE : This function does not ire_refrele the ire passed in as the 11783 * argument. 11784 */ 11785 void 11786 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11787 int caller, ipsec_out_t *io) 11788 { 11789 mblk_t *mp1; 11790 nce_t *nce = ire->ire_nce; 11791 ill_t *ill; 11792 uint64_t delta; 11793 ip6_t *ip6h; 11794 queue_t *stq = ire->ire_stq; 11795 ire_t *ire1 = NULL; 11796 ire_t *save_ire = ire; 11797 boolean_t multirt_send = B_FALSE; 11798 mblk_t *next_mp = NULL; 11799 11800 ip6h = (ip6_t *)mp->b_rptr; 11801 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11802 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11803 ASSERT(nce != NULL); 11804 ASSERT(mp->b_datap->db_type == M_DATA); 11805 ASSERT(stq != NULL); 11806 11807 ill = ire_to_ill(ire); 11808 if (!ill) { 11809 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11810 freemsg(mp); 11811 return; 11812 } 11813 11814 /* 11815 * If a packet is to be sent out an interface that is a 6to4 11816 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11817 * destination, must be checked to have a 6to4 prefix 11818 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11819 * address configured on the sending interface. 
Otherwise, 11820 * the packet was delivered to this interface in error and the 11821 * packet must be dropped. 11822 */ 11823 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11824 ipif_t *ipif = ill->ill_ipif; 11825 11826 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11827 &ip6h->ip6_dst)) { 11828 if (ip_debug > 2) { 11829 /* ip1dbg */ 11830 pr_addr_dbg("ip_xmit_v6: attempting to " 11831 "send 6to4 addressed IPv6 " 11832 "destination (%s) out the wrong " 11833 "interface.\n", AF_INET6, 11834 &ip6h->ip6_dst); 11835 } 11836 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 11837 freemsg(mp); 11838 return; 11839 } 11840 } 11841 11842 /* Flow-control check has been done in ip_wput_ire_v6 */ 11843 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11844 caller == IP_WSRV || canput(stq->q_next)) { 11845 uint32_t ill_index; 11846 11847 /* 11848 * In most cases, the emission loop below is entered only 11849 * once. Only in the case where the ire holds the 11850 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11851 * flagged ires in the bucket, and send the packet 11852 * through all crossed RTF_MULTIRT routes. 11853 */ 11854 if (ire->ire_flags & RTF_MULTIRT) { 11855 /* 11856 * Multirouting case. The bucket where ire is stored 11857 * probably holds other RTF_MULTIRT flagged ires 11858 * to the destination. In this call to ip_xmit_v6, 11859 * we attempt to send the packet through all 11860 * those ires. Thus, we first ensure that ire is the 11861 * first RTF_MULTIRT ire in the bucket, 11862 * before walking the ire list. 11863 */ 11864 ire_t *first_ire; 11865 irb_t *irb = ire->ire_bucket; 11866 ASSERT(irb != NULL); 11867 multirt_send = B_TRUE; 11868 11869 /* Make sure we do not omit any multiroute ire. 
*/ 11870 IRB_REFHOLD(irb); 11871 for (first_ire = irb->irb_ire; 11872 first_ire != NULL; 11873 first_ire = first_ire->ire_next) { 11874 if ((first_ire->ire_flags & RTF_MULTIRT) && 11875 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11876 &ire->ire_addr_v6)) && 11877 !(first_ire->ire_marks & 11878 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 11879 break; 11880 } 11881 11882 if ((first_ire != NULL) && (first_ire != ire)) { 11883 IRE_REFHOLD(first_ire); 11884 /* ire will be released by the caller */ 11885 ire = first_ire; 11886 nce = ire->ire_nce; 11887 stq = ire->ire_stq; 11888 ill = ire_to_ill(ire); 11889 } 11890 IRB_REFRELE(irb); 11891 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11892 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11893 ILL_MDT_USABLE(ill)) { 11894 /* 11895 * This tcp connection was marked as MDT-capable, but 11896 * it has been turned off due changes in the interface. 11897 * Now that the interface support is back, turn it on 11898 * by notifying tcp. We don't directly modify tcp_mdt, 11899 * since we leave all the details to the tcp code that 11900 * knows better. 11901 */ 11902 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11903 11904 if (mdimp == NULL) { 11905 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11906 "connp %p (ENOMEM)\n", (void *)connp)); 11907 } else { 11908 CONN_INC_REF(connp); 11909 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 11910 connp, SQTAG_TCP_INPUT_MCTL); 11911 } 11912 } 11913 11914 do { 11915 boolean_t qos_done = B_FALSE; 11916 11917 if (multirt_send) { 11918 irb_t *irb; 11919 /* 11920 * We are in a multiple send case, need to get 11921 * the next ire and make a duplicate of the 11922 * packet. ire1 holds here the next ire to 11923 * process in the bucket. If multirouting is 11924 * expected, any non-RTF_MULTIRT ire that has 11925 * the right destination address is ignored. 
11926 */ 11927 irb = ire->ire_bucket; 11928 ASSERT(irb != NULL); 11929 11930 IRB_REFHOLD(irb); 11931 for (ire1 = ire->ire_next; 11932 ire1 != NULL; 11933 ire1 = ire1->ire_next) { 11934 if (!(ire1->ire_flags & RTF_MULTIRT)) 11935 continue; 11936 if (!IN6_ARE_ADDR_EQUAL( 11937 &ire1->ire_addr_v6, 11938 &ire->ire_addr_v6)) 11939 continue; 11940 if (ire1->ire_marks & 11941 (IRE_MARK_CONDEMNED| 11942 IRE_MARK_HIDDEN)) 11943 continue; 11944 11945 /* Got one */ 11946 if (ire1 != save_ire) { 11947 IRE_REFHOLD(ire1); 11948 } 11949 break; 11950 } 11951 IRB_REFRELE(irb); 11952 11953 if (ire1 != NULL) { 11954 next_mp = copyb(mp); 11955 if ((next_mp == NULL) || 11956 ((mp->b_cont != NULL) && 11957 ((next_mp->b_cont = 11958 dupmsg(mp->b_cont)) == 11959 NULL))) { 11960 freemsg(next_mp); 11961 next_mp = NULL; 11962 ire_refrele(ire1); 11963 ire1 = NULL; 11964 } 11965 } 11966 11967 /* Last multiroute ire; don't loop anymore. */ 11968 if (ire1 == NULL) { 11969 multirt_send = B_FALSE; 11970 } 11971 } 11972 11973 ill_index = 11974 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11975 11976 /* 11977 * Check for fastpath, we need to hold nce_lock to 11978 * prevent fastpath update from chaining nce_fp_mp. 11979 */ 11980 11981 ASSERT(nce->nce_ipversion != IPV4_VERSION); 11982 mutex_enter(&nce->nce_lock); 11983 if ((mp1 = nce->nce_fp_mp) != NULL) { 11984 uint32_t hlen; 11985 uchar_t *rptr; 11986 11987 /* Initiate IPPF processing */ 11988 if (IP6_OUT_IPP(flags)) { 11989 /* 11990 * We have to release the nce lock since 11991 * IPPF components use 11992 * ill_lookup_on_ifindex(), 11993 * which takes the ill_g_lock and the 11994 * ill_lock locks. 
11995 */ 11996 mutex_exit(&nce->nce_lock); 11997 ip_process(IPP_LOCAL_OUT, &mp, 11998 ill_index); 11999 if (mp == NULL) { 12000 BUMP_MIB( 12001 ill->ill_ip6_mib, 12002 ipv6OutDiscards); 12003 if (next_mp != NULL) 12004 freemsg(next_mp); 12005 if (ire != save_ire) { 12006 ire_refrele(ire); 12007 } 12008 return; 12009 } 12010 mutex_enter(&nce->nce_lock); 12011 if ((mp1 = nce->nce_fp_mp) == NULL) { 12012 /* 12013 * Probably disappeared during 12014 * IPQoS processing. 12015 */ 12016 qos_done = B_TRUE; 12017 goto prepend_unitdata; 12018 } 12019 } 12020 hlen = MBLKL(mp1); 12021 rptr = mp->b_rptr - hlen; 12022 /* 12023 * make sure there is room for the fastpath 12024 * datalink header 12025 */ 12026 if (rptr < mp->b_datap->db_base) { 12027 mp1 = copyb(mp1); 12028 if (mp1 == NULL) { 12029 mutex_exit(&nce->nce_lock); 12030 BUMP_MIB(ill->ill_ip6_mib, 12031 ipv6OutDiscards); 12032 freemsg(mp); 12033 if (next_mp != NULL) 12034 freemsg(next_mp); 12035 if (ire != save_ire) { 12036 ire_refrele(ire); 12037 } 12038 return; 12039 } 12040 mp1->b_cont = mp; 12041 12042 /* Get the priority marking, if any */ 12043 mp1->b_band = mp->b_band; 12044 mp = mp1; 12045 } else { 12046 mp->b_rptr = rptr; 12047 /* 12048 * fastpath - pre-pend datalink 12049 * header 12050 */ 12051 bcopy(mp1->b_rptr, rptr, hlen); 12052 } 12053 12054 mutex_exit(&nce->nce_lock); 12055 12056 } else { 12057 prepend_unitdata: 12058 mutex_exit(&nce->nce_lock); 12059 mp1 = nce->nce_res_mp; 12060 if (mp1 == NULL) { 12061 ip1dbg(("ip_xmit_v6: No resolution " 12062 "block ire = %p\n", (void *)ire)); 12063 freemsg(mp); 12064 if (next_mp != NULL) 12065 freemsg(next_mp); 12066 if (ire != save_ire) { 12067 ire_refrele(ire); 12068 } 12069 return; 12070 } 12071 /* 12072 * Prepend the DL_UNITDATA_REQ. 
12073 */ 12074 mp1 = copyb(mp1); 12075 if (mp1 == NULL) { 12076 BUMP_MIB(ill->ill_ip6_mib, 12077 ipv6OutDiscards); 12078 freemsg(mp); 12079 if (next_mp != NULL) 12080 freemsg(next_mp); 12081 if (ire != save_ire) { 12082 ire_refrele(ire); 12083 } 12084 return; 12085 } 12086 mp1->b_cont = mp; 12087 mp = mp1; 12088 /* 12089 * Initiate IPPF processing, if it is 12090 * already done, bypass. 12091 */ 12092 if (!qos_done && IP6_OUT_IPP(flags)) { 12093 ip_process(IPP_LOCAL_OUT, &mp, 12094 ill_index); 12095 if (mp == NULL) { 12096 BUMP_MIB(ill->ill_ip6_mib, 12097 ipv6OutDiscards); 12098 if (next_mp != NULL) 12099 freemsg(next_mp); 12100 if (ire != save_ire) { 12101 ire_refrele(ire); 12102 } 12103 return; 12104 } 12105 } 12106 } 12107 12108 /* 12109 * Update ire counters; for save_ire, this has been 12110 * done by the caller. 12111 */ 12112 if (ire != save_ire) { 12113 UPDATE_OB_PKT_COUNT(ire); 12114 ire->ire_last_used_time = lbolt; 12115 } 12116 12117 /* 12118 * Send it down. XXX Do we want to flow control AH/ESP 12119 * packets that carry TCP payloads? We don't flow 12120 * control TCP packets, but we should also not 12121 * flow-control TCP packets that have been protected. 12122 * We don't have an easy way to find out if an AH/ESP 12123 * packet was originally TCP or not currently. 12124 */ 12125 if (io == NULL) { 12126 putnext(stq, mp); 12127 } else { 12128 /* 12129 * Safety Pup says: make sure this is 12130 * going to the right interface! 12131 */ 12132 if (io->ipsec_out_capab_ill_index != 12133 ill_index) { 12134 /* IPsec kstats: bump lose counter */ 12135 freemsg(mp1); 12136 } else { 12137 ipsec_hw_putnext(stq, mp); 12138 } 12139 } 12140 12141 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12142 if (ire != save_ire) { 12143 ire_refrele(ire); 12144 } 12145 if (multirt_send) { 12146 ASSERT(ire1 != NULL); 12147 /* 12148 * Proceed with the next RTF_MULTIRT 12149 * ire, also set up the send-to queue 12150 * accordingly. 
12151 */ 12152 ire = ire1; 12153 ire1 = NULL; 12154 stq = ire->ire_stq; 12155 nce = ire->ire_nce; 12156 ill = ire_to_ill(ire); 12157 mp = next_mp; 12158 next_mp = NULL; 12159 continue; 12160 } 12161 ASSERT(next_mp == NULL); 12162 ASSERT(ire1 == NULL); 12163 return; 12164 } 12165 12166 ASSERT(nce->nce_state != ND_INCOMPLETE); 12167 12168 /* 12169 * Check for upper layer advice 12170 */ 12171 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12172 /* 12173 * It should be o.k. to check the state without 12174 * a lock here, at most we lose an advice. 12175 */ 12176 nce->nce_last = TICK_TO_MSEC(lbolt64); 12177 if (nce->nce_state != ND_REACHABLE) { 12178 12179 mutex_enter(&nce->nce_lock); 12180 nce->nce_state = ND_REACHABLE; 12181 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12182 mutex_exit(&nce->nce_lock); 12183 (void) untimeout(nce->nce_timeout_id); 12184 if (ip_debug > 2) { 12185 /* ip1dbg */ 12186 pr_addr_dbg("ip_xmit_v6: state" 12187 " for %s changed to" 12188 " REACHABLE\n", AF_INET6, 12189 &ire->ire_addr_v6); 12190 } 12191 } 12192 if (ire != save_ire) { 12193 ire_refrele(ire); 12194 } 12195 if (multirt_send) { 12196 ASSERT(ire1 != NULL); 12197 /* 12198 * Proceed with the next RTF_MULTIRT 12199 * ire, also set up the send-to queue 12200 * accordingly. 12201 */ 12202 ire = ire1; 12203 ire1 = NULL; 12204 stq = ire->ire_stq; 12205 nce = ire->ire_nce; 12206 ill = ire_to_ill(ire); 12207 mp = next_mp; 12208 next_mp = NULL; 12209 continue; 12210 } 12211 ASSERT(next_mp == NULL); 12212 ASSERT(ire1 == NULL); 12213 return; 12214 } 12215 12216 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12217 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12218 " ill_reachable_time = %d \n", delta, 12219 ill->ill_reachable_time)); 12220 if (delta > (uint64_t)ill->ill_reachable_time) { 12221 nce = ire->ire_nce; 12222 mutex_enter(&nce->nce_lock); 12223 switch (nce->nce_state) { 12224 case ND_REACHABLE: 12225 case ND_STALE: 12226 /* 12227 * ND_REACHABLE is identical to 12228 * ND_STALE in this specific case. 
If 12229 * reachable time has expired for this 12230 * neighbor (delta is greater than 12231 * reachable time), conceptually, the 12232 * neighbor cache is no longer in 12233 * REACHABLE state, but already in 12234 * STALE state. So the correct 12235 * transition here is to ND_DELAY. 12236 */ 12237 nce->nce_state = ND_DELAY; 12238 mutex_exit(&nce->nce_lock); 12239 NDP_RESTART_TIMER(nce, 12240 delay_first_probe_time); 12241 if (ip_debug > 3) { 12242 /* ip2dbg */ 12243 pr_addr_dbg("ip_xmit_v6: state" 12244 " for %s changed to" 12245 " DELAY\n", AF_INET6, 12246 &ire->ire_addr_v6); 12247 } 12248 break; 12249 case ND_DELAY: 12250 case ND_PROBE: 12251 mutex_exit(&nce->nce_lock); 12252 /* Timers have already started */ 12253 break; 12254 case ND_UNREACHABLE: 12255 /* 12256 * ndp timer has detected that this nce 12257 * is unreachable and initiated deleting 12258 * this nce and all its associated IREs. 12259 * This is a race where we found the 12260 * ire before it was deleted and have 12261 * just sent out a packet using this 12262 * unreachable nce. 12263 */ 12264 mutex_exit(&nce->nce_lock); 12265 break; 12266 default: 12267 ASSERT(0); 12268 } 12269 } 12270 12271 if (multirt_send) { 12272 ASSERT(ire1 != NULL); 12273 /* 12274 * Proceed with the next RTF_MULTIRT ire, 12275 * Also set up the send-to queue accordingly. 12276 */ 12277 if (ire != save_ire) { 12278 ire_refrele(ire); 12279 } 12280 ire = ire1; 12281 ire1 = NULL; 12282 stq = ire->ire_stq; 12283 nce = ire->ire_nce; 12284 ill = ire_to_ill(ire); 12285 mp = next_mp; 12286 next_mp = NULL; 12287 } 12288 } while (multirt_send); 12289 /* 12290 * In the multirouting case, release the last ire used for 12291 * emission. save_ire will be released by the caller. 12292 */ 12293 if (ire != save_ire) { 12294 ire_refrele(ire); 12295 } 12296 } else { 12297 /* 12298 * Queue packet if we have an conn to give back pressure. 
12299 * We can't queue packets intended for hardware acceleration 12300 * since we've tossed that state already. If the packet is 12301 * being fed back from ire_send_v6, we don't know the 12302 * position in the queue to enqueue the packet and we discard 12303 * the packet. 12304 */ 12305 if (ip_output_queue && (connp != NULL) && (io == NULL) && 12306 (caller != IRE_SEND)) { 12307 if (caller == IP_WSRV) { 12308 connp->conn_did_putbq = 1; 12309 (void) putbq(connp->conn_wq, mp); 12310 conn_drain_insert(connp); 12311 /* 12312 * caller == IP_WSRV implies we are 12313 * the service thread, and the 12314 * queue is already noenabled. 12315 * The check for canput and 12316 * the putbq is not atomic. 12317 * So we need to check again. 12318 */ 12319 if (canput(stq->q_next)) 12320 connp->conn_did_putbq = 0; 12321 } else { 12322 (void) putq(connp->conn_wq, mp); 12323 } 12324 return; 12325 } 12326 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 12327 freemsg(mp); 12328 return; 12329 } 12330 } 12331 12332 /* 12333 * pr_addr_dbg function provides the needed buffer space to call 12334 * inet_ntop() function's 3rd argument. This function should be 12335 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12336 * stack buffer space in it's own stack frame. This function uses 12337 * a buffer from it's own stack and prints the information. 12338 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12339 * 12340 * Note: This function can call inet_ntop() once. 12341 */ 12342 void 12343 pr_addr_dbg(char *fmt1, int af, const void *addr) 12344 { 12345 char buf[INET6_ADDRSTRLEN]; 12346 12347 if (fmt1 == NULL) { 12348 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12349 return; 12350 } 12351 12352 /* 12353 * This does not compare debug level and just prints 12354 * out. Thus it is the responsibility of the caller 12355 * to check the appropriate debug-level before calling 12356 * this function. 
12357 */ 12358 if (ip_debug > 0) { 12359 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12360 } 12361 12362 12363 } 12364 12365 12366 /* 12367 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12368 * if needed and extension headers) that will be needed based on the 12369 * ip6_pkt_t structure passed by the caller. 12370 * 12371 * The returned length does not include the length of the upper level 12372 * protocol (ULP) header. 12373 */ 12374 int 12375 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12376 { 12377 int len; 12378 12379 len = IPV6_HDR_LEN; 12380 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12381 len += sizeof (ip6i_t); 12382 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12383 ASSERT(ipp->ipp_hopoptslen != 0); 12384 len += ipp->ipp_hopoptslen; 12385 } 12386 if (ipp->ipp_fields & IPPF_RTHDR) { 12387 ASSERT(ipp->ipp_rthdrlen != 0); 12388 len += ipp->ipp_rthdrlen; 12389 } 12390 /* 12391 * En-route destination options 12392 * Only do them if there's a routing header as well 12393 */ 12394 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12395 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12396 ASSERT(ipp->ipp_rtdstoptslen != 0); 12397 len += ipp->ipp_rtdstoptslen; 12398 } 12399 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12400 ASSERT(ipp->ipp_dstoptslen != 0); 12401 len += ipp->ipp_dstoptslen; 12402 } 12403 return (len); 12404 } 12405 12406 /* 12407 * All-purpose routine to build a header chain of an IPv6 header 12408 * followed by any required extension headers and a proto header, 12409 * preceeded (where necessary) by an ip6i_t private header. 12410 * 12411 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12412 * will be filled in appropriately. 12413 * Thus the caller must fill in the rest of the IPv6 header, such as 12414 * traffic class/flowid, source address (if not set here), hoplimit (if not 12415 * set here) and destination address. 12416 * 12417 * The extension headers and ip6i_t header will all be fully filled in. 
12418 */ 12419 void 12420 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12421 ip6_pkt_t *ipp, uint8_t protocol) 12422 { 12423 uint8_t *nxthdr_ptr; 12424 uint8_t *cp; 12425 ip6i_t *ip6i; 12426 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12427 12428 /* 12429 * If sending private ip6i_t header down (checksum info, nexthop, 12430 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12431 * then fill it in. (The checksum info will be filled in by icmp). 12432 */ 12433 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12434 ip6i = (ip6i_t *)ip6h; 12435 ip6h = (ip6_t *)&ip6i[1]; 12436 12437 ip6i->ip6i_flags = 0; 12438 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12439 if (ipp->ipp_fields & IPPF_IFINDEX || 12440 ipp->ipp_fields & IPPF_SCOPE_ID) { 12441 ASSERT(ipp->ipp_ifindex != 0); 12442 ip6i->ip6i_flags |= IP6I_IFINDEX; 12443 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12444 } 12445 if (ipp->ipp_fields & IPPF_ADDR) { 12446 /* 12447 * Enable per-packet source address verification if 12448 * IPV6_PKTINFO specified the source address. 12449 * ip6_src is set in the transport's _wput function. 12450 */ 12451 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12452 &ipp->ipp_addr)); 12453 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12454 } 12455 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12456 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12457 /* 12458 * We need to set this flag so that IP doesn't 12459 * rewrite the IPv6 header's hoplimit with the 12460 * current default value. 
12461 */ 12462 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12463 } 12464 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12465 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12466 &ipp->ipp_nexthop)); 12467 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12468 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12469 } 12470 /* 12471 * tell IP this is an ip6i_t private header 12472 */ 12473 ip6i->ip6i_nxt = IPPROTO_RAW; 12474 } 12475 /* Initialize IPv6 header */ 12476 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12477 if (ipp->ipp_fields & IPPF_TCLASS) { 12478 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12479 (ipp->ipp_tclass << 20); 12480 } 12481 if (ipp->ipp_fields & IPPF_ADDR) 12482 ip6h->ip6_src = ipp->ipp_addr; 12483 12484 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12485 cp = (uint8_t *)&ip6h[1]; 12486 /* 12487 * Here's where we have to start stringing together 12488 * any extension headers in the right order: 12489 * Hop-by-hop, destination, routing, and final destination opts. 12490 */ 12491 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12492 /* Hop-by-hop options */ 12493 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12494 12495 *nxthdr_ptr = IPPROTO_HOPOPTS; 12496 nxthdr_ptr = &hbh->ip6h_nxt; 12497 12498 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12499 cp += ipp->ipp_hopoptslen; 12500 } 12501 /* 12502 * En-route destination options 12503 * Only do them if there's a routing header as well 12504 */ 12505 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12506 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12507 ip6_dest_t *dst = (ip6_dest_t *)cp; 12508 12509 *nxthdr_ptr = IPPROTO_DSTOPTS; 12510 nxthdr_ptr = &dst->ip6d_nxt; 12511 12512 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12513 cp += ipp->ipp_rtdstoptslen; 12514 } 12515 /* 12516 * Routing header next 12517 */ 12518 if (ipp->ipp_fields & IPPF_RTHDR) { 12519 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12520 12521 *nxthdr_ptr = IPPROTO_ROUTING; 12522 nxthdr_ptr = &rt->ip6r_nxt; 12523 12524 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12525 cp += ipp->ipp_rthdrlen; 12526 } 
12527 /* 12528 * Do ultimate destination options 12529 */ 12530 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12531 ip6_dest_t *dest = (ip6_dest_t *)cp; 12532 12533 *nxthdr_ptr = IPPROTO_DSTOPTS; 12534 nxthdr_ptr = &dest->ip6d_nxt; 12535 12536 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12537 cp += ipp->ipp_dstoptslen; 12538 } 12539 /* 12540 * Now set the last header pointer to the proto passed in 12541 */ 12542 *nxthdr_ptr = protocol; 12543 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12544 } 12545 12546 /* 12547 * Return a pointer to the routing header extension header 12548 * in the IPv6 header(s) chain passed in. 12549 * If none found, return NULL 12550 * Assumes that all extension headers are in same mblk as the v6 header 12551 */ 12552 ip6_rthdr_t * 12553 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12554 { 12555 ip6_dest_t *desthdr; 12556 ip6_frag_t *fraghdr; 12557 uint_t hdrlen; 12558 uint8_t nexthdr; 12559 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12560 12561 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12562 return ((ip6_rthdr_t *)ptr); 12563 12564 /* 12565 * The routing header will precede all extension headers 12566 * other than the hop-by-hop and destination options 12567 * extension headers, so if we see anything other than those, 12568 * we're done and didn't find it. 12569 * We could see a destination options header alone but no 12570 * routing header, in which case we'll return NULL as soon as 12571 * we see anything after that. 12572 * Hop-by-hop and destination option headers are identical, 12573 * so we can use either one we want as a template. 12574 */ 12575 nexthdr = ip6h->ip6_nxt; 12576 while (ptr < endptr) { 12577 /* Is there enough left for len + nexthdr? 
*/ 12578 if (ptr + MIN_EHDR_LEN > endptr) 12579 return (NULL); 12580 12581 switch (nexthdr) { 12582 case IPPROTO_HOPOPTS: 12583 case IPPROTO_DSTOPTS: 12584 /* Assumes the headers are identical for hbh and dst */ 12585 desthdr = (ip6_dest_t *)ptr; 12586 hdrlen = 8 * (desthdr->ip6d_len + 1); 12587 nexthdr = desthdr->ip6d_nxt; 12588 break; 12589 12590 case IPPROTO_ROUTING: 12591 return ((ip6_rthdr_t *)ptr); 12592 12593 case IPPROTO_FRAGMENT: 12594 fraghdr = (ip6_frag_t *)ptr; 12595 hdrlen = sizeof (ip6_frag_t); 12596 nexthdr = fraghdr->ip6f_nxt; 12597 break; 12598 12599 default: 12600 return (NULL); 12601 } 12602 ptr += hdrlen; 12603 } 12604 return (NULL); 12605 } 12606 12607 /* 12608 * Called for source-routed packets originating on this node. 12609 * Manipulates the original routing header by moving every entry up 12610 * one slot, placing the first entry in the v6 header's v6_dst field, 12611 * and placing the ultimate destination in the routing header's last 12612 * slot. 12613 * 12614 * Returns the checksum diference between the ultimate destination 12615 * (last hop in the routing header when the packet is sent) and 12616 * the first hop (ip6_dst when the packet is sent) 12617 */ 12618 uint32_t 12619 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 12620 { 12621 uint_t numaddr; 12622 uint_t i; 12623 in6_addr_t *addrptr; 12624 in6_addr_t tmp; 12625 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12626 uint32_t cksm; 12627 uint32_t addrsum = 0; 12628 uint16_t *ptr; 12629 12630 /* 12631 * Perform any processing needed for source routing. 12632 * We know that all extension headers will be in the same mblk 12633 * as the IPv6 header. 12634 */ 12635 12636 /* 12637 * If no segments left in header, or the header length field is zero, 12638 * don't move hop addresses around; 12639 * Checksum difference is zero. 
12640 */ 12641 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12642 return (0); 12643 12644 ptr = (uint16_t *)&ip6h->ip6_dst; 12645 cksm = 0; 12646 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12647 cksm += ptr[i]; 12648 } 12649 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12650 12651 /* 12652 * Here's where the fun begins - we have to 12653 * move all addresses up one spot, take the 12654 * first hop and make it our first ip6_dst, 12655 * and place the ultimate destination in the 12656 * newly-opened last slot. 12657 */ 12658 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12659 numaddr = rthdr->ip6r0_len / 2; 12660 tmp = *addrptr; 12661 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12662 *addrptr = addrptr[1]; 12663 } 12664 *addrptr = ip6h->ip6_dst; 12665 ip6h->ip6_dst = tmp; 12666 12667 /* 12668 * From the checksummed ultimate destination subtract the checksummed 12669 * current ip6_dst (the first hop address). Return that number. 12670 * (In the v4 case, the second part of this is done in each routine 12671 * that calls ip_massage_options(). We do it all in this one place 12672 * for v6). 12673 */ 12674 ptr = (uint16_t *)&ip6h->ip6_dst; 12675 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12676 addrsum += ptr[i]; 12677 } 12678 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12679 if ((int)cksm < 0) 12680 cksm--; 12681 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12682 12683 return (cksm); 12684 } 12685 12686 /* 12687 * See if the upper-level protocol indicated by 'proto' will be able 12688 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 12689 * ICMP6_PACKET_TOO_BIG (IPv6). 12690 */ 12691 static boolean_t 12692 ip_ulp_cando_pkt2big(int proto) 12693 { 12694 /* 12695 * For now, only TCP can handle this. 12696 * Tunnels may be able to also, but since tun isn't working over 12697 * IPv6 yet, don't worry about it for now. 
12698 */ 12699 return (proto == IPPROTO_TCP); 12700 } 12701 12702 12703 /* 12704 * Propagate a multicast group membership operation (join/leave) (*fn) on 12705 * all interfaces crossed by the related multirt routes. 12706 * The call is considered successful if the operation succeeds 12707 * on at least one interface. 12708 * The function is called if the destination address in the packet to send 12709 * is multirouted. 12710 */ 12711 int 12712 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12713 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12714 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12715 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12716 { 12717 ire_t *ire_gw; 12718 irb_t *irb; 12719 int index, error = 0; 12720 opt_restart_t *or; 12721 12722 irb = ire->ire_bucket; 12723 ASSERT(irb != NULL); 12724 12725 ASSERT(DB_TYPE(first_mp) == M_CTL); 12726 or = (opt_restart_t *)first_mp->b_rptr; 12727 12728 IRB_REFHOLD(irb); 12729 for (; ire != NULL; ire = ire->ire_next) { 12730 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12731 continue; 12732 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12733 continue; 12734 12735 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12736 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12737 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 12738 /* No resolver exists for the gateway; skip this ire. */ 12739 if (ire_gw == NULL) 12740 continue; 12741 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12742 /* 12743 * A resolver exists: we can get the interface on which we have 12744 * to apply the operation. 
12745 */ 12746 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12747 first_mp); 12748 if (error == 0) 12749 or->or_private = CGTP_MCAST_SUCCESS; 12750 12751 if (ip_debug > 0) { 12752 ulong_t off; 12753 char *ksym; 12754 12755 ksym = kobj_getsymname((uintptr_t)fn, &off); 12756 ip2dbg(("ip_multirt_apply_membership_v6: " 12757 "called %s, multirt group 0x%08x via itf 0x%08x, " 12758 "error %d [success %u]\n", 12759 ksym ? ksym : "?", 12760 ntohl(V4_PART_OF_V6((*v6grp))), 12761 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12762 error, or->or_private)); 12763 } 12764 12765 ire_refrele(ire_gw); 12766 if (error == EINPROGRESS) { 12767 IRB_REFRELE(irb); 12768 return (error); 12769 } 12770 } 12771 IRB_REFRELE(irb); 12772 /* 12773 * Consider the call as successful if we succeeded on at least 12774 * one interface. Otherwise, return the last encountered error. 12775 */ 12776 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12777 } 12778 12779 void 12780 ip6_kstat_init(void) 12781 { 12782 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 12783 "net", KSTAT_TYPE_NAMED, 12784 sizeof (ip6_statistics) / sizeof (kstat_named_t), 12785 KSTAT_FLAG_VIRTUAL)) != NULL) { 12786 ip6_kstat->ks_data = &ip6_statistics; 12787 kstat_install(ip6_kstat); 12788 } 12789 } 12790 12791 /* 12792 * The following two functions set and get the value for the 12793 * IPV6_SRC_PREFERENCES socket option. 12794 */ 12795 int 12796 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12797 { 12798 /* 12799 * We only support preferences that are covered by 12800 * IPV6_PREFER_SRC_MASK. 12801 */ 12802 if (prefs & ~IPV6_PREFER_SRC_MASK) 12803 return (EINVAL); 12804 12805 /* 12806 * Look for conflicting preferences or default preferences. If 12807 * both bits of a related pair are clear, the application wants the 12808 * system's default value for that pair. Both bits in a pair can't 12809 * be set. 
12810 */ 12811 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12812 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12813 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12814 IPV6_PREFER_SRC_MIPMASK) { 12815 return (EINVAL); 12816 } 12817 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12818 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12819 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12820 IPV6_PREFER_SRC_TMPMASK) { 12821 return (EINVAL); 12822 } 12823 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12824 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12825 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12826 IPV6_PREFER_SRC_CGAMASK) { 12827 return (EINVAL); 12828 } 12829 12830 connp->conn_src_preferences = prefs; 12831 return (0); 12832 } 12833 12834 size_t 12835 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12836 { 12837 *val = connp->conn_src_preferences; 12838 return (sizeof (connp->conn_src_preferences)); 12839 } 12840 12841 int 12842 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 12843 { 12844 ill_t *ill; 12845 ire_t *ire; 12846 int error; 12847 12848 /* 12849 * Verify the source address and ifindex. Privileged users can use 12850 * any source address. For ancillary data the source address is 12851 * checked in ip_wput_v6. 12852 */ 12853 if (pkti->ipi6_ifindex != 0) { 12854 ASSERT(connp != NULL); 12855 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 12856 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 12857 if (ill == NULL) { 12858 /* 12859 * We just want to know if the interface exists, we 12860 * don't really care about the ill pointer itself. 
12861 */ 12862 if (error != EINPROGRESS) 12863 return (error); 12864 error = 0; /* Ensure we don't use it below */ 12865 } else { 12866 ill_refrele(ill); 12867 } 12868 } 12869 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12870 secpolicy_net_rawaccess(cr) != 0) { 12871 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12872 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12873 connp->conn_zoneid, NULL, MATCH_IRE_TYPE); 12874 if (ire != NULL) 12875 ire_refrele(ire); 12876 else 12877 return (ENXIO); 12878 } 12879 return (0); 12880 } 12881 12882 /* 12883 * Get the size of the IP options (including the IP headers size) 12884 * without including the AH header's size. If till_ah is B_FALSE, 12885 * and if AH header is present, dest options beyond AH header will 12886 * also be included in the returned size. 12887 */ 12888 int 12889 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12890 { 12891 ip6_t *ip6h; 12892 uint8_t nexthdr; 12893 uint8_t *whereptr; 12894 ip6_hbh_t *hbhhdr; 12895 ip6_dest_t *dsthdr; 12896 ip6_rthdr_t *rthdr; 12897 int ehdrlen; 12898 int size; 12899 ah_t *ah; 12900 12901 ip6h = (ip6_t *)mp->b_rptr; 12902 size = IPV6_HDR_LEN; 12903 nexthdr = ip6h->ip6_nxt; 12904 whereptr = (uint8_t *)&ip6h[1]; 12905 for (;;) { 12906 /* Assume IP has already stripped it */ 12907 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12908 switch (nexthdr) { 12909 case IPPROTO_HOPOPTS: 12910 hbhhdr = (ip6_hbh_t *)whereptr; 12911 nexthdr = hbhhdr->ip6h_nxt; 12912 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12913 break; 12914 case IPPROTO_DSTOPTS: 12915 dsthdr = (ip6_dest_t *)whereptr; 12916 nexthdr = dsthdr->ip6d_nxt; 12917 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12918 break; 12919 case IPPROTO_ROUTING: 12920 rthdr = (ip6_rthdr_t *)whereptr; 12921 nexthdr = rthdr->ip6r_nxt; 12922 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12923 break; 12924 default : 12925 if (till_ah) { 12926 ASSERT(nexthdr == IPPROTO_AH); 12927 return (size); 12928 } 12929 /* 12930 * If we don't have a AH 
header to traverse, 12931 * return now. This happens normally for 12932 * outbound datagrams where we have not inserted 12933 * the AH header. 12934 */ 12935 if (nexthdr != IPPROTO_AH) { 12936 return (size); 12937 } 12938 12939 /* 12940 * We don't include the AH header's size 12941 * to be symmetrical with other cases where 12942 * we either don't have a AH header (outbound) 12943 * or peek into the AH header yet (inbound and 12944 * not pulled up yet). 12945 */ 12946 ah = (ah_t *)whereptr; 12947 nexthdr = ah->ah_nexthdr; 12948 ehdrlen = (ah->ah_length << 2) + 8; 12949 12950 if (nexthdr == IPPROTO_DSTOPTS) { 12951 if (whereptr + ehdrlen >= mp->b_wptr) { 12952 /* 12953 * The destination options header 12954 * is not part of the first mblk. 12955 */ 12956 whereptr = mp->b_cont->b_rptr; 12957 } else { 12958 whereptr += ehdrlen; 12959 } 12960 12961 dsthdr = (ip6_dest_t *)whereptr; 12962 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12963 size += ehdrlen; 12964 } 12965 return (size); 12966 } 12967 whereptr += ehdrlen; 12968 size += ehdrlen; 12969 } 12970 } 12971