1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/kobj.h> 46 #include <sys/zone.h> 47 48 #include <sys/kmem.h> 49 #include <sys/systm.h> 50 #include <sys/param.h> 51 #include <sys/socket.h> 52 #include <sys/vtrace.h> 53 #include <sys/isa_defs.h> 54 #include <sys/atomic.h> 55 #include <sys/iphada.h> 56 #include <sys/policy.h> 57 #include <net/if.h> 58 #include <net/if_types.h> 59 #include <net/route.h> 60 #include <net/if_dl.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <netinet/ip6.h> 64 #include <netinet/icmp6.h> 65 #include <netinet/sctp.h> 66 67 #include <inet/common.h> 68 #include <inet/mi.h> 69 #include <inet/mib2.h> 70 #include <inet/nd.h> 71 #include <inet/arp.h> 72 73 #include <inet/ip.h> 74 #include <inet/ip_impl.h> 75 #include <inet/ip6.h> 76 #include <inet/ip6_asp.h> 77 #include <inet/tcp.h> 78 #include <inet/tcp_impl.h> 79 #include <inet/udp_impl.h> 80 #include <inet/ipp_common.h> 81 82 #include <inet/ip_multi.h> 83 #include <inet/ip_if.h> 84 #include <inet/ip_ire.h> 85 #include <inet/ip_rts.h> 86 #include <inet/optcom.h> 87 #include <inet/ip_ndp.h> 88 #include <net/pfkeyv2.h> 89 #include <inet/ipsec_info.h> 90 #include <inet/sadb.h> 91 #include <inet/ipsec_impl.h> 92 #include <inet/tun.h> 93 #include <inet/sctp_ip.h> 94 #include <sys/pattr.h> 95 #include <inet/ipclassifier.h> 96 #include <inet/ipsecah.h> 97 #include <inet/udp_impl.h> 98 #include <sys/squeue.h> 99 100 #include <sys/tsol/label.h> 101 #include <sys/tsol/tnet.h> 102 103 #include <rpc/pmap_prot.h> 104 105 /* Temporary; for CR 6451644 work-around */ 106 #include <sys/ethernet.h> 

extern squeue_func_t ip_input_proc;

/*
 * IP statistics.
 *
 * IP6_STAT() increments, and IP6_STAT_UPDATE() adds `n' to, the 64-bit
 * value of the named member of the file-private ip6_statistics block.
 * Neither macro does any locking or atomic update of its own.
 */
#define	IP6_STAT(x)		(ip6_statistics.x.value.ui64++)
#define	IP6_STAT_UPDATE(x, n)	(ip6_statistics.x.value.ui64 += (n))

/* One named kstat slot per counter; see ip6_statistics for the names. */
typedef struct ip6_stat {
	kstat_named_t	ip6_udp_fast_path;
	kstat_named_t	ip6_udp_slow_path;
	kstat_named_t	ip6_udp_fannorm;
	kstat_named_t	ip6_udp_fanmb;
	kstat_named_t	ip6_out_sw_cksum;
	kstat_named_t	ip6_in_sw_cksum;
	kstat_named_t	ip6_tcp_in_full_hw_cksum_err;
	kstat_named_t	ip6_tcp_in_part_hw_cksum_err;
	kstat_named_t	ip6_tcp_in_sw_cksum_err;
	kstat_named_t	ip6_tcp_out_sw_cksum_bytes;
	kstat_named_t	ip6_udp_in_full_hw_cksum_err;
	kstat_named_t	ip6_udp_in_part_hw_cksum_err;
	kstat_named_t	ip6_udp_in_sw_cksum_err;
	kstat_named_t	ip6_udp_out_sw_cksum_bytes;
	kstat_named_t	ip6_frag_mdt_pkt_out;
	kstat_named_t	ip6_frag_mdt_discarded;
	kstat_named_t	ip6_frag_mdt_allocfail;
	kstat_named_t	ip6_frag_mdt_addpdescfail;
	kstat_named_t	ip6_frag_mdt_allocd;
} ip6_stat_t;

/*
 * The initializer is positional: entries must stay in the same order as
 * the members of ip6_stat_t above, and every counter is a 64-bit unsigned.
 */
static ip6_stat_t ip6_statistics = {
	{ "ip6_udp_fast_path",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_slow_path",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_fannorm",			KSTAT_DATA_UINT64 },
	{ "ip6_udp_fanmb",			KSTAT_DATA_UINT64 },
	{ "ip6_out_sw_cksum",			KSTAT_DATA_UINT64 },
	{ "ip6_in_sw_cksum",			KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_tcp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
	{ "ip6_tcp_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
	{ "ip6_udp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
	{ "ip6_udp_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_pkt_out",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_discarded",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_allocfail",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_addpdescfail",		KSTAT_DATA_UINT64 },
	{ "ip6_frag_mdt_allocd",		KSTAT_DATA_UINT64 },
};

static kstat_t *ip6_kstat;

/*
 * Naming conventions:
 *      These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */

/*
 * IPv6 mibs when the interface (ill) is not known.
 * When the ill is known the per-interface mib in the ill is used.
 */
mib2_ipv6IfStatsEntry_t	ip6_mib;
mib2_ipv6IfIcmpEntry_t	icmp6_mib;

/*
 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
 * from IANA. This mechanism will remain in effect until an official
 * number is obtained.
 */
uchar_t ip6opt_ls;

uint_t ipv6_ire_default_count;	/* Number of IPv6 IRE_DEFAULT entries */
uint_t ipv6_ire_default_index;	/* Walking IPv6 index used to mod in */

/*
 * Well-known IPv6 address constants.  Each in6_addr_t is initialized as
 * four 32-bit words; the words hold the address bytes in wire order, which
 * is why every constant with a non-symmetric word needs a separate
 * big-endian and little-endian spelling.
 */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

/* ff00:: */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif	/* _BIG_ENDIAN */

/* ::1 */
#ifdef	_BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif /* _BIG_ENDIAN */

/* ff02::2 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif /* _BIG_ENDIAN */

/* ff02::16 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif /* _BIG_ENDIAN */

/* ff02::1:ff00:0 (prefix; the low 24 bits are left zero here) */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else  /* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif /* _BIG_ENDIAN */

/*
 * Used by icmp_send_redirect_v6 for picking random src.
 */
uint_t	icmp_redirect_v6_src_index;

/*
 * Leave room for ip_newroute to tack on the src and target addresses.
 * True when mp is non-NULL and its first mblk holds at least two IPv6
 * addresses' worth of data.
 */
#define	OK_RESOLVER_MP_V6(mp)						\
		((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN))

/* Forward declarations for the file-private functions defined below. */
static void	icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill,
    boolean_t, zoneid_t);
static void	icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t,
    const in6_addr_t *, boolean_t, zoneid_t);
static void	icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill);
static boolean_t	icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp);
static int	ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *,
    uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t,
    boolean_t, boolean_t, boolean_t, boolean_t);
static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *,
    iulp_t *);
static int	ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *,
    uint16_t, boolean_t, boolean_t, boolean_t);
static void	ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *,
    ill_t *, uint_t, uint_t, boolean_t, zoneid_t);
static void	ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t,
    ill_t *, ill_t *, uint_t, boolean_t, zoneid_t);
static int	ip_process_options_v6(queue_t *, mblk_t *, ip6_t *,
    uint8_t *, uint_t, uint8_t);
static mblk_t	*ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *,
    ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *);
static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *);
static void	ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int,
    conn_t *, int, int, int, zoneid_t);
static boolean_t ip_ulp_cando_pkt2big(int);

static void ip_rput_v6(queue_t *, mblk_t *);
static void ip_wput_v6(queue_t *, mblk_t *);

/*
 * A template for an IPv6 AR_ENTRY_QUERY.
 * The layout implied by the offsets is: areq_t header, target address,
 * sender address, then the name.
 */
static areq_t	ipv6_areq_template = {
	AR_ENTRY_QUERY,				/* cmd */
	sizeof (areq_t)+(2*IPV6_ADDR_LEN),	/* name offset */
	sizeof (areq_t),	/* name len (filled by ill_arp_alloc) */
	IP6_DL_SAP,		/* protocol, from arp's perspective */
	sizeof (areq_t),	/* target addr offset */
	IPV6_ADDR_LEN,		/* target addr_length */
	0,			/* flags */
	sizeof (areq_t) + IPV6_ADDR_LEN,	/* sender addr offset */
	IPV6_ADDR_LEN,		/* sender addr length */
	6,			/* xmit_count */
	1000,			/* (re)xmit_interval in milliseconds */
	4			/* max # of requests to buffer */
	/* anything else filled in by the code */
};

/* Read-side queue entry points for the IPv6 instance of the ip module. */
struct qinit rinit_ipv6 = {
	(pfi_t)ip_rput_v6,
	NULL,
	ip_open,
	ip_close,
	NULL,
	&ip_mod_info
};

/* Write-side queue entry points; only this side has a service routine. */
struct qinit winit_ipv6 = {
	(pfi_t)ip_wput_v6,
	(pfi_t)ip_wsrv,
	ip_open,
	ip_close,
	NULL,
	&ip_mod_info
};

/*
 * Handle IPv6 ICMP packets sent to us.  Consume the mblk passed in.
 * The message has already been checksummed and if needed,
 * a copy has been made to be sent to any interested ICMP client (conn)
 * Note that this is different than icmp_inbound() which does the fanout
 * to conn's as well as local processing of the ICMP packets.
 *
 * All error messages are passed to the matching transport stream.
 *
 * Zones notes:
 * The packet is only processed in the context of the specified zone: typically
 * only this zone will reply to an echo request. This means that the caller must
 * call icmp_inbound_v6() for each relevant zone.
329 */ 330 static void 331 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 332 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 333 { 334 icmp6_t *icmp6; 335 ip6_t *ip6h; 336 boolean_t interested; 337 ip6i_t *ip6i; 338 in6_addr_t origsrc; 339 ire_t *ire; 340 mblk_t *first_mp; 341 ipsec_in_t *ii; 342 343 ASSERT(ill != NULL); 344 first_mp = mp; 345 if (mctl_present) { 346 mp = first_mp->b_cont; 347 ASSERT(mp != NULL); 348 349 ii = (ipsec_in_t *)first_mp->b_rptr; 350 ASSERT(ii->ipsec_in_type == IPSEC_IN); 351 } 352 353 ip6h = (ip6_t *)mp->b_rptr; 354 355 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 356 357 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 358 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 359 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 360 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 361 freemsg(first_mp); 362 return; 363 } 364 ip6h = (ip6_t *)mp->b_rptr; 365 } 366 if (icmp_accept_clear_messages == 0) { 367 first_mp = ipsec_check_global_policy(first_mp, NULL, 368 NULL, ip6h, mctl_present); 369 if (first_mp == NULL) 370 return; 371 } 372 373 /* 374 * On a labeled system, we have to check whether the zone itself is 375 * permitted to receive raw traffic. 376 */ 377 if (is_system_labeled()) { 378 if (zoneid == ALL_ZONES) 379 zoneid = tsol_packet_to_zoneid(mp); 380 if (!tsol_can_accept_raw(mp, B_FALSE)) { 381 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 382 zoneid)); 383 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 384 freemsg(first_mp); 385 return; 386 } 387 } 388 389 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 390 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 391 icmp6->icmp6_code)); 392 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 393 394 /* Initiate IPPF processing here */ 395 if (IP6_IN_IPP(flags)) { 396 397 /* 398 * If the ifindex changes due to SIOCSLIFINDEX 399 * packet may return to IP on the wrong ill. 
400 */ 401 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 402 if (mp == NULL) { 403 if (mctl_present) { 404 freeb(first_mp); 405 } 406 return; 407 } 408 } 409 410 switch (icmp6->icmp6_type) { 411 case ICMP6_DST_UNREACH: 412 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 413 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 414 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 415 break; 416 417 case ICMP6_TIME_EXCEEDED: 418 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 419 break; 420 421 case ICMP6_PARAM_PROB: 422 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 423 break; 424 425 case ICMP6_PACKET_TOO_BIG: 426 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 427 zoneid); 428 return; 429 case ICMP6_ECHO_REQUEST: 430 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 431 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 432 !ipv6_resp_echo_mcast) 433 break; 434 435 /* 436 * We must have exclusive use of the mblk to convert it to 437 * a response. 438 * If not, we copy it. 439 */ 440 if (mp->b_datap->db_ref > 1) { 441 mblk_t *mp1; 442 443 mp1 = copymsg(mp); 444 freemsg(mp); 445 if (mp1 == NULL) { 446 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 447 if (mctl_present) 448 freeb(first_mp); 449 return; 450 } 451 mp = mp1; 452 ip6h = (ip6_t *)mp->b_rptr; 453 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 454 if (mctl_present) 455 first_mp->b_cont = mp; 456 else 457 first_mp = mp; 458 } 459 460 /* 461 * Turn the echo into an echo reply. 462 * Remove any extension headers (do not reverse a source route) 463 * and clear the flow id (keep traffic class for now). 
464 */ 465 if (hdr_length != IPV6_HDR_LEN) { 466 int i; 467 468 for (i = 0; i < IPV6_HDR_LEN; i++) 469 mp->b_rptr[hdr_length - i - 1] = 470 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 471 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 472 ip6h = (ip6_t *)mp->b_rptr; 473 ip6h->ip6_nxt = IPPROTO_ICMPV6; 474 hdr_length = IPV6_HDR_LEN; 475 } 476 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 477 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 478 479 ip6h->ip6_plen = 480 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 481 origsrc = ip6h->ip6_src; 482 /* 483 * Reverse the source and destination addresses. 484 * If the return address is a multicast, zero out the source 485 * (ip_wput_v6 will set an address). 486 */ 487 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 488 ip6h->ip6_src = ipv6_all_zeros; 489 ip6h->ip6_dst = origsrc; 490 } else { 491 ip6h->ip6_src = ip6h->ip6_dst; 492 ip6h->ip6_dst = origsrc; 493 } 494 495 /* set the hop limit */ 496 ip6h->ip6_hops = ipv6_def_hops; 497 498 /* 499 * Prepare for checksum by putting icmp length in the icmp 500 * checksum field. The checksum is calculated in ip_wput_v6. 501 */ 502 icmp6->icmp6_cksum = ip6h->ip6_plen; 503 /* 504 * ICMP echo replies should go out on the same interface 505 * the request came on as probes used by in.mpathd for 506 * detecting NIC failures are ECHO packets. We turn-off load 507 * spreading by allocating a ip6i and setting ip6i_attach_if 508 * to B_TRUE which is handled both by ip_wput_v6 and 509 * ip_newroute_v6. If we don't turnoff load spreading, 510 * the packets might get dropped if there are no 511 * non-FAILED/INACTIVE interfaces for it to go out on and 512 * in.mpathd would wrongly detect a failure or mis-detect 513 * a NIC failure as a link failure. As load spreading can 514 * happen only if ill_group is not NULL, we do only for 515 * that case and this does not affect the normal case. 516 * 517 * We force this only on echo packets that came from on-link 518 * hosts. 
We restrict this to link-local addresses which 519 * is used by in.mpathd for probing. In the IPv6 case, 520 * default routes typically have an ire_ipif pointer and 521 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 522 * might work. As a default route out of this interface 523 * may not be present, enforcing this packet to go out in 524 * this case may not work. 525 */ 526 if (ill->ill_group != NULL && 527 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 528 /* 529 * If we are sending replies to ourselves, don't 530 * set ATTACH_IF as we may not be able to find 531 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 532 * causes ip_wput_v6 to look for an IRE_LOCAL on 533 * "ill" which it may not find and will try to 534 * create an IRE_CACHE for our local address. Once 535 * we do this, we will try to forward all packets 536 * meant to our LOCAL address. 537 */ 538 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 539 NULL); 540 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 541 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 542 if (mp == NULL) { 543 BUMP_MIB(ill->ill_icmp6_mib, 544 ipv6IfIcmpInErrors); 545 if (ire != NULL) 546 ire_refrele(ire); 547 if (mctl_present) 548 freeb(first_mp); 549 return; 550 } else if (mctl_present) { 551 first_mp->b_cont = mp; 552 } else { 553 first_mp = mp; 554 } 555 ip6i = (ip6i_t *)mp->b_rptr; 556 ip6i->ip6i_flags = IP6I_ATTACH_IF; 557 ip6i->ip6i_ifindex = 558 ill->ill_phyint->phyint_ifindex; 559 } 560 if (ire != NULL) 561 ire_refrele(ire); 562 } 563 564 if (!mctl_present) { 565 /* 566 * This packet should go out the same way as it 567 * came in i.e in clear. To make sure that global 568 * policy will not be applied to this in ip_wput, 569 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
570 */ 571 ASSERT(first_mp == mp); 572 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 573 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 574 freemsg(mp); 575 return; 576 } 577 ii = (ipsec_in_t *)first_mp->b_rptr; 578 579 /* This is not a secure packet */ 580 ii->ipsec_in_secure = B_FALSE; 581 first_mp->b_cont = mp; 582 } 583 ii->ipsec_in_zoneid = zoneid; 584 ASSERT(zoneid != ALL_ZONES); 585 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 586 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 587 return; 588 } 589 put(WR(q), first_mp); 590 return; 591 592 case ICMP6_ECHO_REPLY: 593 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 594 break; 595 596 case ND_ROUTER_SOLICIT: 597 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 598 break; 599 600 case ND_ROUTER_ADVERT: 601 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 602 break; 603 604 case ND_NEIGHBOR_SOLICIT: 605 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 606 if (mctl_present) 607 freeb(first_mp); 608 /* XXX may wish to pass first_mp up to ndp_input someday. */ 609 ndp_input(ill, mp, dl_mp); 610 return; 611 612 case ND_NEIGHBOR_ADVERT: 613 BUMP_MIB(ill->ill_icmp6_mib, 614 ipv6IfIcmpInNeighborAdvertisements); 615 if (mctl_present) 616 freeb(first_mp); 617 /* XXX may wish to pass first_mp up to ndp_input someday. */ 618 ndp_input(ill, mp, dl_mp); 619 return; 620 621 case ND_REDIRECT: { 622 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 623 624 if (ipv6_ignore_redirect) 625 break; 626 627 /* 628 * As there is no upper client to deliver, we don't 629 * need the first_mp any more. 630 */ 631 if (mctl_present) 632 freeb(first_mp); 633 if (!pullupmsg(mp, -1) || 634 !icmp_redirect_ok_v6(ill, mp)) { 635 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 636 break; 637 } 638 icmp_redirect_v6(q, mp, ill); 639 return; 640 } 641 642 /* 643 * The next three icmp messages will be handled by MLD. 
644 * Pass all valid MLD packets up to any process(es) 645 * listening on a raw ICMP socket. MLD messages are 646 * freed by mld_input function. 647 */ 648 case MLD_LISTENER_QUERY: 649 case MLD_LISTENER_REPORT: 650 case MLD_LISTENER_REDUCTION: 651 if (mctl_present) 652 freeb(first_mp); 653 mld_input(q, mp, ill); 654 return; 655 default: 656 break; 657 } 658 if (interested) { 659 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 660 mctl_present, zoneid); 661 } else { 662 freemsg(first_mp); 663 } 664 } 665 666 /* 667 * Process received IPv6 ICMP Packet too big. 668 * After updating any IRE it does the fanout to any matching transport streams. 669 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 670 */ 671 /* ARGSUSED */ 672 static void 673 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 674 boolean_t mctl_present, zoneid_t zoneid) 675 { 676 ip6_t *ip6h; 677 ip6_t *inner_ip6h; 678 icmp6_t *icmp6; 679 uint16_t hdr_length; 680 uint32_t mtu; 681 ire_t *ire, *first_ire; 682 mblk_t *first_mp; 683 684 first_mp = mp; 685 if (mctl_present) 686 mp = first_mp->b_cont; 687 /* 688 * We must have exclusive use of the mblk to update the MTU 689 * in the packet. 690 * If not, we copy it. 691 * 692 * If there's an M_CTL present, we know that allocated first_mp 693 * earlier in this function, so we know first_mp has refcnt of one. 
694 */ 695 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 696 if (mp->b_datap->db_ref > 1) { 697 mblk_t *mp1; 698 699 mp1 = copymsg(mp); 700 freemsg(mp); 701 if (mp1 == NULL) { 702 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 703 if (mctl_present) 704 freeb(first_mp); 705 return; 706 } 707 mp = mp1; 708 if (mctl_present) 709 first_mp->b_cont = mp; 710 else 711 first_mp = mp; 712 } 713 ip6h = (ip6_t *)mp->b_rptr; 714 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 715 hdr_length = ip_hdr_length_v6(mp, ip6h); 716 else 717 hdr_length = IPV6_HDR_LEN; 718 719 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 720 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 721 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 722 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 723 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 724 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 725 freemsg(first_mp); 726 return; 727 } 728 ip6h = (ip6_t *)mp->b_rptr; 729 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 730 inner_ip6h = (ip6_t *)&icmp6[1]; 731 } 732 733 /* 734 * For link local destinations matching simply on IRE type is not 735 * sufficient. Same link local addresses for different ILL's is 736 * possible. 
737 */ 738 739 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 740 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 741 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 742 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 743 744 if (first_ire == NULL) { 745 if (ip_debug > 2) { 746 /* ip1dbg */ 747 pr_addr_dbg("icmp_inbound_too_big_v6:" 748 "no ire for dst %s\n", AF_INET6, 749 &inner_ip6h->ip6_dst); 750 } 751 freemsg(first_mp); 752 return; 753 } 754 755 mtu = ntohl(icmp6->icmp6_mtu); 756 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 757 for (ire = first_ire; ire != NULL && 758 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 759 ire = ire->ire_next) { 760 mutex_enter(&ire->ire_lock); 761 if (mtu < IPV6_MIN_MTU) { 762 ip1dbg(("Received mtu less than IPv6 " 763 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 764 mtu = IPV6_MIN_MTU; 765 /* 766 * If an mtu less than IPv6 min mtu is received, 767 * we must include a fragment header in 768 * subsequent packets. 769 */ 770 ire->ire_frag_flag |= IPH_FRAG_HDR; 771 } 772 ip1dbg(("Received mtu from router: %d\n", mtu)); 773 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 774 /* Record the new max frag size for the ULP. */ 775 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 776 /* 777 * If we need a fragment header in every packet 778 * (above case or multirouting), make sure the 779 * ULP takes it into account when computing the 780 * payload size. 
781 */ 782 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 783 sizeof (ip6_frag_t)); 784 } else { 785 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 786 } 787 mutex_exit(&ire->ire_lock); 788 } 789 rw_exit(&first_ire->ire_bucket->irb_lock); 790 ire_refrele(first_ire); 791 } else { 792 irb_t *irb = NULL; 793 /* 794 * for non-link local destinations we match only on the IRE type 795 */ 796 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 797 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE); 798 if (ire == NULL) { 799 if (ip_debug > 2) { 800 /* ip1dbg */ 801 pr_addr_dbg("icmp_inbound_too_big_v6:" 802 "no ire for dst %s\n", 803 AF_INET6, &inner_ip6h->ip6_dst); 804 } 805 freemsg(first_mp); 806 return; 807 } 808 irb = ire->ire_bucket; 809 ire_refrele(ire); 810 rw_enter(&irb->irb_lock, RW_READER); 811 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 812 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 813 &inner_ip6h->ip6_dst)) { 814 mtu = ntohl(icmp6->icmp6_mtu); 815 mutex_enter(&ire->ire_lock); 816 if (mtu < IPV6_MIN_MTU) { 817 ip1dbg(("Received mtu less than IPv6" 818 "min mtu %d: %d\n", 819 IPV6_MIN_MTU, mtu)); 820 mtu = IPV6_MIN_MTU; 821 /* 822 * If an mtu less than IPv6 min mtu is 823 * received, we must include a fragment 824 * header in subsequent packets. 825 */ 826 ire->ire_frag_flag |= IPH_FRAG_HDR; 827 } 828 829 ip1dbg(("Received mtu from router: %d\n", mtu)); 830 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 831 /* Record the new max frag size for the ULP. */ 832 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 833 /* 834 * If we need a fragment header in 835 * every packet (above case or 836 * multirouting), make sure the ULP 837 * takes it into account when computing 838 * the payload size. 
839 */ 840 icmp6->icmp6_mtu = 841 htonl(ire->ire_max_frag - 842 sizeof (ip6_frag_t)); 843 } else { 844 icmp6->icmp6_mtu = 845 htonl(ire->ire_max_frag); 846 } 847 mutex_exit(&ire->ire_lock); 848 } 849 } 850 rw_exit(&irb->irb_lock); 851 } 852 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 853 mctl_present, zoneid); 854 } 855 856 static void 857 pkt_too_big(conn_t *connp, void *arg) 858 { 859 mblk_t *mp; 860 861 if (!connp->conn_ipv6_recvpathmtu) 862 return; 863 864 /* create message and drop it on this connections read queue */ 865 if ((mp = dupb((mblk_t *)arg)) == NULL) { 866 return; 867 } 868 mp->b_datap->db_type = M_CTL; 869 870 putnext(connp->conn_rq, mp); 871 } 872 873 /* 874 * Fanout received ICMPv6 error packets to the transports. 875 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 876 */ 877 void 878 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 879 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 880 { 881 uint16_t *up; /* Pointer to ports in ULP header */ 882 uint32_t ports; /* reversed ports for fanout */ 883 ip6_t rip6h; /* With reversed addresses */ 884 uint16_t hdr_length; 885 uint8_t *nexthdrp; 886 uint8_t nexthdr; 887 mblk_t *first_mp; 888 ipsec_in_t *ii; 889 tcpha_t *tcpha; 890 conn_t *connp; 891 892 first_mp = mp; 893 if (mctl_present) { 894 mp = first_mp->b_cont; 895 ASSERT(mp != NULL); 896 897 ii = (ipsec_in_t *)first_mp->b_rptr; 898 ASSERT(ii->ipsec_in_type == IPSEC_IN); 899 } else { 900 ii = NULL; 901 } 902 903 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 904 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 905 906 /* 907 * Need to pullup everything in order to use 908 * ip_hdr_length_nexthdr_v6() 909 */ 910 if (mp->b_cont != NULL) { 911 if (!pullupmsg(mp, -1)) { 912 ip1dbg(("icmp_inbound_error_fanout_v6: " 913 "pullupmsg failed\n")); 914 goto drop_pkt; 915 } 916 ip6h = (ip6_t *)mp->b_rptr; 917 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 918 } 919 920 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 921 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 922 goto drop_pkt; 923 924 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 925 goto drop_pkt; 926 nexthdr = *nexthdrp; 927 928 /* Set message type, must be done after pullups */ 929 mp->b_datap->db_type = M_CTL; 930 931 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 932 /* 933 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 934 * sockets. 935 * 936 * Note I don't like walking every connection to deliver 937 * this information to a set of listeners. A separate 938 * list could be kept to keep the cost of this down. 939 */ 940 ipcl_walk(pkt_too_big, (void *)mp); 941 } 942 943 /* Try to pass the ICMP message to clients who need it */ 944 switch (nexthdr) { 945 case IPPROTO_UDP: { 946 /* 947 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 948 * UDP header to get the port information. 949 */ 950 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 951 mp->b_wptr) { 952 break; 953 } 954 /* 955 * Attempt to find a client stream based on port. 956 * Note that we do a reverse lookup since the header is 957 * in the form we sent it out. 958 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 959 * and we only set the src and dst addresses and nexthdr. 960 */ 961 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 962 rip6h.ip6_src = ip6h->ip6_dst; 963 rip6h.ip6_dst = ip6h->ip6_src; 964 rip6h.ip6_nxt = nexthdr; 965 ((uint16_t *)&ports)[0] = up[1]; 966 ((uint16_t *)&ports)[1] = up[0]; 967 968 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 969 IP6_NO_IPPOLICY, mctl_present, zoneid); 970 return; 971 } 972 case IPPROTO_TCP: { 973 /* 974 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 975 * the TCP header to get the port information. 
976 */ 977 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 978 mp->b_wptr) { 979 break; 980 } 981 982 /* 983 * Attempt to find a client stream based on port. 984 * Note that we do a reverse lookup since the header is 985 * in the form we sent it out. 986 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 987 * we only set the src and dst addresses and nexthdr. 988 */ 989 990 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 991 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 992 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 993 if (connp == NULL) { 994 goto drop_pkt; 995 } 996 997 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 998 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 999 return; 1000 1001 } 1002 case IPPROTO_SCTP: 1003 /* 1004 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 1005 * the SCTP header to get the port information. 1006 */ 1007 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 1008 mp->b_wptr) { 1009 break; 1010 } 1011 1012 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 1013 ((uint16_t *)&ports)[0] = up[1]; 1014 ((uint16_t *)&ports)[1] = up[0]; 1015 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 1016 IP6_NO_IPPOLICY, 0, zoneid); 1017 return; 1018 case IPPROTO_ESP: 1019 case IPPROTO_AH: { 1020 int ipsec_rc; 1021 1022 /* 1023 * We need a IPSEC_IN in the front to fanout to AH/ESP. 1024 * We will re-use the IPSEC_IN if it is already present as 1025 * AH/ESP will not affect any fields in the IPSEC_IN for 1026 * ICMP errors. If there is no IPSEC_IN, allocate a new 1027 * one and attach it in the front. 1028 */ 1029 if (ii != NULL) { 1030 /* 1031 * ip_fanout_proto_again converts the ICMP errors 1032 * that come back from AH/ESP to M_DATA so that 1033 * if it is non-AH/ESP and we do a pullupmsg in 1034 * this function, it would work. Convert it back 1035 * to M_CTL before we send up as this is a ICMP 1036 * error. This could have been generated locally or 1037 * by some router. 
Validate the inner IPSEC 1038 * headers. 1039 * 1040 * NOTE : ill_index is used by ip_fanout_proto_again 1041 * to locate the ill. 1042 */ 1043 ASSERT(ill != NULL); 1044 ii->ipsec_in_ill_index = 1045 ill->ill_phyint->phyint_ifindex; 1046 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1047 first_mp->b_cont->b_datap->db_type = M_CTL; 1048 } else { 1049 /* 1050 * IPSEC_IN is not present. We attach a ipsec_in 1051 * message and send up to IPSEC for validating 1052 * and removing the IPSEC headers. Clear 1053 * ipsec_in_secure so that when we return 1054 * from IPSEC, we don't mistakenly think that this 1055 * is a secure packet came from the network. 1056 * 1057 * NOTE : ill_index is used by ip_fanout_proto_again 1058 * to locate the ill. 1059 */ 1060 ASSERT(first_mp == mp); 1061 first_mp = ipsec_in_alloc(B_FALSE); 1062 if (first_mp == NULL) { 1063 freemsg(mp); 1064 BUMP_MIB(&ip_mib, ipInDiscards); 1065 return; 1066 } 1067 ii = (ipsec_in_t *)first_mp->b_rptr; 1068 1069 /* This is not a secure packet */ 1070 ii->ipsec_in_secure = B_FALSE; 1071 first_mp->b_cont = mp; 1072 mp->b_datap->db_type = M_CTL; 1073 ASSERT(ill != NULL); 1074 ii->ipsec_in_ill_index = 1075 ill->ill_phyint->phyint_ifindex; 1076 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1077 } 1078 1079 if (!ipsec_loaded()) { 1080 ip_proto_not_sup(q, first_mp, 0, zoneid); 1081 return; 1082 } 1083 1084 if (nexthdr == IPPROTO_ESP) 1085 ipsec_rc = ipsecesp_icmp_error(first_mp); 1086 else 1087 ipsec_rc = ipsecah_icmp_error(first_mp); 1088 if (ipsec_rc == IPSEC_STATUS_FAILED) 1089 return; 1090 1091 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1092 return; 1093 } 1094 case IPPROTO_ENCAP: 1095 case IPPROTO_IPV6: 1096 if ((uint8_t *)ip6h + hdr_length + 1097 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1098 sizeof (ip6_t)) > mp->b_wptr) 1099 goto drop_pkt; 1100 1101 if (nexthdr == IPPROTO_ENCAP || 1102 !IN6_ARE_ADDR_EQUAL( 1103 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1104 &ip6h->ip6_src) || 1105 !IN6_ARE_ADDR_EQUAL( 1106 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1107 &ip6h->ip6_dst)) { 1108 /* 1109 * For tunnels that have used IPsec protection, 1110 * we need to adjust the MTU to take into account 1111 * the IPsec overhead. 1112 */ 1113 if (ii != NULL) 1114 icmp6->icmp6_mtu = htons( 1115 ntohs(icmp6->icmp6_mtu) - 1116 ipsec_in_extra_length(first_mp)); 1117 } else { 1118 /* 1119 * Self-encapsulated case. As in the ipv4 case, 1120 * we need to strip the 2nd IP header. Since mp 1121 * is already pulled-up, we can simply bcopy 1122 * the 3rd header + data over the 2nd header. 1123 */ 1124 uint16_t unused_len; 1125 ip6_t *inner_ip6h = (ip6_t *) 1126 ((uchar_t *)ip6h + hdr_length); 1127 1128 /* 1129 * Make sure we don't do recursion more than once. 1130 */ 1131 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1132 &unused_len, &nexthdrp) || 1133 *nexthdrp == IPPROTO_IPV6) { 1134 goto drop_pkt; 1135 } 1136 1137 /* 1138 * We are about to modify the packet. Make a copy if 1139 * someone else has a reference to it. 1140 */ 1141 if (DB_REF(mp) > 1) { 1142 mblk_t *mp1; 1143 uint16_t icmp6_offset; 1144 1145 mp1 = copymsg(mp); 1146 if (mp1 == NULL) { 1147 goto drop_pkt; 1148 } 1149 icmp6_offset = (uint16_t) 1150 ((uchar_t *)icmp6 - mp->b_rptr); 1151 freemsg(mp); 1152 mp = mp1; 1153 1154 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1155 ip6h = (ip6_t *)&icmp6[1]; 1156 inner_ip6h = (ip6_t *) 1157 ((uchar_t *)ip6h + hdr_length); 1158 1159 if (mctl_present) 1160 first_mp->b_cont = mp; 1161 else 1162 first_mp = mp; 1163 } 1164 1165 /* 1166 * Need to set db_type back to M_DATA before 1167 * refeeding mp into this function. 
1168 */ 1169 DB_TYPE(mp) = M_DATA; 1170 1171 /* 1172 * Copy the 3rd header + remaining data on top 1173 * of the 2nd header. 1174 */ 1175 bcopy(inner_ip6h, ip6h, 1176 mp->b_wptr - (uchar_t *)inner_ip6h); 1177 1178 /* 1179 * Subtract length of the 2nd header. 1180 */ 1181 mp->b_wptr -= hdr_length; 1182 1183 /* 1184 * Now recurse, and see what I _really_ should be 1185 * doing here. 1186 */ 1187 icmp_inbound_error_fanout_v6(q, first_mp, 1188 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1189 zoneid); 1190 return; 1191 } 1192 /* FALLTHRU */ 1193 default: 1194 /* 1195 * The rip6h header is only used for the lookup and we 1196 * only set the src and dst addresses and nexthdr. 1197 */ 1198 rip6h.ip6_src = ip6h->ip6_dst; 1199 rip6h.ip6_dst = ip6h->ip6_src; 1200 rip6h.ip6_nxt = nexthdr; 1201 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1202 IP6_NO_IPPOLICY, mctl_present, zoneid); 1203 return; 1204 } 1205 /* NOTREACHED */ 1206 drop_pkt: 1207 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1208 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1209 freemsg(first_mp); 1210 } 1211 1212 /* 1213 * Validate the incoming redirect message, if valid redirect 1214 * processing is done later. This is separated from the actual 1215 * redirect processing to avoid becoming single threaded when not 1216 * necessary. (i.e invalid packet) 1217 * Assumes that any AH or ESP headers have already been removed. 1218 * The mp has already been pulled up. 
1219 */ 1220 boolean_t 1221 icmp_redirect_ok_v6(ill_t *ill, mblk_t *mp) 1222 { 1223 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1224 nd_redirect_t *rd; 1225 ire_t *ire; 1226 uint16_t len; 1227 uint16_t hdr_length; 1228 1229 ASSERT(mp->b_cont == NULL); 1230 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1231 hdr_length = ip_hdr_length_v6(mp, ip6h); 1232 else 1233 hdr_length = IPV6_HDR_LEN; 1234 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1235 len = mp->b_wptr - mp->b_rptr - hdr_length; 1236 if (!IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1237 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1238 (rd->nd_rd_code != 0) || 1239 (len < sizeof (nd_redirect_t)) || 1240 (IN6_IS_ADDR_V4MAPPED(&rd->nd_rd_dst)) || 1241 (IN6_IS_ADDR_MULTICAST(&rd->nd_rd_dst))) { 1242 return (B_FALSE); 1243 } 1244 if (!(IN6_IS_ADDR_LINKLOCAL(&rd->nd_rd_target) || 1245 IN6_ARE_ADDR_EQUAL(&rd->nd_rd_target, &rd->nd_rd_dst))) { 1246 return (B_FALSE); 1247 } 1248 1249 /* 1250 * Verify that the IP source address of the redirect is 1251 * the same as the current first-hop router for the specified 1252 * ICMP destination address. Just to be cautious, this test 1253 * will be done again before we add the redirect, in case 1254 * router goes away between now and then. 1255 */ 1256 ire = ire_route_lookup_v6(&rd->nd_rd_dst, 0, 1257 &ip6h->ip6_src, 0, ill->ill_ipif, NULL, ALL_ZONES, NULL, 1258 MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1259 if (ire == NULL) 1260 return (B_FALSE); 1261 ire_refrele(ire); 1262 if (len > sizeof (nd_redirect_t)) { 1263 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1264 len - sizeof (nd_redirect_t))) 1265 return (B_FALSE); 1266 } 1267 return (B_TRUE); 1268 } 1269 1270 /* 1271 * Process received IPv6 ICMP Redirect messages. 1272 * Assumes that the icmp packet has already been verfied to be 1273 * valid, aligned and in a single mblk all done in icmp_redirect_ok_v6(). 
1274 */ 1275 /* ARGSUSED */ 1276 static void 1277 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1278 { 1279 ip6_t *ip6h; 1280 uint16_t hdr_length; 1281 nd_redirect_t *rd; 1282 ire_t *ire; 1283 ire_t *prev_ire; 1284 ire_t *redir_ire; 1285 in6_addr_t *src, *dst, *gateway; 1286 nd_opt_hdr_t *opt; 1287 nce_t *nce; 1288 int nce_flags = 0; 1289 int err = 0; 1290 boolean_t redirect_to_router = B_FALSE; 1291 int len; 1292 iulp_t ulp_info = { 0 }; 1293 ill_t *prev_ire_ill; 1294 ipif_t *ipif; 1295 1296 ip6h = (ip6_t *)mp->b_rptr; 1297 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1298 hdr_length = ip_hdr_length_v6(mp, ip6h); 1299 else 1300 hdr_length = IPV6_HDR_LEN; 1301 1302 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1303 src = &ip6h->ip6_src; 1304 dst = &rd->nd_rd_dst; 1305 gateway = &rd->nd_rd_target; 1306 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1307 redirect_to_router = B_TRUE; 1308 nce_flags |= NCE_F_ISROUTER; 1309 } 1310 /* 1311 * Make sure we had a route for the dest in question and that 1312 * route was pointing to the old gateway (the source of the 1313 * redirect packet.) 1314 */ 1315 ipif = ipif_get_next_ipif(NULL, ill); 1316 if (ipif == NULL) { 1317 freemsg(mp); 1318 return; 1319 } 1320 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1321 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP); 1322 ipif_refrele(ipif); 1323 /* 1324 * Check that 1325 * the redirect was not from ourselves 1326 * old gateway is still directly reachable 1327 */ 1328 if (prev_ire == NULL || 1329 prev_ire->ire_type == IRE_LOCAL) { 1330 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1331 goto fail_redirect; 1332 } 1333 prev_ire_ill = ire_to_ill(prev_ire); 1334 ASSERT(prev_ire_ill != NULL); 1335 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1336 nce_flags |= NCE_F_NONUD; 1337 1338 /* 1339 * Should we use the old ULP info to create the new gateway? From 1340 * a user's perspective, we should inherit the info so that it 1341 * is a "smooth" transition. 
If we do not do that, then new 1342 * connections going thru the new gateway will have no route metrics, 1343 * which is counter-intuitive to user. From a network point of 1344 * view, this may or may not make sense even though the new gateway 1345 * is still directly connected to us so the route metrics should not 1346 * change much. 1347 * 1348 * But if the old ire_uinfo is not initialized, we do another 1349 * recursive lookup on the dest using the new gateway. There may 1350 * be a route to that. If so, use it to initialize the redirect 1351 * route. 1352 */ 1353 if (prev_ire->ire_uinfo.iulp_set) { 1354 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1355 } else if (redirect_to_router) { 1356 /* 1357 * Only do the following if the redirection is really to 1358 * a router. 1359 */ 1360 ire_t *tmp_ire; 1361 ire_t *sire; 1362 1363 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1364 ALL_ZONES, 0, NULL, 1365 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1366 if (sire != NULL) { 1367 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1368 ASSERT(tmp_ire != NULL); 1369 ire_refrele(tmp_ire); 1370 ire_refrele(sire); 1371 } else if (tmp_ire != NULL) { 1372 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1373 sizeof (iulp_t)); 1374 ire_refrele(tmp_ire); 1375 } 1376 } 1377 1378 len = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1379 opt = (nd_opt_hdr_t *)&rd[1]; 1380 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1381 if (opt != NULL) { 1382 err = ndp_lookup_then_add(ill, 1383 (uchar_t *)&opt[1], /* Link layer address */ 1384 gateway, 1385 &ipv6_all_ones, /* prefix mask */ 1386 &ipv6_all_zeros, /* Mapping mask */ 1387 0, 1388 nce_flags, 1389 ND_STALE, 1390 &nce, 1391 NULL, 1392 NULL); 1393 switch (err) { 1394 case 0: 1395 NCE_REFRELE(nce); 1396 break; 1397 case EEXIST: 1398 /* 1399 * Check to see if link layer address has changed and 1400 * process the nce_state accordingly. 
1401 */ 1402 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1403 NCE_REFRELE(nce); 1404 break; 1405 default: 1406 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1407 err)); 1408 goto fail_redirect; 1409 } 1410 } 1411 if (redirect_to_router) { 1412 /* icmp_redirect_ok_v6() must have already verified this */ 1413 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1414 1415 /* 1416 * Create a Route Association. This will allow us to remember 1417 * a router told us to use the particular gateway. 1418 */ 1419 ire = ire_create_v6( 1420 dst, 1421 &ipv6_all_ones, /* mask */ 1422 &prev_ire->ire_src_addr_v6, /* source addr */ 1423 gateway, /* gateway addr */ 1424 &prev_ire->ire_max_frag, /* max frag */ 1425 NULL, /* Fast Path header */ 1426 NULL, /* no rfq */ 1427 NULL, /* no stq */ 1428 IRE_HOST_REDIRECT, 1429 NULL, 1430 prev_ire->ire_ipif, 1431 NULL, 1432 0, 1433 0, 1434 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1435 &ulp_info, 1436 NULL, 1437 NULL); 1438 } else { 1439 /* 1440 * Just create an on link entry, may or may not be a router 1441 * If there is no link layer address option ire_add() won't 1442 * add this. 1443 */ 1444 ire = ire_create_v6( 1445 dst, /* gateway == dst */ 1446 &ipv6_all_ones, /* mask */ 1447 &prev_ire->ire_src_addr_v6, /* source addr */ 1448 &ipv6_all_zeros, /* gateway addr */ 1449 &prev_ire->ire_max_frag, /* max frag */ 1450 NULL, /* Fast Path header */ 1451 prev_ire->ire_rfq, /* ire rfq */ 1452 prev_ire->ire_stq, /* ire stq */ 1453 IRE_CACHE, 1454 NULL, 1455 prev_ire->ire_ipif, 1456 &ipv6_all_ones, 1457 0, 1458 0, 1459 0, 1460 &ulp_info, 1461 NULL, 1462 NULL); 1463 } 1464 if (ire == NULL) 1465 goto fail_redirect; 1466 1467 /* 1468 * XXX If there is no nce i.e there is no target link layer address 1469 * option with the redirect message, ire_add will fail. In that 1470 * case we never add the IRE_CACHE/IRE_HOST_REDIRECT. We need 1471 * to fix this. 
1472 */ 1473 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1474 1475 /* tell routing sockets that we received a redirect */ 1476 ip_rts_change_v6(RTM_REDIRECT, 1477 &rd->nd_rd_dst, 1478 &rd->nd_rd_target, 1479 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1480 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1481 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1482 1483 /* 1484 * Delete any existing IRE_HOST_REDIRECT for this destination. 1485 * This together with the added IRE has the effect of 1486 * modifying an existing redirect. 1487 */ 1488 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST_REDIRECT, 1489 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1490 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1491 1492 ire_refrele(ire); /* Held in ire_add_v6 */ 1493 1494 if (redir_ire != NULL) { 1495 ire_delete(redir_ire); 1496 ire_refrele(redir_ire); 1497 } 1498 } 1499 1500 if (prev_ire->ire_type == IRE_CACHE) 1501 ire_delete(prev_ire); 1502 ire_refrele(prev_ire); 1503 prev_ire = NULL; 1504 1505 fail_redirect: 1506 if (prev_ire != NULL) 1507 ire_refrele(prev_ire); 1508 freemsg(mp); 1509 } 1510 1511 static ill_t * 1512 ip_queue_to_ill_v6(queue_t *q) 1513 { 1514 ill_t *ill; 1515 1516 ASSERT(WR(q) == q); 1517 1518 if (q->q_next != NULL) { 1519 ill = (ill_t *)q->q_ptr; 1520 if (ILL_CAN_LOOKUP(ill)) 1521 ill_refhold(ill); 1522 else 1523 ill = NULL; 1524 } else { 1525 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1526 NULL, NULL, NULL, NULL, NULL); 1527 } 1528 if (ill == NULL) 1529 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1530 return (ill); 1531 } 1532 1533 /* 1534 * Assigns an appropriate source address to the packet. 1535 * If origdst is one of our IP addresses that use it as the source. 1536 * If the queue is an ill queue then select a source from that ill. 1537 * Otherwise pick a source based on a route lookup back to the origsrc. 1538 * 1539 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1540 */ 1541 static in6_addr_t * 1542 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1543 in6_addr_t *src, zoneid_t zoneid) 1544 { 1545 ill_t *ill; 1546 ire_t *ire; 1547 ipif_t *ipif; 1548 1549 ASSERT(!(wq->q_flag & QREADR)); 1550 if (wq->q_next != NULL) { 1551 ill = (ill_t *)wq->q_ptr; 1552 } else { 1553 ill = NULL; 1554 } 1555 1556 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1557 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1558 if (ire != NULL) { 1559 /* Destined to one of our addresses */ 1560 *src = *origdst; 1561 ire_refrele(ire); 1562 return (src); 1563 } 1564 if (ire != NULL) { 1565 ire_refrele(ire); 1566 ire = NULL; 1567 } 1568 if (ill == NULL) { 1569 /* What is the route back to the original source? */ 1570 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1571 NULL, NULL, zoneid, NULL, 1572 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1573 if (ire == NULL) { 1574 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1575 return (NULL); 1576 } 1577 /* 1578 * Does not matter whether we use ire_stq or ire_ipif here. 1579 * Just pick an ill for ICMP replies. 1580 */ 1581 ASSERT(ire->ire_ipif != NULL); 1582 ill = ire->ire_ipif->ipif_ill; 1583 ire_refrele(ire); 1584 } 1585 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1586 IPV6_PREFER_SRC_DEFAULT, zoneid); 1587 if (ipif != NULL) { 1588 *src = ipif->ipif_v6src_addr; 1589 ipif_refrele(ipif); 1590 return (src); 1591 } 1592 /* 1593 * Unusual case - can't find a usable source address to reach the 1594 * original source. Use what in the route to the source. 
1595 */ 1596 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1597 NULL, NULL, zoneid, NULL, 1598 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1599 if (ire == NULL) { 1600 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 1601 return (NULL); 1602 } 1603 ASSERT(ire != NULL); 1604 *src = ire->ire_src_addr_v6; 1605 ire_refrele(ire); 1606 return (src); 1607 } 1608 1609 /* 1610 * Build and ship an IPv6 ICMP message using the packet data in mp, 1611 * and the ICMP header pointed to by "stuff". (May be called as 1612 * writer.) 1613 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1614 * verify that an icmp error packet can be sent. 1615 * 1616 * If q is an ill write side queue (which is the case when packets 1617 * arrive from ip_rput) then ip_wput code will ensure that packets to 1618 * link-local destinations are sent out that ill. 1619 * 1620 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1621 * source address (see above function). 1622 */ 1623 static void 1624 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1625 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid) 1626 { 1627 ip6_t *ip6h; 1628 in6_addr_t v6dst; 1629 size_t len_needed; 1630 size_t msg_len; 1631 mblk_t *mp1; 1632 icmp6_t *icmp6; 1633 ill_t *ill; 1634 in6_addr_t v6src; 1635 mblk_t *ipsec_mp; 1636 ipsec_out_t *io; 1637 1638 ill = ip_queue_to_ill_v6(q); 1639 if (ill == NULL) { 1640 freemsg(mp); 1641 return; 1642 } 1643 1644 if (mctl_present) { 1645 /* 1646 * If it is : 1647 * 1648 * 1) a IPSEC_OUT, then this is caused by outbound 1649 * datagram originating on this host. IPSEC processing 1650 * may or may not have been done. Refer to comments above 1651 * icmp_inbound_error_fanout for details. 1652 * 1653 * 2) a IPSEC_IN if we are generating a icmp_message 1654 * for an incoming datagram destined for us i.e called 1655 * from ip_fanout_send_icmp. 
1656 */ 1657 ipsec_info_t *in; 1658 1659 ipsec_mp = mp; 1660 mp = ipsec_mp->b_cont; 1661 1662 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1663 ip6h = (ip6_t *)mp->b_rptr; 1664 1665 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1666 in->ipsec_info_type == IPSEC_IN); 1667 1668 if (in->ipsec_info_type == IPSEC_IN) { 1669 /* 1670 * Convert the IPSEC_IN to IPSEC_OUT. 1671 */ 1672 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1673 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1674 ill_refrele(ill); 1675 return; 1676 } 1677 } else { 1678 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1679 io = (ipsec_out_t *)in; 1680 /* 1681 * Clear out ipsec_out_proc_begin, so we do a fresh 1682 * ire lookup. 1683 */ 1684 io->ipsec_out_proc_begin = B_FALSE; 1685 } 1686 } else { 1687 /* 1688 * This is in clear. The icmp message we are building 1689 * here should go out in clear. 1690 */ 1691 ipsec_in_t *ii; 1692 ASSERT(mp->b_datap->db_type == M_DATA); 1693 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1694 freemsg(mp); 1695 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1696 ill_refrele(ill); 1697 return; 1698 } 1699 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1700 1701 /* This is not a secure packet */ 1702 ii->ipsec_in_secure = B_FALSE; 1703 /* 1704 * For trusted extensions using a shared IP address we can 1705 * send using any zoneid. 1706 */ 1707 if (zoneid == ALL_ZONES) 1708 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1709 else 1710 ii->ipsec_in_zoneid = zoneid; 1711 ipsec_mp->b_cont = mp; 1712 ip6h = (ip6_t *)mp->b_rptr; 1713 /* 1714 * Convert the IPSEC_IN to IPSEC_OUT. 
1715 */ 1716 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1717 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1718 ill_refrele(ill); 1719 return; 1720 } 1721 } 1722 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1723 1724 if (v6src_ptr != NULL) { 1725 v6src = *v6src_ptr; 1726 } else { 1727 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1728 &v6src, zoneid) == NULL) { 1729 freemsg(ipsec_mp); 1730 ill_refrele(ill); 1731 return; 1732 } 1733 } 1734 v6dst = ip6h->ip6_src; 1735 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1736 msg_len = msgdsize(mp); 1737 if (msg_len > len_needed) { 1738 if (!adjmsg(mp, len_needed - msg_len)) { 1739 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1740 freemsg(ipsec_mp); 1741 ill_refrele(ill); 1742 return; 1743 } 1744 msg_len = len_needed; 1745 } 1746 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1747 if (mp1 == NULL) { 1748 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1749 freemsg(ipsec_mp); 1750 ill_refrele(ill); 1751 return; 1752 } 1753 ill_refrele(ill); 1754 mp1->b_cont = mp; 1755 mp = mp1; 1756 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1757 io->ipsec_out_type == IPSEC_OUT); 1758 ipsec_mp->b_cont = mp; 1759 1760 /* 1761 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1762 * node generates be accepted in peace by all on-host destinations. 1763 * If we do NOT assume that all on-host destinations trust 1764 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1765 * (Look for ipsec_out_icmp_loopback). 
1766 */ 1767 io->ipsec_out_icmp_loopback = B_TRUE; 1768 1769 ip6h = (ip6_t *)mp->b_rptr; 1770 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1771 1772 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1773 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1774 ip6h->ip6_hops = ipv6_def_hops; 1775 ip6h->ip6_dst = v6dst; 1776 ip6h->ip6_src = v6src; 1777 msg_len += IPV6_HDR_LEN + len; 1778 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1779 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1780 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1781 } 1782 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1783 icmp6 = (icmp6_t *)&ip6h[1]; 1784 bcopy(stuff, (char *)icmp6, len); 1785 /* 1786 * Prepare for checksum by putting icmp length in the icmp 1787 * checksum field. The checksum is calculated in ip_wput_v6. 1788 */ 1789 icmp6->icmp6_cksum = ip6h->ip6_plen; 1790 if (icmp6->icmp6_type == ND_REDIRECT) { 1791 ip6h->ip6_hops = IPV6_MAX_HOPS; 1792 } 1793 /* Send to V6 writeside put routine */ 1794 put(q, ipsec_mp); 1795 } 1796 1797 /* 1798 * Update the output mib when ICMPv6 packets are sent. 
1799 */ 1800 static void 1801 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1802 { 1803 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1804 1805 switch (icmp6->icmp6_type) { 1806 case ICMP6_DST_UNREACH: 1807 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1808 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1809 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1810 break; 1811 1812 case ICMP6_TIME_EXCEEDED: 1813 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1814 break; 1815 1816 case ICMP6_PARAM_PROB: 1817 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1818 break; 1819 1820 case ICMP6_PACKET_TOO_BIG: 1821 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1822 break; 1823 1824 case ICMP6_ECHO_REQUEST: 1825 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1826 break; 1827 1828 case ICMP6_ECHO_REPLY: 1829 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1830 break; 1831 1832 case ND_ROUTER_SOLICIT: 1833 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1834 break; 1835 1836 case ND_ROUTER_ADVERT: 1837 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1838 break; 1839 1840 case ND_NEIGHBOR_SOLICIT: 1841 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1842 break; 1843 1844 case ND_NEIGHBOR_ADVERT: 1845 BUMP_MIB(ill->ill_icmp6_mib, 1846 ipv6IfIcmpOutNeighborAdvertisements); 1847 break; 1848 1849 case ND_REDIRECT: 1850 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1851 break; 1852 1853 case MLD_LISTENER_QUERY: 1854 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1855 break; 1856 1857 case MLD_LISTENER_REPORT: 1858 case MLD_V2_LISTENER_REPORT: 1859 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1860 break; 1861 1862 case MLD_LISTENER_REDUCTION: 1863 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1864 break; 1865 } 1866 } 1867 1868 /* 1869 * Check if it is ok to send an ICMPv6 error packet in 1870 * response to the IP packet in 
mp. 1871 * Free the message and return null if no 1872 * ICMP error packet should be sent. 1873 */ 1874 static mblk_t * 1875 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1876 boolean_t llbcast, boolean_t mcast_ok) 1877 { 1878 ip6_t *ip6h; 1879 1880 if (!mp) 1881 return (NULL); 1882 1883 ip6h = (ip6_t *)mp->b_rptr; 1884 1885 /* Check if source address uniquely identifies the host */ 1886 1887 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1888 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1889 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1890 freemsg(mp); 1891 return (NULL); 1892 } 1893 1894 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1895 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1896 icmp6_t *icmp6; 1897 1898 if (mp->b_wptr - mp->b_rptr < len_needed) { 1899 if (!pullupmsg(mp, len_needed)) { 1900 ill_t *ill; 1901 1902 ill = ip_queue_to_ill_v6(q); 1903 if (ill == NULL) { 1904 BUMP_MIB(&icmp6_mib, 1905 ipv6IfIcmpInErrors); 1906 } else { 1907 BUMP_MIB(ill->ill_icmp6_mib, 1908 ipv6IfIcmpInErrors); 1909 ill_refrele(ill); 1910 } 1911 freemsg(mp); 1912 return (NULL); 1913 } 1914 ip6h = (ip6_t *)mp->b_rptr; 1915 } 1916 icmp6 = (icmp6_t *)&ip6h[1]; 1917 /* Explicitly do not generate errors in response to redirects */ 1918 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1919 icmp6->icmp6_type == ND_REDIRECT) { 1920 freemsg(mp); 1921 return (NULL); 1922 } 1923 } 1924 /* 1925 * Check that the destination is not multicast and that the packet 1926 * was not sent on link layer broadcast or multicast. (Exception 1927 * is Packet too big message as per the draft - when mcast_ok is set.) 1928 */ 1929 if (!mcast_ok && 1930 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1931 freemsg(mp); 1932 return (NULL); 1933 } 1934 if (icmp_err_rate_limit()) { 1935 /* 1936 * Only send ICMP error packets every so often. 1937 * This should be done on a per port/source basis, 1938 * but for now this will suffice. 
1939 */ 1940 freemsg(mp); 1941 return (NULL); 1942 } 1943 return (mp); 1944 } 1945 1946 /* 1947 * Generate an ICMPv6 redirect message. 1948 * Include target link layer address option if it exits. 1949 * Always include redirect header. 1950 */ 1951 static void 1952 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1953 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1954 { 1955 nd_redirect_t *rd; 1956 nd_opt_rd_hdr_t *rdh; 1957 uchar_t *buf; 1958 nce_t *nce = NULL; 1959 nd_opt_hdr_t *opt; 1960 int len; 1961 int ll_opt_len = 0; 1962 int max_redir_hdr_data_len; 1963 int pkt_len; 1964 in6_addr_t *srcp; 1965 1966 /* 1967 * We are called from ip_rput where we could 1968 * not have attached an IPSEC_IN. 1969 */ 1970 ASSERT(mp->b_datap->db_type == M_DATA); 1971 1972 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1973 if (mp == NULL) 1974 return; 1975 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1976 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1977 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1978 ill->ill_phys_addr_length + 7)/8 * 8; 1979 } 1980 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1981 ASSERT(len % 4 == 0); 1982 buf = kmem_alloc(len, KM_NOSLEEP); 1983 if (buf == NULL) { 1984 if (nce != NULL) 1985 NCE_REFRELE(nce); 1986 freemsg(mp); 1987 return; 1988 } 1989 1990 rd = (nd_redirect_t *)buf; 1991 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1992 rd->nd_rd_code = 0; 1993 rd->nd_rd_reserved = 0; 1994 rd->nd_rd_target = *targetp; 1995 rd->nd_rd_dst = *dest; 1996 1997 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1998 if (nce != NULL && ll_opt_len != 0) { 1999 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2000 opt->nd_opt_len = ll_opt_len/8; 2001 bcopy((char *)nce->nce_res_mp->b_rptr + 2002 NCE_LL_ADDR_OFFSET(ill), &opt[1], 2003 ill->ill_phys_addr_length); 2004 } 2005 if (nce != NULL) 2006 NCE_REFRELE(nce); 2007 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 2008 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 2009 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 2010 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 2011 pkt_len = msgdsize(mp); 2012 /* Make sure mp is 8 byte aligned */ 2013 if (pkt_len > max_redir_hdr_data_len) { 2014 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 2015 sizeof (nd_opt_rd_hdr_t))/8; 2016 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 2017 } else { 2018 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 2019 (void) adjmsg(mp, -(pkt_len % 8)); 2020 } 2021 rdh->nd_opt_rh_reserved1 = 0; 2022 rdh->nd_opt_rh_reserved2 = 0; 2023 /* ipif_v6src_addr contains the link-local source address */ 2024 rw_enter(&ill_g_lock, RW_READER); 2025 if (ill->ill_group != NULL) { 2026 /* 2027 * The receiver of the redirect will verify whether it 2028 * had a route through us (srcp that we will use in 2029 * the redirect) or not. As we load spread even link-locals, 2030 * we don't know which source address the receiver of 2031 * redirect has in its route for communicating with us. 2032 * Thus we randomly choose a source here and finally we 2033 * should get to the right one and it will eventually 2034 * accept the redirect from us. We can't call 2035 * ip_lookup_scope_v6 because we don't have the right 2036 * link-local address here. Thus we randomly choose one. 2037 */ 2038 int cnt = ill->ill_group->illgrp_ill_count; 2039 2040 ill = ill->ill_group->illgrp_ill; 2041 cnt = ++icmp_redirect_v6_src_index % cnt; 2042 while (cnt--) 2043 ill = ill->ill_group_next; 2044 srcp = &ill->ill_ipif->ipif_v6src_addr; 2045 } else { 2046 srcp = &ill->ill_ipif->ipif_v6src_addr; 2047 } 2048 rw_exit(&ill_g_lock); 2049 /* Redirects sent by router, and router is global zone */ 2050 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID); 2051 kmem_free(buf, len); 2052 } 2053 2054 2055 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 2056 void 2057 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 2058 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2059 { 2060 icmp6_t icmp6; 2061 boolean_t mctl_present; 2062 mblk_t *first_mp; 2063 2064 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2065 2066 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2067 if (mp == NULL) { 2068 if (mctl_present) 2069 freeb(first_mp); 2070 return; 2071 } 2072 bzero(&icmp6, sizeof (icmp6_t)); 2073 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2074 icmp6.icmp6_code = code; 2075 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2076 zoneid); 2077 } 2078 2079 /* 2080 * Generate an ICMP unreachable message. 2081 */ 2082 void 2083 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2084 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2085 { 2086 icmp6_t icmp6; 2087 boolean_t mctl_present; 2088 mblk_t *first_mp; 2089 2090 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2091 2092 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2093 if (mp == NULL) { 2094 if (mctl_present) 2095 freeb(first_mp); 2096 return; 2097 } 2098 bzero(&icmp6, sizeof (icmp6_t)); 2099 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2100 icmp6.icmp6_code = code; 2101 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2102 zoneid); 2103 } 2104 2105 /* 2106 * Generate an ICMP pkt too big message. 
2107 */ 2108 static void 2109 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2110 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2111 { 2112 icmp6_t icmp6; 2113 mblk_t *first_mp; 2114 boolean_t mctl_present; 2115 2116 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2117 2118 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2119 if (mp == NULL) { 2120 if (mctl_present) 2121 freeb(first_mp); 2122 return; 2123 } 2124 bzero(&icmp6, sizeof (icmp6_t)); 2125 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2126 icmp6.icmp6_code = 0; 2127 icmp6.icmp6_mtu = htonl(mtu); 2128 2129 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2130 zoneid); 2131 } 2132 2133 /* 2134 * Generate an ICMP parameter problem message. (May be called as writer.) 2135 * 'offset' is the offset from the beginning of the packet in error. 2136 */ 2137 static void 2138 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2139 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2140 { 2141 icmp6_t icmp6; 2142 boolean_t mctl_present; 2143 mblk_t *first_mp; 2144 2145 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2146 2147 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2148 if (mp == NULL) { 2149 if (mctl_present) 2150 freeb(first_mp); 2151 return; 2152 } 2153 bzero((char *)&icmp6, sizeof (icmp6_t)); 2154 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2155 icmp6.icmp6_code = code; 2156 icmp6.icmp6_pptr = htonl(offset); 2157 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2158 zoneid); 2159 } 2160 2161 /* 2162 * This code will need to take into account the possibility of binding 2163 * to a link local address on a multi-homed host, in which case the 2164 * outgoing interface (from the conn) will need to be used when getting 2165 * an ire for the dst. 
Going through proper outgoing interface and 2166 * choosing the source address corresponding to the outgoing interface 2167 * is necessary when the destination address is a link-local address and 2168 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2169 * This can happen when active connection is setup; thus ipp pointer 2170 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2171 * pointer is passed as ipp pointer. 2172 */ 2173 mblk_t * 2174 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2175 { 2176 ssize_t len; 2177 int protocol; 2178 struct T_bind_req *tbr; 2179 sin6_t *sin6; 2180 ipa6_conn_t *ac6; 2181 in6_addr_t *v6srcp; 2182 in6_addr_t *v6dstp; 2183 uint16_t lport; 2184 uint16_t fport; 2185 uchar_t *ucp; 2186 mblk_t *mp1; 2187 boolean_t ire_requested; 2188 boolean_t ipsec_policy_set; 2189 int error = 0; 2190 boolean_t local_bind; 2191 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2192 ipa6_conn_x_t *acx6; 2193 boolean_t verify_dst; 2194 2195 ASSERT(connp->conn_af_isv6); 2196 len = mp->b_wptr - mp->b_rptr; 2197 if (len < (sizeof (*tbr) + 1)) { 2198 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2199 "ip_bind_v6: bogus msg, len %ld", len); 2200 goto bad_addr; 2201 } 2202 /* Back up and extract the protocol identifier. */ 2203 mp->b_wptr--; 2204 tbr = (struct T_bind_req *)mp->b_rptr; 2205 /* Reset the message type in preparation for shipping it back. */ 2206 mp->b_datap->db_type = M_PCPROTO; 2207 2208 protocol = *mp->b_wptr & 0xFF; 2209 connp->conn_ulp = (uint8_t)protocol; 2210 2211 /* 2212 * Check for a zero length address. This is from a protocol that 2213 * wants to register to receive all packets of its type. 2214 */ 2215 if (tbr->ADDR_length == 0) { 2216 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2217 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2218 ipcl_proto_fanout_v6[protocol].connf_head != NULL) { 2219 /* 2220 * TCP, SCTP, AH, and ESP have single protocol fanouts. 
2221 * Do not allow others to bind to these. 2222 */ 2223 goto bad_addr; 2224 } 2225 2226 /* 2227 * 2228 * The udp module never sends down a zero-length address, 2229 * and allowing this on a labeled system will break MLP 2230 * functionality. 2231 */ 2232 if (is_system_labeled() && protocol == IPPROTO_UDP) 2233 goto bad_addr; 2234 2235 /* Allow ipsec plumbing */ 2236 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2237 protocol != IPPROTO_ESP) 2238 goto bad_addr; 2239 2240 connp->conn_srcv6 = ipv6_all_zeros; 2241 ipcl_proto_insert_v6(connp, protocol); 2242 2243 tbr->PRIM_type = T_BIND_ACK; 2244 return (mp); 2245 } 2246 2247 /* Extract the address pointer from the message. */ 2248 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2249 tbr->ADDR_length); 2250 if (ucp == NULL) { 2251 ip1dbg(("ip_bind_v6: no address\n")); 2252 goto bad_addr; 2253 } 2254 if (!OK_32PTR(ucp)) { 2255 ip1dbg(("ip_bind_v6: unaligned address\n")); 2256 goto bad_addr; 2257 } 2258 mp1 = mp->b_cont; /* trailing mp if any */ 2259 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2260 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2261 2262 switch (tbr->ADDR_length) { 2263 default: 2264 ip1dbg(("ip_bind_v6: bad address length %d\n", 2265 (int)tbr->ADDR_length)); 2266 goto bad_addr; 2267 2268 case IPV6_ADDR_LEN: 2269 /* Verification of local address only */ 2270 v6srcp = (in6_addr_t *)ucp; 2271 lport = 0; 2272 local_bind = B_TRUE; 2273 break; 2274 2275 case sizeof (sin6_t): 2276 sin6 = (sin6_t *)ucp; 2277 v6srcp = &sin6->sin6_addr; 2278 lport = sin6->sin6_port; 2279 local_bind = B_TRUE; 2280 break; 2281 2282 case sizeof (ipa6_conn_t): 2283 /* 2284 * Verify that both the source and destination addresses 2285 * are valid. 2286 * Note that we allow connect to broadcast and multicast 2287 * addresses when ire_requested is set. Thus the ULP 2288 * has to check for IRE_BROADCAST and multicast. 
2289 */ 2290 ac6 = (ipa6_conn_t *)ucp; 2291 v6srcp = &ac6->ac6_laddr; 2292 v6dstp = &ac6->ac6_faddr; 2293 fport = ac6->ac6_fport; 2294 /* For raw socket, the local port is not set. */ 2295 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2296 connp->conn_lport; 2297 local_bind = B_FALSE; 2298 /* Always verify destination reachability. */ 2299 verify_dst = B_TRUE; 2300 break; 2301 2302 case sizeof (ipa6_conn_x_t): 2303 /* 2304 * Verify that the source address is valid. 2305 * Note that we allow connect to broadcast and multicast 2306 * addresses when ire_requested is set. Thus the ULP 2307 * has to check for IRE_BROADCAST and multicast. 2308 */ 2309 acx6 = (ipa6_conn_x_t *)ucp; 2310 ac6 = &acx6->ac6x_conn; 2311 v6srcp = &ac6->ac6_laddr; 2312 v6dstp = &ac6->ac6_faddr; 2313 fport = ac6->ac6_fport; 2314 lport = ac6->ac6_lport; 2315 local_bind = B_FALSE; 2316 /* 2317 * Client that passed ipa6_conn_x_t to us specifies whether to 2318 * verify destination reachability. 2319 */ 2320 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2321 break; 2322 } 2323 if (local_bind) { 2324 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2325 /* Bind to IPv4 address */ 2326 ipaddr_t v4src; 2327 2328 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2329 2330 error = ip_bind_laddr(connp, mp, v4src, lport, 2331 ire_requested, ipsec_policy_set, 2332 tbr->ADDR_length != IPV6_ADDR_LEN); 2333 if (error != 0) 2334 goto bad_addr; 2335 connp->conn_pkt_isv6 = B_FALSE; 2336 } else { 2337 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2338 error = 0; 2339 goto bad_addr; 2340 } 2341 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2342 ire_requested, ipsec_policy_set, 2343 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2344 if (error != 0) 2345 goto bad_addr; 2346 connp->conn_pkt_isv6 = B_TRUE; 2347 } 2348 if (protocol == IPPROTO_TCP) 2349 connp->conn_recv = tcp_conn_request; 2350 } else { 2351 /* 2352 * Bind to local and remote address. 
Local might be 2353 * unspecified in which case it will be extracted from 2354 * ire_src_addr_v6 2355 */ 2356 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2357 /* Connect to IPv4 address */ 2358 ipaddr_t v4src; 2359 ipaddr_t v4dst; 2360 2361 /* Is the source unspecified or mapped? */ 2362 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2363 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2364 ip1dbg(("ip_bind_v6: " 2365 "dst is mapped, but not the src\n")); 2366 goto bad_addr; 2367 } 2368 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2369 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2370 2371 /* 2372 * XXX Fix needed. Need to pass ipsec_policy_set 2373 * instead of B_FALSE. 2374 */ 2375 2376 /* Always verify destination reachability. */ 2377 error = ip_bind_connected(connp, mp, &v4src, lport, 2378 v4dst, fport, ire_requested, ipsec_policy_set, 2379 B_TRUE, B_TRUE); 2380 if (error != 0) 2381 goto bad_addr; 2382 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2383 connp->conn_pkt_isv6 = B_FALSE; 2384 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2385 ip1dbg(("ip_bind_v6: " 2386 "src is mapped, but not the dst\n")); 2387 goto bad_addr; 2388 } else { 2389 error = ip_bind_connected_v6(connp, mp, v6srcp, 2390 lport, v6dstp, ipp, fport, ire_requested, 2391 ipsec_policy_set, B_TRUE, verify_dst); 2392 if (error != 0) 2393 goto bad_addr; 2394 connp->conn_pkt_isv6 = B_TRUE; 2395 } 2396 if (protocol == IPPROTO_TCP) 2397 connp->conn_recv = tcp_input; 2398 } 2399 /* Update qinfo if v4/v6 changed */ 2400 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2401 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2402 if (connp->conn_pkt_isv6) 2403 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE); 2404 else 2405 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 2406 } 2407 2408 /* 2409 * Pass the IPSEC headers size in ire_ipsec_overhead. 2410 * We can't do this in ip_bind_insert_ire because the policy 2411 * may not have been inherited at that point in time and hence 2412 * conn_out_enforce_policy may not be set. 
2413 */ 2414 mp1 = mp->b_cont; 2415 if (ire_requested && connp->conn_out_enforce_policy && 2416 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2417 ire_t *ire = (ire_t *)mp1->b_rptr; 2418 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2419 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2420 } 2421 2422 /* Send it home. */ 2423 mp->b_datap->db_type = M_PCPROTO; 2424 tbr->PRIM_type = T_BIND_ACK; 2425 return (mp); 2426 2427 bad_addr: 2428 if (error == EINPROGRESS) 2429 return (NULL); 2430 if (error > 0) 2431 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2432 else 2433 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2434 return (mp); 2435 } 2436 2437 /* 2438 * Here address is verified to be a valid local address. 2439 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2440 * address is also considered a valid local address. 2441 * In the case of a multicast address, however, the 2442 * upper protocol is expected to reset the src address 2443 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2444 * no packets are emitted with multicast address as 2445 * source address. 2446 * The addresses valid for bind are: 2447 * (1) - in6addr_any 2448 * (2) - IP address of an UP interface 2449 * (3) - IP address of a DOWN interface 2450 * (4) - a multicast address. In this case 2451 * the conn will only receive packets destined to 2452 * the specified multicast address. Note: the 2453 * application still has to issue an 2454 * IPV6_JOIN_GROUP socket option. 2455 * 2456 * In all the above cases, the bound address must be valid in the current zone. 2457 * When the address is loopback or multicast, there might be many matching IREs 2458 * so bind has to look up based on the zone. 
2459 */ 2460 static int 2461 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2462 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2463 boolean_t fanout_insert) 2464 { 2465 int error = 0; 2466 ire_t *src_ire = NULL; 2467 ipif_t *ipif = NULL; 2468 mblk_t *policy_mp; 2469 zoneid_t zoneid; 2470 2471 if (ipsec_policy_set) 2472 policy_mp = mp->b_cont; 2473 2474 /* 2475 * If it was previously connected, conn_fully_bound would have 2476 * been set. 2477 */ 2478 connp->conn_fully_bound = B_FALSE; 2479 2480 zoneid = connp->conn_zoneid; 2481 2482 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2483 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2484 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2485 /* 2486 * If an address other than in6addr_any is requested, 2487 * we verify that it is a valid address for bind 2488 * Note: Following code is in if-else-if form for 2489 * readability compared to a condition check. 2490 */ 2491 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2492 if (IRE_IS_LOCAL(src_ire)) { 2493 /* 2494 * (2) Bind to address of local UP interface 2495 */ 2496 ipif = src_ire->ire_ipif; 2497 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2498 ipif_t *multi_ipif = NULL; 2499 ire_t *save_ire; 2500 /* 2501 * (4) bind to multicast address. 2502 * Fake out the IRE returned to upper 2503 * layer to be a broadcast IRE in 2504 * ip_bind_insert_ire_v6(). 2505 * Pass other information that matches 2506 * the ipif (e.g. the source address). 
2507 * conn_multicast_ill is only used for 2508 * IPv6 packets 2509 */ 2510 mutex_enter(&connp->conn_lock); 2511 if (connp->conn_multicast_ill != NULL) { 2512 (void) ipif_lookup_zoneid( 2513 connp->conn_multicast_ill, zoneid, 0, 2514 &multi_ipif); 2515 } else { 2516 /* 2517 * Look for default like 2518 * ip_wput_v6 2519 */ 2520 multi_ipif = ipif_lookup_group_v6( 2521 &ipv6_unspecified_group, zoneid); 2522 } 2523 mutex_exit(&connp->conn_lock); 2524 save_ire = src_ire; 2525 src_ire = NULL; 2526 if (multi_ipif == NULL || !ire_requested || 2527 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2528 src_ire = save_ire; 2529 error = EADDRNOTAVAIL; 2530 } else { 2531 ASSERT(src_ire != NULL); 2532 if (save_ire != NULL) 2533 ire_refrele(save_ire); 2534 } 2535 if (multi_ipif != NULL) 2536 ipif_refrele(multi_ipif); 2537 } else { 2538 *mp->b_wptr++ = (char)connp->conn_ulp; 2539 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2540 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error); 2541 if (ipif == NULL) { 2542 if (error == EINPROGRESS) { 2543 if (src_ire != NULL) 2544 ire_refrele(src_ire); 2545 return (error); 2546 } 2547 /* 2548 * Not a valid address for bind 2549 */ 2550 error = EADDRNOTAVAIL; 2551 } else { 2552 ipif_refrele(ipif); 2553 } 2554 /* 2555 * Just to keep it consistent with the processing in 2556 * ip_bind_v6(). 2557 */ 2558 mp->b_wptr--; 2559 } 2560 2561 if (error != 0) { 2562 /* Red Alert! Attempting to be a bogon! */ 2563 if (ip_debug > 2) { 2564 /* ip1dbg */ 2565 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2566 " address %s\n", AF_INET6, v6src); 2567 } 2568 goto bad_addr; 2569 } 2570 } 2571 2572 /* 2573 * Allow setting new policies. For example, disconnects come 2574 * down as ipa_t bind. As we would have set conn_policy_cached 2575 * to B_TRUE before, we should set it to B_FALSE, so that policy 2576 * can change after the disconnect. 
2577 */ 2578 connp->conn_policy_cached = B_FALSE; 2579 2580 /* If not fanout_insert this was just an address verification */ 2581 if (fanout_insert) { 2582 /* 2583 * The addresses have been verified. Time to insert in 2584 * the correct fanout list. 2585 */ 2586 connp->conn_srcv6 = *v6src; 2587 connp->conn_remv6 = ipv6_all_zeros; 2588 connp->conn_lport = lport; 2589 connp->conn_fport = 0; 2590 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2591 } 2592 if (error == 0) { 2593 if (ire_requested) { 2594 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) { 2595 error = -1; 2596 goto bad_addr; 2597 } 2598 } else if (ipsec_policy_set) { 2599 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2600 error = -1; 2601 goto bad_addr; 2602 } 2603 } 2604 } 2605 bad_addr: 2606 if (error != 0) { 2607 if (connp->conn_anon_port) { 2608 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2609 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2610 B_FALSE); 2611 } 2612 connp->conn_mlp_type = mlptSingle; 2613 } 2614 2615 if (src_ire != NULL) 2616 ire_refrele(src_ire); 2617 2618 if (ipsec_policy_set) { 2619 ASSERT(policy_mp != NULL); 2620 freeb(policy_mp); 2621 /* 2622 * As of now assume that nothing else accompanies 2623 * IPSEC_POLICY_SET. 
2624 */ 2625 mp->b_cont = NULL; 2626 } 2627 return (error); 2628 } 2629 2630 /* ARGSUSED */ 2631 static void 2632 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2633 void *dummy_arg) 2634 { 2635 conn_t *connp = NULL; 2636 t_scalar_t prim; 2637 2638 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2639 2640 if (CONN_Q(q)) 2641 connp = Q_TO_CONN(q); 2642 ASSERT(connp != NULL); 2643 2644 prim = ((union T_primitives *)mp->b_rptr)->type; 2645 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2646 2647 if (IPCL_IS_TCP(connp)) { 2648 /* Pass sticky_ipp for scope_id and pktinfo */ 2649 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2650 } else { 2651 /* For UDP and ICMP */ 2652 mp = ip_bind_v6(q, mp, connp, NULL); 2653 } 2654 if (mp != NULL) { 2655 if (IPCL_IS_TCP(connp)) { 2656 CONN_INC_REF(connp); 2657 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2658 connp, SQTAG_TCP_RPUTOTHER); 2659 } else if (IPCL_IS_UDP(connp)) { 2660 udp_resume_bind(connp, mp); 2661 } else { 2662 qreply(q, mp); 2663 CONN_OPER_PENDING_DONE(connp); 2664 } 2665 } 2666 } 2667 2668 /* 2669 * Verify that both the source and destination addresses 2670 * are valid. If verify_dst, then destination address must also be reachable, 2671 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2672 * It takes ip6_pkt_t * as one of the arguments to determine correct 2673 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2674 * destination address. Note that parameter ipp is only useful for TCP connect 2675 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2676 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2677 * 2678 */ 2679 static int 2680 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2681 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2682 boolean_t ire_requested, boolean_t ipsec_policy_set, 2683 boolean_t fanout_insert, boolean_t verify_dst) 2684 { 2685 ire_t *src_ire; 2686 ire_t *dst_ire; 2687 int error = 0; 2688 int protocol; 2689 mblk_t *policy_mp; 2690 ire_t *sire = NULL; 2691 ire_t *md_dst_ire = NULL; 2692 ill_t *md_ill = NULL; 2693 ill_t *dst_ill = NULL; 2694 ipif_t *src_ipif = NULL; 2695 zoneid_t zoneid; 2696 boolean_t ill_held = B_FALSE; 2697 2698 src_ire = dst_ire = NULL; 2699 /* 2700 * NOTE: The protocol is beyond the wptr because that's how 2701 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2702 */ 2703 protocol = *mp->b_wptr & 0xFF; 2704 2705 /* 2706 * If we never got a disconnect before, clear it now. 2707 */ 2708 connp->conn_fully_bound = B_FALSE; 2709 2710 if (ipsec_policy_set) { 2711 policy_mp = mp->b_cont; 2712 } 2713 2714 zoneid = connp->conn_zoneid; 2715 2716 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2717 ipif_t *ipif; 2718 2719 /* 2720 * Use an "emulated" IRE_BROADCAST to tell the transport it 2721 * is a multicast. 2722 * Pass other information that matches 2723 * the ipif (e.g. the source address). 
2724 * 2725 * conn_multicast_ill is only used for IPv6 packets 2726 */ 2727 mutex_enter(&connp->conn_lock); 2728 if (connp->conn_multicast_ill != NULL) { 2729 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2730 zoneid, 0, &ipif); 2731 } else { 2732 /* Look for default like ip_wput_v6 */ 2733 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2734 } 2735 mutex_exit(&connp->conn_lock); 2736 if (ipif == NULL || !ire_requested || 2737 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2738 if (ipif != NULL) 2739 ipif_refrele(ipif); 2740 if (ip_debug > 2) { 2741 /* ip1dbg */ 2742 pr_addr_dbg("ip_bind_connected_v6: bad " 2743 "connected multicast %s\n", AF_INET6, 2744 v6dst); 2745 } 2746 error = ENETUNREACH; 2747 goto bad_addr; 2748 } 2749 if (ipif != NULL) 2750 ipif_refrele(ipif); 2751 } else { 2752 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2753 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2754 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2755 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR); 2756 /* 2757 * We also prevent ire's with src address INADDR_ANY to 2758 * be used, which are created temporarily for 2759 * sending out packets from endpoints that have 2760 * conn_unspec_src set. 2761 */ 2762 if (dst_ire == NULL || 2763 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2764 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2765 /* 2766 * When verifying destination reachability, we always 2767 * complain. 2768 * 2769 * When not verifying destination reachability but we 2770 * found an IRE, i.e. the destination is reachable, 2771 * then the other tests still apply and we complain. 
2772 */ 2773 if (verify_dst || (dst_ire != NULL)) { 2774 if (ip_debug > 2) { 2775 /* ip1dbg */ 2776 pr_addr_dbg("ip_bind_connected_v6: bad" 2777 " connected dst %s\n", AF_INET6, 2778 v6dst); 2779 } 2780 if (dst_ire == NULL || 2781 !(dst_ire->ire_type & IRE_HOST)) { 2782 error = ENETUNREACH; 2783 } else { 2784 error = EHOSTUNREACH; 2785 } 2786 goto bad_addr; 2787 } 2788 } 2789 } 2790 2791 /* 2792 * We now know that routing will allow us to reach the destination. 2793 * Check whether Trusted Solaris policy allows communication with this 2794 * host, and pretend that the destination is unreachable if not. 2795 * 2796 * This is never a problem for TCP, since that transport is known to 2797 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2798 * handling. If the remote is unreachable, it will be detected at that 2799 * point, so there's no reason to check it here. 2800 * 2801 * Note that for sendto (and other datagram-oriented friends), this 2802 * check is done as part of the data path label computation instead. 2803 * The check here is just to make non-TCP connect() report the right 2804 * error. 2805 */ 2806 if (dst_ire != NULL && is_system_labeled() && 2807 !IPCL_IS_TCP(connp) && 2808 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2809 connp->conn_mac_exempt) != 0) { 2810 error = EHOSTUNREACH; 2811 if (ip_debug > 2) { 2812 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2813 AF_INET6, v6dst); 2814 } 2815 goto bad_addr; 2816 } 2817 2818 /* 2819 * If the app does a connect(), it means that it will most likely 2820 * send more than 1 packet to the destination. It makes sense 2821 * to clear the temporary flag. 
2822 */ 2823 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2824 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2825 irb_t *irb = dst_ire->ire_bucket; 2826 2827 rw_enter(&irb->irb_lock, RW_WRITER); 2828 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2829 irb->irb_tmp_ire_cnt--; 2830 rw_exit(&irb->irb_lock); 2831 } 2832 2833 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2834 2835 /* 2836 * See if we should notify ULP about MDT; we do this whether or not 2837 * ire_requested is TRUE, in order to handle active connects; MDT 2838 * eligibility tests for passive connects are handled separately 2839 * through tcp_adapt_ire(). We do this before the source address 2840 * selection, because dst_ire may change after a call to 2841 * ipif_select_source_v6(). This is a best-effort check, as the 2842 * packet for this connection may not actually go through 2843 * dst_ire->ire_stq, and the exact IRE can only be known after 2844 * calling ip_newroute_v6(). This is why we further check on the 2845 * IRE during Multidata packet transmission in tcp_multisend(). 
2846 */ 2847 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2848 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2849 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2850 ILL_MDT_CAPABLE(md_ill)) { 2851 md_dst_ire = dst_ire; 2852 IRE_REFHOLD(md_dst_ire); 2853 } 2854 2855 if (dst_ire != NULL && 2856 dst_ire->ire_type == IRE_LOCAL && 2857 dst_ire->ire_zoneid != zoneid && 2858 dst_ire->ire_zoneid != ALL_ZONES) { 2859 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2860 zoneid, 0, NULL, 2861 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2862 MATCH_IRE_RJ_BHOLE); 2863 if (src_ire == NULL) { 2864 error = EHOSTUNREACH; 2865 goto bad_addr; 2866 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2867 if (!(src_ire->ire_type & IRE_HOST)) 2868 error = ENETUNREACH; 2869 else 2870 error = EHOSTUNREACH; 2871 goto bad_addr; 2872 } 2873 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2874 src_ipif = src_ire->ire_ipif; 2875 ipif_refhold(src_ipif); 2876 *v6src = src_ipif->ipif_v6lcl_addr; 2877 } 2878 ire_refrele(src_ire); 2879 src_ire = NULL; 2880 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2881 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2882 *v6src = sire->ire_src_addr_v6; 2883 ire_refrele(dst_ire); 2884 dst_ire = sire; 2885 sire = NULL; 2886 } else if (dst_ire->ire_type == IRE_CACHE && 2887 (dst_ire->ire_flags & RTF_SETSRC)) { 2888 ASSERT(dst_ire->ire_zoneid == zoneid || 2889 dst_ire->ire_zoneid == ALL_ZONES); 2890 *v6src = dst_ire->ire_src_addr_v6; 2891 } else { 2892 /* 2893 * Pick a source address so that a proper inbound load 2894 * spreading would happen. Use dst_ill specified by the 2895 * app. when socket option or scopeid is set. 
2896 */ 2897 int err; 2898 2899 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2900 uint_t if_index; 2901 2902 /* 2903 * Scope id or IPV6_PKTINFO 2904 */ 2905 2906 if_index = ipp->ipp_ifindex; 2907 dst_ill = ill_lookup_on_ifindex( 2908 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2909 if (dst_ill == NULL) { 2910 ip1dbg(("ip_bind_connected_v6:" 2911 " bad ifindex %d\n", if_index)); 2912 error = EADDRNOTAVAIL; 2913 goto bad_addr; 2914 } 2915 ill_held = B_TRUE; 2916 } else if (connp->conn_outgoing_ill != NULL) { 2917 /* 2918 * For IPV6_BOUND_IF socket option, 2919 * conn_outgoing_ill should be set 2920 * already in TCP or UDP/ICMP. 2921 */ 2922 dst_ill = conn_get_held_ill(connp, 2923 &connp->conn_outgoing_ill, &err); 2924 if (err == ILL_LOOKUP_FAILED) { 2925 ip1dbg(("ip_bind_connected_v6:" 2926 "no ill for bound_if\n")); 2927 error = EADDRNOTAVAIL; 2928 goto bad_addr; 2929 } 2930 ill_held = B_TRUE; 2931 } else if (dst_ire->ire_stq != NULL) { 2932 /* No need to hold ill here */ 2933 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2934 } else { 2935 /* No need to hold ill here */ 2936 dst_ill = dst_ire->ire_ipif->ipif_ill; 2937 } 2938 if (!ip6_asp_can_lookup()) { 2939 *mp->b_wptr++ = (char)protocol; 2940 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2941 ip_bind_connected_resume_v6); 2942 error = EINPROGRESS; 2943 goto refrele_and_quit; 2944 } 2945 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2946 RESTRICT_TO_NONE, connp->conn_src_preferences, 2947 zoneid); 2948 ip6_asp_table_refrele(); 2949 if (src_ipif == NULL) { 2950 pr_addr_dbg("ip_bind_connected_v6: " 2951 "no usable source address for " 2952 "connection to %s\n", AF_INET6, v6dst); 2953 error = EADDRNOTAVAIL; 2954 goto bad_addr; 2955 } 2956 *v6src = src_ipif->ipif_v6lcl_addr; 2957 } 2958 } 2959 2960 /* 2961 * We do ire_route_lookup_v6() here (and not an interface lookup) 2962 * as we assert that v6src should only come from an 2963 * UP interface for hard binding. 
2964 */ 2965 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2966 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2967 2968 /* src_ire must be a local|loopback */ 2969 if (!IRE_IS_LOCAL(src_ire)) { 2970 if (ip_debug > 2) { 2971 /* ip1dbg */ 2972 pr_addr_dbg("ip_bind_connected_v6: bad " 2973 "connected src %s\n", AF_INET6, v6src); 2974 } 2975 error = EADDRNOTAVAIL; 2976 goto bad_addr; 2977 } 2978 2979 /* 2980 * If the source address is a loopback address, the 2981 * destination had best be local or multicast. 2982 * The transports that can't handle multicast will reject 2983 * those addresses. 2984 */ 2985 if (src_ire->ire_type == IRE_LOOPBACK && 2986 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2987 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2988 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2989 error = -1; 2990 goto bad_addr; 2991 } 2992 /* 2993 * Allow setting new policies. For example, disconnects come 2994 * down as ipa_t bind. As we would have set conn_policy_cached 2995 * to B_TRUE before, we should set it to B_FALSE, so that policy 2996 * can change after the disconnect. 2997 */ 2998 connp->conn_policy_cached = B_FALSE; 2999 3000 /* 3001 * The addresses have been verified. Initialize the conn 3002 * before calling the policy as they expect the conns 3003 * initialized. 3004 */ 3005 connp->conn_srcv6 = *v6src; 3006 connp->conn_remv6 = *v6dst; 3007 connp->conn_lport = lport; 3008 connp->conn_fport = fport; 3009 3010 ASSERT(!(ipsec_policy_set && ire_requested)); 3011 if (ire_requested) { 3012 iulp_t *ulp_info = NULL; 3013 3014 /* 3015 * Note that sire will not be NULL if this is an off-link 3016 * connection and there is not cache for that dest yet. 3017 * 3018 * XXX Because of an existing bug, if there are multiple 3019 * default routes, the IRE returned now may not be the actual 3020 * default route used (default routes are chosen in a 3021 * round robin fashion). 
So if the metrics for different 3022 * default routes are different, we may return the wrong 3023 * metrics. This will not be a problem if the existing 3024 * bug is fixed. 3025 */ 3026 if (sire != NULL) 3027 ulp_info = &(sire->ire_uinfo); 3028 3029 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 3030 error = -1; 3031 goto bad_addr; 3032 } 3033 } else if (ipsec_policy_set) { 3034 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 3035 error = -1; 3036 goto bad_addr; 3037 } 3038 } 3039 3040 /* 3041 * Cache IPsec policy in this conn. If we have per-socket policy, 3042 * we'll cache that. If we don't, we'll inherit global policy. 3043 * 3044 * We can't insert until the conn reflects the policy. Note that 3045 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 3046 * connections where we don't have a policy. This is to prevent 3047 * global policy lookups in the inbound path. 3048 * 3049 * If we insert before we set conn_policy_cached, 3050 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3051 * because global policy cound be non-empty. We normally call 3052 * ipsec_check_policy() for conn_policy_cached connections only if 3053 * conn_in_enforce_policy is set. But in this case, 3054 * conn_policy_cached can get set anytime since we made the 3055 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3056 * is called, which will make the above assumption false. Thus, we 3057 * need to insert after we set conn_policy_cached. 3058 */ 3059 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3060 goto bad_addr; 3061 3062 /* If not fanout_insert this was just an address verification */ 3063 if (fanout_insert) { 3064 /* 3065 * The addresses have been verified. Time to insert in 3066 * the correct fanout list. 3067 */ 3068 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3069 connp->conn_ports, 3070 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 3071 } 3072 if (error == 0) { 3073 connp->conn_fully_bound = B_TRUE; 3074 /* 3075 * Our initial checks for MDT have passed; the IRE is not 3076 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3077 * be supporting MDT. Pass the IRE, IPC and ILL into 3078 * ip_mdinfo_return(), which performs further checks 3079 * against them and upon success, returns the MDT info 3080 * mblk which we will attach to the bind acknowledgment. 3081 */ 3082 if (md_dst_ire != NULL) { 3083 mblk_t *mdinfo_mp; 3084 3085 ASSERT(md_ill != NULL); 3086 ASSERT(md_ill->ill_mdt_capab != NULL); 3087 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3088 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3089 linkb(mp, mdinfo_mp); 3090 } 3091 } 3092 bad_addr: 3093 if (ipsec_policy_set) { 3094 ASSERT(policy_mp != NULL); 3095 freeb(policy_mp); 3096 /* 3097 * As of now assume that nothing else accompanies 3098 * IPSEC_POLICY_SET. 3099 */ 3100 mp->b_cont = NULL; 3101 } 3102 refrele_and_quit: 3103 if (src_ire != NULL) 3104 IRE_REFRELE(src_ire); 3105 if (dst_ire != NULL) 3106 IRE_REFRELE(dst_ire); 3107 if (sire != NULL) 3108 IRE_REFRELE(sire); 3109 if (src_ipif != NULL) 3110 ipif_refrele(src_ipif); 3111 if (md_dst_ire != NULL) 3112 IRE_REFRELE(md_dst_ire); 3113 if (ill_held && dst_ill != NULL) 3114 ill_refrele(dst_ill); 3115 return (error); 3116 } 3117 3118 /* 3119 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3120 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3121 */ 3122 static boolean_t 3123 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3124 iulp_t *ulp_info) 3125 { 3126 mblk_t *mp1; 3127 ire_t *ret_ire; 3128 3129 mp1 = mp->b_cont; 3130 ASSERT(mp1 != NULL); 3131 3132 if (ire != NULL) { 3133 /* 3134 * mp1 initialized above to IRE_DB_REQ_TYPE 3135 * appended mblk. Its <upper protocol>'s 3136 * job to make sure there is room. 
3137 */ 3138 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3139 return (B_FALSE); 3140 3141 mp1->b_datap->db_type = IRE_DB_TYPE; 3142 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3143 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3144 ret_ire = (ire_t *)mp1->b_rptr; 3145 if (IN6_IS_ADDR_MULTICAST(dst) || 3146 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3147 ret_ire->ire_type = IRE_BROADCAST; 3148 ret_ire->ire_addr_v6 = *dst; 3149 } 3150 if (ulp_info != NULL) { 3151 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3152 sizeof (iulp_t)); 3153 } 3154 ret_ire->ire_mp = mp1; 3155 } else { 3156 /* 3157 * No IRE was found. Remove IRE mblk. 3158 */ 3159 mp->b_cont = mp1->b_cont; 3160 freeb(mp1); 3161 } 3162 return (B_TRUE); 3163 } 3164 3165 /* 3166 * Add an ip6i_t header to the front of the mblk. 3167 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3168 * Returns NULL if allocation fails (and frees original message). 3169 * Used in outgoing path when going through ip_newroute_*v6(). 3170 * Used in incoming path to pass ifindex to transports. 
3171 */ 3172 mblk_t * 3173 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3174 { 3175 mblk_t *mp1; 3176 ip6i_t *ip6i; 3177 ip6_t *ip6h; 3178 3179 ip6h = (ip6_t *)mp->b_rptr; 3180 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3181 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3182 mp->b_datap->db_ref > 1) { 3183 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3184 if (mp1 == NULL) { 3185 freemsg(mp); 3186 return (NULL); 3187 } 3188 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3189 mp1->b_cont = mp; 3190 mp = mp1; 3191 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3192 } 3193 mp->b_rptr = (uchar_t *)ip6i; 3194 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3195 ip6i->ip6i_nxt = IPPROTO_RAW; 3196 if (ill != NULL) { 3197 ip6i->ip6i_flags = IP6I_IFINDEX; 3198 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3199 } else { 3200 ip6i->ip6i_flags = 0; 3201 } 3202 ip6i->ip6i_nexthop = *dst; 3203 return (mp); 3204 } 3205 3206 /* 3207 * Handle protocols with which IP is less intimate. There 3208 * can be more than one stream bound to a particular 3209 * protocol. When this is the case, normally each one gets a copy 3210 * of any incoming packets. 3211 * However, if the packet was tunneled and not multicast we only send to it 3212 * the first match. 3213 * 3214 * Zones notes: 3215 * Packets will be distributed to streams in all zones. This is really only 3216 * useful for ICMPv6 as only applications in the global zone can create raw 3217 * sockets for other protocols. 
3218 */ 3219 static void 3220 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3221 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3222 boolean_t mctl_present, zoneid_t zoneid) 3223 { 3224 queue_t *rq; 3225 mblk_t *mp1, *first_mp1; 3226 in6_addr_t dst = ip6h->ip6_dst; 3227 in6_addr_t src = ip6h->ip6_src; 3228 boolean_t one_only; 3229 mblk_t *first_mp = mp; 3230 boolean_t secure, shared_addr; 3231 conn_t *connp, *first_connp, *next_connp; 3232 connf_t *connfp; 3233 3234 if (mctl_present) { 3235 mp = first_mp->b_cont; 3236 secure = ipsec_in_is_secure(first_mp); 3237 ASSERT(mp != NULL); 3238 } else { 3239 secure = B_FALSE; 3240 } 3241 3242 /* 3243 * If the packet was tunneled and not multicast we only send to it 3244 * the first match. 3245 */ 3246 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3247 !IN6_IS_ADDR_MULTICAST(&dst)); 3248 3249 shared_addr = (zoneid == ALL_ZONES); 3250 if (shared_addr) { 3251 /* 3252 * We don't allow multilevel ports for raw IP, so no need to 3253 * check for that here. 3254 */ 3255 zoneid = tsol_packet_to_zoneid(mp); 3256 } 3257 3258 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3259 mutex_enter(&connfp->connf_lock); 3260 connp = connfp->connf_head; 3261 for (connp = connfp->connf_head; connp != NULL; 3262 connp = connp->conn_next) { 3263 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3264 zoneid) && 3265 (!is_system_labeled() || 3266 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3267 connp))) 3268 break; 3269 } 3270 3271 if (connp == NULL || connp->conn_upq == NULL) { 3272 /* 3273 * No one bound to this port. Is 3274 * there a client that wants all 3275 * unclaimed datagrams? 
3276 */ 3277 mutex_exit(&connfp->connf_lock); 3278 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3279 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3280 nexthdr_offset, mctl_present, zoneid)) { 3281 BUMP_MIB(ill->ill_ip6_mib, ipv6InUnknownProtos); 3282 } 3283 3284 return; 3285 } 3286 3287 CONN_INC_REF(connp); 3288 first_connp = connp; 3289 3290 /* 3291 * XXX: Fix the multiple protocol listeners case. We should not 3292 * be walking the conn->next list here. 3293 */ 3294 if (one_only) { 3295 /* 3296 * Only send message to one tunnel driver by immediately 3297 * terminating the loop. 3298 */ 3299 connp = NULL; 3300 } else { 3301 connp = connp->conn_next; 3302 3303 } 3304 for (;;) { 3305 while (connp != NULL) { 3306 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3307 flags, zoneid) && 3308 (!is_system_labeled() || 3309 tsol_receive_local(mp, &dst, IPV6_VERSION, 3310 shared_addr, connp))) 3311 break; 3312 connp = connp->conn_next; 3313 } 3314 3315 /* 3316 * Just copy the data part alone. The mctl part is 3317 * needed just for verifying policy and it is never 3318 * sent up. 3319 */ 3320 if (connp == NULL || connp->conn_upq == NULL || 3321 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3322 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3323 /* 3324 * No more intested clients or memory 3325 * allocation failed 3326 */ 3327 connp = first_connp; 3328 break; 3329 } 3330 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3331 CONN_INC_REF(connp); 3332 mutex_exit(&connfp->connf_lock); 3333 rq = connp->conn_rq; 3334 /* 3335 * For link-local always add ifindex so that transport can set 3336 * sin6_scope_id. Avoid it for ICMP error fanout. 
3337 */ 3338 if ((connp->conn_ipv6_recvpktinfo || 3339 IN6_IS_ADDR_LINKLOCAL(&src)) && 3340 (flags & IP_FF_IP6INFO)) { 3341 /* Add header */ 3342 mp1 = ip_add_info_v6(mp1, inill, &dst); 3343 } 3344 if (mp1 == NULL) { 3345 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3346 } else if (!canputnext(rq)) { 3347 if (flags & IP_FF_RAWIP) { 3348 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3349 } else { 3350 BUMP_MIB(ill->ill_icmp6_mib, 3351 ipv6IfIcmpInOverflows); 3352 } 3353 3354 freemsg(mp1); 3355 } else { 3356 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3357 first_mp1 = ipsec_check_inbound_policy 3358 (first_mp1, connp, NULL, ip6h, 3359 mctl_present); 3360 } 3361 if (first_mp1 != NULL) { 3362 if (mctl_present) 3363 freeb(first_mp1); 3364 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3365 putnext(rq, mp1); 3366 } 3367 } 3368 mutex_enter(&connfp->connf_lock); 3369 /* Follow the next pointer before releasing the conn. */ 3370 next_connp = connp->conn_next; 3371 CONN_DEC_REF(connp); 3372 connp = next_connp; 3373 } 3374 3375 /* Last one. Send it upstream. */ 3376 mutex_exit(&connfp->connf_lock); 3377 3378 /* Initiate IPPF processing */ 3379 if (IP6_IN_IPP(flags)) { 3380 uint_t ifindex; 3381 3382 mutex_enter(&ill->ill_lock); 3383 ifindex = ill->ill_phyint->phyint_ifindex; 3384 mutex_exit(&ill->ill_lock); 3385 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3386 if (mp == NULL) { 3387 CONN_DEC_REF(connp); 3388 if (mctl_present) 3389 freeb(first_mp); 3390 return; 3391 } 3392 } 3393 3394 /* 3395 * For link-local always add ifindex so that transport can set 3396 * sin6_scope_id. Avoid it for ICMP error fanout. 
3397 */ 3398 if ((connp->conn_ipv6_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3399 (flags & IP_FF_IP6INFO)) { 3400 /* Add header */ 3401 mp = ip_add_info_v6(mp, inill, &dst); 3402 if (mp == NULL) { 3403 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3404 CONN_DEC_REF(connp); 3405 if (mctl_present) 3406 freeb(first_mp); 3407 return; 3408 } else if (mctl_present) { 3409 first_mp->b_cont = mp; 3410 } else { 3411 first_mp = mp; 3412 } 3413 } 3414 3415 rq = connp->conn_rq; 3416 if (!canputnext(rq)) { 3417 if (flags & IP_FF_RAWIP) { 3418 BUMP_MIB(ill->ill_ip6_mib, rawipInOverflows); 3419 } else { 3420 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3421 } 3422 3423 freemsg(first_mp); 3424 } else { 3425 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3426 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3427 NULL, ip6h, mctl_present); 3428 if (first_mp == NULL) { 3429 CONN_DEC_REF(connp); 3430 return; 3431 } 3432 } 3433 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3434 putnext(rq, mp); 3435 if (mctl_present) 3436 freeb(first_mp); 3437 } 3438 CONN_DEC_REF(connp); 3439 } 3440 3441 /* 3442 * Send an ICMP error after patching up the packet appropriately. Returns 3443 * non-zero if the appropriate MIB should be bumped; zero otherwise. 3444 */ 3445 int 3446 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3447 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3448 boolean_t mctl_present, zoneid_t zoneid) 3449 { 3450 ip6_t *ip6h; 3451 mblk_t *first_mp; 3452 boolean_t secure; 3453 unsigned char db_type; 3454 3455 first_mp = mp; 3456 if (mctl_present) { 3457 mp = mp->b_cont; 3458 secure = ipsec_in_is_secure(first_mp); 3459 ASSERT(mp != NULL); 3460 } else { 3461 /* 3462 * If this is an ICMP error being reported - which goes 3463 * up as M_CTLs, we need to convert them to M_DATA till 3464 * we finish checking with global policy because 3465 * ipsec_check_global_policy() assumes M_DATA as clear 3466 * and M_CTL as secure. 
3467 */ 3468 db_type = mp->b_datap->db_type; 3469 mp->b_datap->db_type = M_DATA; 3470 secure = B_FALSE; 3471 } 3472 /* 3473 * We are generating an icmp error for some inbound packet. 3474 * Called from all ip_fanout_(udp, tcp, proto) functions. 3475 * Before we generate an error, check with global policy 3476 * to see whether this is allowed to enter the system. As 3477 * there is no "conn", we are checking with global policy. 3478 */ 3479 ip6h = (ip6_t *)mp->b_rptr; 3480 if (secure || ipsec_inbound_v6_policy_present) { 3481 first_mp = ipsec_check_global_policy(first_mp, NULL, 3482 NULL, ip6h, mctl_present); 3483 if (first_mp == NULL) 3484 return (0); 3485 } 3486 3487 if (!mctl_present) 3488 mp->b_datap->db_type = db_type; 3489 3490 if (flags & IP_FF_SEND_ICMP) { 3491 if (flags & IP_FF_HDR_COMPLETE) { 3492 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3493 freemsg(first_mp); 3494 return (1); 3495 } 3496 } 3497 switch (icmp_type) { 3498 case ICMP6_DST_UNREACH: 3499 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3500 B_FALSE, B_FALSE, zoneid); 3501 break; 3502 case ICMP6_PARAM_PROB: 3503 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3504 nexthdr_offset, B_FALSE, B_FALSE, zoneid); 3505 break; 3506 default: 3507 #ifdef DEBUG 3508 panic("ip_fanout_send_icmp_v6: wrong type"); 3509 /*NOTREACHED*/ 3510 #else 3511 freemsg(first_mp); 3512 break; 3513 #endif 3514 } 3515 } else { 3516 freemsg(first_mp); 3517 return (0); 3518 } 3519 3520 return (1); 3521 } 3522 3523 3524 /* 3525 * Fanout for TCP packets 3526 * The caller puts <fport, lport> in the ports parameter. 
3527 */ 3528 static void 3529 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3530 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3531 { 3532 mblk_t *first_mp; 3533 boolean_t secure; 3534 conn_t *connp; 3535 tcph_t *tcph; 3536 boolean_t syn_present = B_FALSE; 3537 3538 first_mp = mp; 3539 if (mctl_present) { 3540 mp = first_mp->b_cont; 3541 secure = ipsec_in_is_secure(first_mp); 3542 ASSERT(mp != NULL); 3543 } else { 3544 secure = B_FALSE; 3545 } 3546 3547 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3548 3549 if (connp == NULL || 3550 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3551 /* 3552 * No hard-bound match. Send Reset. 3553 */ 3554 dblk_t *dp = mp->b_datap; 3555 uint32_t ill_index; 3556 3557 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3558 3559 /* Initiate IPPf processing, if needed. */ 3560 if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) { 3561 ill_index = ill->ill_phyint->phyint_ifindex; 3562 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3563 if (first_mp == NULL) { 3564 if (connp != NULL) 3565 CONN_DEC_REF(connp); 3566 return; 3567 } 3568 } 3569 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3570 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); 3571 if (connp != NULL) 3572 CONN_DEC_REF(connp); 3573 return; 3574 } 3575 3576 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3577 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3578 if (connp->conn_flags & IPCL_TCP) { 3579 squeue_t *sqp; 3580 3581 /* 3582 * For fused tcp loopback, assign the eager's 3583 * squeue to be that of the active connect's. 
3584 */ 3585 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3586 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3587 !IP6_IN_IPP(flags)) { 3588 ASSERT(Q_TO_CONN(q) != NULL); 3589 sqp = Q_TO_CONN(q)->conn_sqp; 3590 } else { 3591 sqp = IP_SQUEUE_GET(lbolt); 3592 } 3593 3594 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3595 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3596 3597 /* 3598 * db_cksumstuff is unused in the incoming 3599 * path; Thus store the ifindex here. It will 3600 * be cleared in tcp_conn_create_v6(). 3601 */ 3602 DB_CKSUMSTUFF(mp) = 3603 (intptr_t)ill->ill_phyint->phyint_ifindex; 3604 syn_present = B_TRUE; 3605 } 3606 } 3607 3608 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3609 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3610 if ((flags & TH_RST) || (flags & TH_URG)) { 3611 CONN_DEC_REF(connp); 3612 freemsg(first_mp); 3613 return; 3614 } 3615 if (flags & TH_ACK) { 3616 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); 3617 CONN_DEC_REF(connp); 3618 return; 3619 } 3620 3621 CONN_DEC_REF(connp); 3622 freemsg(first_mp); 3623 return; 3624 } 3625 3626 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3627 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3628 NULL, ip6h, mctl_present); 3629 if (first_mp == NULL) { 3630 CONN_DEC_REF(connp); 3631 return; 3632 } 3633 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3634 ASSERT(syn_present); 3635 if (mctl_present) { 3636 ASSERT(first_mp != mp); 3637 first_mp->b_datap->db_struioflag |= 3638 STRUIO_POLICY; 3639 } else { 3640 ASSERT(first_mp == mp); 3641 mp->b_datap->db_struioflag &= 3642 ~STRUIO_EAGER; 3643 mp->b_datap->db_struioflag |= 3644 STRUIO_POLICY; 3645 } 3646 } else { 3647 /* 3648 * Discard first_mp early since we're dealing with a 3649 * fully-connected conn_t and tcp doesn't do policy in 3650 * this case. Also, if someone is bound to IPPROTO_TCP 3651 * over raw IP, they don't expect to see a M_CTL. 
3652 */ 3653 if (mctl_present) { 3654 freeb(first_mp); 3655 mctl_present = B_FALSE; 3656 } 3657 first_mp = mp; 3658 } 3659 } 3660 3661 /* Initiate IPPF processing */ 3662 if (IP6_IN_IPP(flags)) { 3663 uint_t ifindex; 3664 3665 mutex_enter(&ill->ill_lock); 3666 ifindex = ill->ill_phyint->phyint_ifindex; 3667 mutex_exit(&ill->ill_lock); 3668 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3669 if (mp == NULL) { 3670 CONN_DEC_REF(connp); 3671 if (mctl_present) { 3672 freeb(first_mp); 3673 } 3674 return; 3675 } else if (mctl_present) { 3676 /* 3677 * ip_add_info_v6 might return a new mp. 3678 */ 3679 ASSERT(first_mp != mp); 3680 first_mp->b_cont = mp; 3681 } else { 3682 first_mp = mp; 3683 } 3684 } 3685 3686 /* 3687 * For link-local always add ifindex so that TCP can bind to that 3688 * interface. Avoid it for ICMP error fanout. 3689 */ 3690 if (!syn_present && ((connp->conn_ipv6_recvpktinfo || 3691 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3692 (flags & IP_FF_IP6INFO))) { 3693 /* Add header */ 3694 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3695 if (mp == NULL) { 3696 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3697 CONN_DEC_REF(connp); 3698 if (mctl_present) 3699 freeb(first_mp); 3700 return; 3701 } else if (mctl_present) { 3702 ASSERT(first_mp != mp); 3703 first_mp->b_cont = mp; 3704 } else { 3705 first_mp = mp; 3706 } 3707 } 3708 3709 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3710 if (IPCL_IS_TCP(connp)) { 3711 (*ip_input_proc)(connp->conn_sqp, first_mp, 3712 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3713 } else { 3714 putnext(connp->conn_rq, first_mp); 3715 CONN_DEC_REF(connp); 3716 } 3717 } 3718 3719 /* 3720 * Fanout for UDP packets. 3721 * The caller puts <fport, lport> in the ports parameter. 3722 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3723 * 3724 * If SO_REUSEADDR is set all multicast and broadcast packets 3725 * will be delivered to all streams bound to the same port. 
3726 * 3727 * Zones notes: 3728 * Multicast packets will be distributed to streams in all zones. 3729 */ 3730 static void 3731 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3732 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3733 zoneid_t zoneid) 3734 { 3735 uint32_t dstport, srcport; 3736 in6_addr_t dst; 3737 mblk_t *first_mp; 3738 boolean_t secure; 3739 conn_t *connp; 3740 connf_t *connfp; 3741 conn_t *first_conn; 3742 conn_t *next_conn; 3743 mblk_t *mp1, *first_mp1; 3744 in6_addr_t src; 3745 boolean_t shared_addr; 3746 3747 first_mp = mp; 3748 if (mctl_present) { 3749 mp = first_mp->b_cont; 3750 secure = ipsec_in_is_secure(first_mp); 3751 ASSERT(mp != NULL); 3752 } else { 3753 secure = B_FALSE; 3754 } 3755 3756 /* Extract ports in net byte order */ 3757 dstport = htons(ntohl(ports) & 0xFFFF); 3758 srcport = htons(ntohl(ports) >> 16); 3759 dst = ip6h->ip6_dst; 3760 src = ip6h->ip6_src; 3761 3762 shared_addr = (zoneid == ALL_ZONES); 3763 if (shared_addr) { 3764 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3765 /* 3766 * If no shared MLP is found, tsol_mlp_findzone returns 3767 * ALL_ZONES. In that case, we assume it's SLP, and 3768 * search for the zone based on the packet label. 3769 * That will also return ALL_ZONES on failure, but 3770 * we never allow conn_zoneid to be set to ALL_ZONES. 3771 */ 3772 if (zoneid == ALL_ZONES) 3773 zoneid = tsol_packet_to_zoneid(mp); 3774 } 3775 3776 /* Attempt to find a client stream based on destination port. */ 3777 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; 3778 mutex_enter(&connfp->connf_lock); 3779 connp = connfp->connf_head; 3780 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3781 /* 3782 * Not multicast. Send to the one (first) client we find. 
3783 */ 3784 while (connp != NULL) { 3785 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3786 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3787 conn_wantpacket_v6(connp, ill, ip6h, 3788 flags, zoneid)) { 3789 break; 3790 } 3791 connp = connp->conn_next; 3792 } 3793 if (connp == NULL || connp->conn_upq == NULL) 3794 goto notfound; 3795 3796 if (is_system_labeled() && 3797 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3798 connp)) 3799 goto notfound; 3800 3801 /* Found a client */ 3802 CONN_INC_REF(connp); 3803 mutex_exit(&connfp->connf_lock); 3804 3805 if (CONN_UDP_FLOWCTLD(connp)) { 3806 freemsg(first_mp); 3807 CONN_DEC_REF(connp); 3808 return; 3809 } 3810 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3811 first_mp = ipsec_check_inbound_policy(first_mp, 3812 connp, NULL, ip6h, mctl_present); 3813 if (first_mp == NULL) { 3814 CONN_DEC_REF(connp); 3815 return; 3816 } 3817 } 3818 /* Initiate IPPF processing */ 3819 if (IP6_IN_IPP(flags)) { 3820 uint_t ifindex; 3821 3822 mutex_enter(&ill->ill_lock); 3823 ifindex = ill->ill_phyint->phyint_ifindex; 3824 mutex_exit(&ill->ill_lock); 3825 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3826 if (mp == NULL) { 3827 CONN_DEC_REF(connp); 3828 if (mctl_present) 3829 freeb(first_mp); 3830 return; 3831 } 3832 } 3833 /* 3834 * For link-local always add ifindex so that 3835 * transport can set sin6_scope_id. Avoid it for 3836 * ICMP error fanout. 
3837 */ 3838 if ((connp->conn_ipv6_recvpktinfo || 3839 IN6_IS_ADDR_LINKLOCAL(&src)) && 3840 (flags & IP_FF_IP6INFO)) { 3841 /* Add header */ 3842 mp = ip_add_info_v6(mp, inill, &dst); 3843 if (mp == NULL) { 3844 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3845 CONN_DEC_REF(connp); 3846 if (mctl_present) 3847 freeb(first_mp); 3848 return; 3849 } else if (mctl_present) { 3850 first_mp->b_cont = mp; 3851 } else { 3852 first_mp = mp; 3853 } 3854 } 3855 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3856 3857 /* Send it upstream */ 3858 CONN_UDP_RECV(connp, mp); 3859 3860 IP6_STAT(ip6_udp_fannorm); 3861 CONN_DEC_REF(connp); 3862 if (mctl_present) 3863 freeb(first_mp); 3864 return; 3865 } 3866 3867 while (connp != NULL) { 3868 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3869 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3870 (!is_system_labeled() || 3871 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3872 connp))) 3873 break; 3874 connp = connp->conn_next; 3875 } 3876 3877 if (connp == NULL || connp->conn_upq == NULL) 3878 goto notfound; 3879 3880 first_conn = connp; 3881 3882 CONN_INC_REF(connp); 3883 connp = connp->conn_next; 3884 for (;;) { 3885 while (connp != NULL) { 3886 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3887 src) && conn_wantpacket_v6(connp, ill, ip6h, 3888 flags, zoneid) && 3889 (!is_system_labeled() || 3890 tsol_receive_local(mp, &dst, IPV6_VERSION, 3891 shared_addr, connp))) 3892 break; 3893 connp = connp->conn_next; 3894 } 3895 /* 3896 * Just copy the data part alone. The mctl part is 3897 * needed just for verifying policy and it is never 3898 * sent up. 3899 */ 3900 if (connp == NULL || 3901 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3902 ((first_mp1 = ip_copymsg(first_mp)) 3903 == NULL))) { 3904 /* 3905 * No more interested clients or memory 3906 * allocation failed 3907 */ 3908 connp = first_conn; 3909 break; 3910 } 3911 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3912 CONN_INC_REF(connp); 3913 mutex_exit(&connfp->connf_lock); 3914 /* 3915 * For link-local always add ifindex so that transport 3916 * can set sin6_scope_id. Avoid it for ICMP error 3917 * fanout. 3918 */ 3919 if ((connp->conn_ipv6_recvpktinfo || 3920 IN6_IS_ADDR_LINKLOCAL(&src)) && 3921 (flags & IP_FF_IP6INFO)) { 3922 /* Add header */ 3923 mp1 = ip_add_info_v6(mp1, inill, &dst); 3924 } 3925 /* mp1 could have changed */ 3926 if (mctl_present) 3927 first_mp1->b_cont = mp1; 3928 else 3929 first_mp1 = mp1; 3930 if (mp1 == NULL) { 3931 if (mctl_present) 3932 freeb(first_mp1); 3933 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3934 goto next_one; 3935 } 3936 if (CONN_UDP_FLOWCTLD(connp)) { 3937 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 3938 freemsg(first_mp1); 3939 goto next_one; 3940 } 3941 3942 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || 3943 secure) { 3944 first_mp1 = ipsec_check_inbound_policy 3945 (first_mp1, connp, NULL, ip6h, 3946 mctl_present); 3947 } 3948 if (first_mp1 != NULL) { 3949 if (mctl_present) 3950 freeb(first_mp1); 3951 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 3952 3953 /* Send it upstream */ 3954 CONN_UDP_RECV(connp, mp1); 3955 } 3956 next_one: 3957 mutex_enter(&connfp->connf_lock); 3958 /* Follow the next pointer before releasing the conn. */ 3959 next_conn = connp->conn_next; 3960 IP6_STAT(ip6_udp_fanmb); 3961 CONN_DEC_REF(connp); 3962 connp = next_conn; 3963 } 3964 3965 /* Last one. Send it upstream. 
*/ 3966 mutex_exit(&connfp->connf_lock); 3967 3968 /* Initiate IPPF processing */ 3969 if (IP6_IN_IPP(flags)) { 3970 uint_t ifindex; 3971 3972 mutex_enter(&ill->ill_lock); 3973 ifindex = ill->ill_phyint->phyint_ifindex; 3974 mutex_exit(&ill->ill_lock); 3975 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3976 if (mp == NULL) { 3977 CONN_DEC_REF(connp); 3978 if (mctl_present) { 3979 freeb(first_mp); 3980 } 3981 return; 3982 } 3983 } 3984 3985 /* 3986 * For link-local always add ifindex so that transport can set 3987 * sin6_scope_id. Avoid it for ICMP error fanout. 3988 */ 3989 if ((connp->conn_ipv6_recvpktinfo || 3990 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IP6INFO)) { 3991 /* Add header */ 3992 mp = ip_add_info_v6(mp, inill, &dst); 3993 if (mp == NULL) { 3994 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 3995 CONN_DEC_REF(connp); 3996 if (mctl_present) 3997 freeb(first_mp); 3998 return; 3999 } else if (mctl_present) { 4000 first_mp->b_cont = mp; 4001 } else { 4002 first_mp = mp; 4003 } 4004 } 4005 if (CONN_UDP_FLOWCTLD(connp)) { 4006 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 4007 freemsg(mp); 4008 } else { 4009 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 4010 first_mp = ipsec_check_inbound_policy(first_mp, 4011 connp, NULL, ip6h, mctl_present); 4012 if (first_mp == NULL) { 4013 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 4014 CONN_DEC_REF(connp); 4015 return; 4016 } 4017 } 4018 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 4019 4020 /* Send it upstream */ 4021 CONN_UDP_RECV(connp, mp); 4022 } 4023 IP6_STAT(ip6_udp_fanmb); 4024 CONN_DEC_REF(connp); 4025 if (mctl_present) 4026 freeb(first_mp); 4027 return; 4028 4029 notfound: 4030 mutex_exit(&connfp->connf_lock); 4031 /* 4032 * No one bound to this port. Is 4033 * there a client that wants all 4034 * unclaimed datagrams? 
4035 */ 4036 if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 4037 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 4038 0, flags | IP_FF_RAWIP | IP_FF_IP6INFO, mctl_present, 4039 zoneid); 4040 } else { 4041 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 4042 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 4043 mctl_present, zoneid)) { 4044 BUMP_MIB(&ip_mib, udpNoPorts); 4045 } 4046 } 4047 } 4048 4049 /* 4050 * int ip_find_hdr_v6() 4051 * 4052 * This routine is used by the upper layer protocols and the IP tunnel 4053 * module to: 4054 * - Set extension header pointers to appropriate locations 4055 * - Determine IPv6 header length and return it 4056 * - Return a pointer to the last nexthdr value 4057 * 4058 * The caller must initialize ipp_fields. 4059 * 4060 * NOTE: If multiple extension headers of the same type are present, 4061 * ip_find_hdr_v6() will set the respective extension header pointers 4062 * to the first one that it encounters in the IPv6 header. It also 4063 * skips fragment headers. This routine deals with malformed packets 4064 * of various sorts in which case the returned length is up to the 4065 * malformed part. 4066 */ 4067 int 4068 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4069 { 4070 uint_t length, ehdrlen; 4071 uint8_t nexthdr; 4072 uint8_t *whereptr, *endptr; 4073 ip6_dest_t *tmpdstopts; 4074 ip6_rthdr_t *tmprthdr; 4075 ip6_hbh_t *tmphopopts; 4076 ip6_frag_t *tmpfraghdr; 4077 4078 length = IPV6_HDR_LEN; 4079 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4080 endptr = mp->b_wptr; 4081 4082 nexthdr = ip6h->ip6_nxt; 4083 while (whereptr < endptr) { 4084 /* Is there enough left for len + nexthdr? 
*/ 4085 if (whereptr + MIN_EHDR_LEN > endptr) 4086 goto done; 4087 4088 switch (nexthdr) { 4089 case IPPROTO_HOPOPTS: 4090 tmphopopts = (ip6_hbh_t *)whereptr; 4091 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4092 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4093 goto done; 4094 nexthdr = tmphopopts->ip6h_nxt; 4095 /* return only 1st hbh */ 4096 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4097 ipp->ipp_fields |= IPPF_HOPOPTS; 4098 ipp->ipp_hopopts = tmphopopts; 4099 ipp->ipp_hopoptslen = ehdrlen; 4100 } 4101 break; 4102 case IPPROTO_DSTOPTS: 4103 tmpdstopts = (ip6_dest_t *)whereptr; 4104 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4105 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4106 goto done; 4107 nexthdr = tmpdstopts->ip6d_nxt; 4108 /* 4109 * ipp_dstopts is set to the destination header after a 4110 * routing header. 4111 * Assume it is a post-rthdr destination header 4112 * and adjust when we find an rthdr. 4113 */ 4114 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4115 ipp->ipp_fields |= IPPF_DSTOPTS; 4116 ipp->ipp_dstopts = tmpdstopts; 4117 ipp->ipp_dstoptslen = ehdrlen; 4118 } 4119 break; 4120 case IPPROTO_ROUTING: 4121 tmprthdr = (ip6_rthdr_t *)whereptr; 4122 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4123 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4124 goto done; 4125 nexthdr = tmprthdr->ip6r_nxt; 4126 /* return only 1st rthdr */ 4127 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4128 ipp->ipp_fields |= IPPF_RTHDR; 4129 ipp->ipp_rthdr = tmprthdr; 4130 ipp->ipp_rthdrlen = ehdrlen; 4131 } 4132 /* 4133 * Make any destination header we've seen be a 4134 * pre-rthdr destination header. 4135 */ 4136 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4137 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4138 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4139 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4140 ipp->ipp_dstopts = NULL; 4141 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4142 ipp->ipp_dstoptslen = 0; 4143 } 4144 break; 4145 case IPPROTO_FRAGMENT: 4146 /* 4147 * Fragment headers are skipped. 
Currently, only 4148 * IP cares for their existence. If anyone other 4149 * than IP ever has the need to know about the 4150 * location of fragment headers, support can be 4151 * added to the ip6_pkt_t at that time. 4152 */ 4153 tmpfraghdr = (ip6_frag_t *)whereptr; 4154 ehdrlen = sizeof (ip6_frag_t); 4155 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4156 goto done; 4157 nexthdr = tmpfraghdr->ip6f_nxt; 4158 break; 4159 case IPPROTO_NONE: 4160 default: 4161 goto done; 4162 } 4163 length += ehdrlen; 4164 whereptr += ehdrlen; 4165 } 4166 done: 4167 if (nexthdrp != NULL) 4168 *nexthdrp = nexthdr; 4169 return (length); 4170 } 4171 4172 int 4173 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid) 4174 { 4175 ire_t *ire; 4176 4177 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4178 ire = ire_lookup_local_v6(zoneid); 4179 if (ire == NULL) { 4180 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4181 return (1); 4182 } 4183 ip6h->ip6_src = ire->ire_addr_v6; 4184 ire_refrele(ire); 4185 } 4186 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4187 ip6h->ip6_hops = ipv6_def_hops; 4188 return (0); 4189 } 4190 4191 /* 4192 * Try to determine where and what are the IPv6 header length and 4193 * pointer to nexthdr value for the upper layer protocol (or an 4194 * unknown next hdr). 4195 * 4196 * Parameters returns a pointer to the nexthdr value; 4197 * Must handle malformed packets of various sorts. 4198 * Function returns failure for malformed cases. 
4199 */ 4200 boolean_t 4201 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4202 uint8_t **nexthdrpp) 4203 { 4204 uint16_t length; 4205 uint_t ehdrlen; 4206 uint8_t *nexthdrp; 4207 uint8_t *whereptr; 4208 uint8_t *endptr; 4209 ip6_dest_t *desthdr; 4210 ip6_rthdr_t *rthdr; 4211 ip6_frag_t *fraghdr; 4212 4213 length = IPV6_HDR_LEN; 4214 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4215 endptr = mp->b_wptr; 4216 4217 nexthdrp = &ip6h->ip6_nxt; 4218 while (whereptr < endptr) { 4219 /* Is there enough left for len + nexthdr? */ 4220 if (whereptr + MIN_EHDR_LEN > endptr) 4221 break; 4222 4223 switch (*nexthdrp) { 4224 case IPPROTO_HOPOPTS: 4225 case IPPROTO_DSTOPTS: 4226 /* Assumes the headers are identical for hbh and dst */ 4227 desthdr = (ip6_dest_t *)whereptr; 4228 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4229 if ((uchar_t *)desthdr + ehdrlen > endptr) 4230 return (B_FALSE); 4231 nexthdrp = &desthdr->ip6d_nxt; 4232 break; 4233 case IPPROTO_ROUTING: 4234 rthdr = (ip6_rthdr_t *)whereptr; 4235 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4236 if ((uchar_t *)rthdr + ehdrlen > endptr) 4237 return (B_FALSE); 4238 nexthdrp = &rthdr->ip6r_nxt; 4239 break; 4240 case IPPROTO_FRAGMENT: 4241 fraghdr = (ip6_frag_t *)whereptr; 4242 ehdrlen = sizeof (ip6_frag_t); 4243 if ((uchar_t *)&fraghdr[1] > endptr) 4244 return (B_FALSE); 4245 nexthdrp = &fraghdr->ip6f_nxt; 4246 break; 4247 case IPPROTO_NONE: 4248 /* No next header means we're finished */ 4249 default: 4250 *hdr_length_ptr = length; 4251 *nexthdrpp = nexthdrp; 4252 return (B_TRUE); 4253 } 4254 length += ehdrlen; 4255 whereptr += ehdrlen; 4256 *hdr_length_ptr = length; 4257 *nexthdrpp = nexthdrp; 4258 } 4259 switch (*nexthdrp) { 4260 case IPPROTO_HOPOPTS: 4261 case IPPROTO_DSTOPTS: 4262 case IPPROTO_ROUTING: 4263 case IPPROTO_FRAGMENT: 4264 /* 4265 * If any know extension headers are still to be processed, 4266 * the packet's malformed (or at least all the IP header(s) are 4267 * not in the same 
mblk - and that should never happen. 4268 */ 4269 return (B_FALSE); 4270 4271 default: 4272 /* 4273 * If we get here, we know that all of the IP headers were in 4274 * the same mblk, even if the ULP header is in the next mblk. 4275 */ 4276 *hdr_length_ptr = length; 4277 *nexthdrpp = nexthdrp; 4278 return (B_TRUE); 4279 } 4280 } 4281 4282 /* 4283 * Return the length of the IPv6 related headers (including extension headers) 4284 * Returns a length even if the packet is malformed. 4285 */ 4286 int 4287 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4288 { 4289 uint16_t hdr_len; 4290 uint8_t *nexthdrp; 4291 4292 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4293 return (hdr_len); 4294 } 4295 4296 /* 4297 * Select an ill for the packet by considering load spreading across 4298 * a different ill in the group if dst_ill is part of some group. 4299 */ 4300 static ill_t * 4301 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4302 { 4303 ill_t *ill; 4304 4305 /* 4306 * We schedule irrespective of whether the source address is 4307 * INADDR_UNSPECIED or not. 4308 */ 4309 ill = illgrp_scheduler(dst_ill); 4310 if (ill == NULL) 4311 return (NULL); 4312 4313 /* 4314 * For groups with names ip_sioctl_groupname ensures that all 4315 * ills are of same type. For groups without names, ifgrp_insert 4316 * ensures this. 4317 */ 4318 ASSERT(dst_ill->ill_type == ill->ill_type); 4319 4320 return (ill); 4321 } 4322 4323 /* 4324 * IPv6 - 4325 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4326 * to send out a packet to a destination address for which we do not have 4327 * specific routing information. 4328 * 4329 * Handle non-multicast packets. If ill is non-NULL the match is done 4330 * for that ill. 4331 * 4332 * When a specific ill is specified (using IPV6_PKTINFO, 4333 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4334 * on routing entries (ftable and ctable) that have a matching 4335 * ire->ire_ipif->ipif_ill. 
 * Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it can not "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4369 */ 4370 /* ARGSUSED */ 4371 void 4372 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4373 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4374 { 4375 in6_addr_t v6gw; 4376 in6_addr_t dst; 4377 ire_t *ire = NULL; 4378 ipif_t *src_ipif = NULL; 4379 ill_t *dst_ill = NULL; 4380 ire_t *sire = NULL; 4381 ire_t *save_ire; 4382 mblk_t *dlureq_mp; 4383 ip6_t *ip6h; 4384 int err = 0; 4385 mblk_t *first_mp; 4386 ipsec_out_t *io; 4387 ill_t *attach_ill = NULL; 4388 ushort_t ire_marks = 0; 4389 int match_flags; 4390 boolean_t ip6i_present; 4391 ire_t *first_sire = NULL; 4392 mblk_t *copy_mp = NULL; 4393 mblk_t *xmit_mp = NULL; 4394 in6_addr_t save_dst; 4395 uint32_t multirt_flags = 4396 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4397 boolean_t multirt_is_resolvable; 4398 boolean_t multirt_resolve_next; 4399 boolean_t need_rele = B_FALSE; 4400 boolean_t do_attach_ill = B_FALSE; 4401 boolean_t ip6_asp_table_held = B_FALSE; 4402 tsol_ire_gw_secattr_t *attrp = NULL; 4403 tsol_gcgrp_t *gcgrp = NULL; 4404 tsol_gcgrp_addr_t ga; 4405 4406 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4407 4408 first_mp = mp; 4409 if (mp->b_datap->db_type == M_CTL) { 4410 mp = mp->b_cont; 4411 io = (ipsec_out_t *)first_mp->b_rptr; 4412 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4413 } else { 4414 io = NULL; 4415 } 4416 4417 /* 4418 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4419 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4420 * could be NULL. 4421 * 4422 * This information can appear either in an ip6i_t or an IPSEC_OUT 4423 * message. 4424 */ 4425 ip6h = (ip6_t *)mp->b_rptr; 4426 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4427 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4428 if (!ip6i_present || 4429 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4430 attach_ill = ip_grab_attach_ill(ill, first_mp, 4431 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4432 io->ipsec_out_ill_index), B_TRUE); 4433 /* Failure case frees things for us. */ 4434 if (attach_ill == NULL) 4435 return; 4436 4437 /* 4438 * Check if we need an ire that will not be 4439 * looked up by anybody else i.e. HIDDEN. 4440 */ 4441 if (ill_is_probeonly(attach_ill)) 4442 ire_marks = IRE_MARK_HIDDEN; 4443 } 4444 } 4445 4446 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4447 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4448 goto icmp_err_ret; 4449 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4450 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4451 goto icmp_err_ret; 4452 } 4453 4454 /* 4455 * If this IRE is created for forwarding or it is not for 4456 * TCP traffic, mark it as temporary. 4457 * 4458 * Is it sufficient just to check the next header?? 4459 */ 4460 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4461 ire_marks |= IRE_MARK_TEMPORARY; 4462 4463 /* 4464 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4465 * chain until it gets the most specific information available. 4466 * For example, we know that there is no IRE_CACHE for this dest, 4467 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4468 * ire_ftable_lookup_v6 will look up the gateway, etc. 4469 */ 4470 4471 if (ill == NULL) { 4472 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4473 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4474 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4475 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4476 match_flags); 4477 /* 4478 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4479 * in a NULL ill, but the packet could be a neighbor 4480 * solicitation/advertisment and could have a valid attach_ill. 4481 */ 4482 if (attach_ill != NULL) 4483 ill_refrele(attach_ill); 4484 } else { 4485 if (attach_ill != NULL) { 4486 /* 4487 * attach_ill is set only for communicating with 4488 * on-link hosts. So, don't look for DEFAULT. 
4489 * ip_wput_v6 passes the right ill in this case and 4490 * hence we can assert. 4491 */ 4492 ASSERT(ill == attach_ill); 4493 ill_refrele(attach_ill); 4494 do_attach_ill = B_TRUE; 4495 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4496 } else { 4497 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4498 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4499 } 4500 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4501 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4502 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags); 4503 } 4504 4505 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4506 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4507 4508 if (zoneid == ALL_ZONES && ire != NULL) { 4509 /* 4510 * In the forwarding case, we can use a route from any zone 4511 * since we won't change the source address. We can easily 4512 * assert that the source address is already set when there's no 4513 * ip6_info header - otherwise we'd have to call pullupmsg(). 4514 */ 4515 ASSERT(ip6i_present || 4516 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4517 zoneid = ire->ire_zoneid; 4518 } 4519 4520 /* 4521 * We enter a loop that will be run only once in most cases. 4522 * The loop is re-entered in the case where the destination 4523 * can be reached through multiple RTF_MULTIRT-flagged routes. 4524 * The intention is to compute multiple routes to a single 4525 * destination in a single ip_newroute_v6 call. 4526 * The information is contained in sire->ire_flags. 
4527 */ 4528 do { 4529 multirt_resolve_next = B_FALSE; 4530 4531 if (dst_ill != NULL) { 4532 ill_refrele(dst_ill); 4533 dst_ill = NULL; 4534 } 4535 if (src_ipif != NULL) { 4536 ipif_refrele(src_ipif); 4537 src_ipif = NULL; 4538 } 4539 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4540 ip3dbg(("ip_newroute_v6: starting new resolution " 4541 "with first_mp %p, tag %d\n", 4542 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4543 4544 /* 4545 * We check if there are trailing unresolved routes for 4546 * the destination contained in sire. 4547 */ 4548 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4549 &sire, multirt_flags, MBLK_GETLABEL(mp)); 4550 4551 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4552 "ire %p, sire %p\n", 4553 multirt_is_resolvable, (void *)ire, (void *)sire)); 4554 4555 if (!multirt_is_resolvable) { 4556 /* 4557 * No more multirt routes to resolve; give up 4558 * (all routes resolved or no more resolvable 4559 * routes). 4560 */ 4561 if (ire != NULL) { 4562 ire_refrele(ire); 4563 ire = NULL; 4564 } 4565 } else { 4566 ASSERT(sire != NULL); 4567 ASSERT(ire != NULL); 4568 /* 4569 * We simply use first_sire as a flag that 4570 * indicates if a resolvable multirt route has 4571 * already been found during the preceding 4572 * loops. If it is not the case, we may have 4573 * to send an ICMP error to report that the 4574 * destination is unreachable. We do not 4575 * IRE_REFHOLD first_sire. 4576 */ 4577 if (first_sire == NULL) { 4578 first_sire = sire; 4579 } 4580 } 4581 } 4582 if ((ire == NULL) || (ire == sire)) { 4583 /* 4584 * either ire == NULL (the destination cannot be 4585 * resolved) or ire == sire (the gateway cannot be 4586 * resolved). At this point, there are no more routes 4587 * to resolve for the destination, thus we exit. 
4588 */ 4589 if (ip_debug > 3) { 4590 /* ip2dbg */ 4591 pr_addr_dbg("ip_newroute_v6: " 4592 "can't resolve %s\n", AF_INET6, v6dstp); 4593 } 4594 ip3dbg(("ip_newroute_v6: " 4595 "ire %p, sire %p, first_sire %p\n", 4596 (void *)ire, (void *)sire, (void *)first_sire)); 4597 4598 if (sire != NULL) { 4599 ire_refrele(sire); 4600 sire = NULL; 4601 } 4602 4603 if (first_sire != NULL) { 4604 /* 4605 * At least one multirt route has been found 4606 * in the same ip_newroute() call; there is no 4607 * need to report an ICMP error. 4608 * first_sire was not IRE_REFHOLDed. 4609 */ 4610 MULTIRT_DEBUG_UNTAG(first_mp); 4611 freemsg(first_mp); 4612 return; 4613 } 4614 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4615 RTA_DST); 4616 goto icmp_err_ret; 4617 } 4618 4619 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4620 4621 /* 4622 * Verify that the returned IRE does not have either the 4623 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4624 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4625 */ 4626 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4627 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4628 goto icmp_err_ret; 4629 4630 /* 4631 * Increment the ire_ob_pkt_count field for ire if it is an 4632 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4633 * increment the same for the parent IRE, sire, if it is some 4634 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4635 * and HOST_REDIRECT). 4636 */ 4637 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4638 UPDATE_OB_PKT_COUNT(ire); 4639 ire->ire_last_used_time = lbolt; 4640 } 4641 4642 if (sire != NULL) { 4643 mutex_enter(&sire->ire_lock); 4644 v6gw = sire->ire_gateway_addr_v6; 4645 mutex_exit(&sire->ire_lock); 4646 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4647 IRE_INTERFACE)) == 0); 4648 UPDATE_OB_PKT_COUNT(sire); 4649 sire->ire_last_used_time = lbolt; 4650 } else { 4651 v6gw = ipv6_all_zeros; 4652 } 4653 4654 /* 4655 * We have a route to reach the destination. 
4656 * 4657 * 1) If the interface is part of ill group, try to get a new 4658 * ill taking load spreading into account. 4659 * 4660 * 2) After selecting the ill, get a source address that might 4661 * create good inbound load spreading and that matches the 4662 * right scope. ipif_select_source_v6 does this for us. 4663 * 4664 * If the application specified the ill (ifindex), we still 4665 * load spread. Only if the packets needs to go out specifically 4666 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4667 * IPV6_BOUND_PIF we don't try to use a different ill for load 4668 * spreading. 4669 */ 4670 if (!do_attach_ill) { 4671 /* 4672 * If the interface belongs to an interface group, 4673 * make sure the next possible interface in the group 4674 * is used. This encourages load spreading among 4675 * peers in an interface group. However, in the case 4676 * of multirouting, load spreading is not used, as we 4677 * actually want to replicate outgoing packets through 4678 * particular interfaces. 4679 * 4680 * Note: While we pick a dst_ill we are really only 4681 * interested in the ill for load spreading. 4682 * The source ipif is determined by source address 4683 * selection below. 4684 */ 4685 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4686 dst_ill = ire->ire_ipif->ipif_ill; 4687 /* For uniformity do a refhold */ 4688 ill_refhold(dst_ill); 4689 } else { 4690 /* 4691 * If we are here trying to create an IRE_CACHE 4692 * for an offlink destination and have the 4693 * IRE_CACHE for the next hop and the latter is 4694 * using virtual IP source address selection i.e 4695 * it's ire->ire_ipif is pointing to a virtual 4696 * network interface (vni) then 4697 * ip_newroute_get_dst_ll() will return the vni 4698 * interface as the dst_ill. 
Since the vni is 4699 * virtual i.e not associated with any physical 4700 * interface, it cannot be the dst_ill, hence 4701 * in such a case call ip_newroute_get_dst_ll() 4702 * with the stq_ill instead of the ire_ipif ILL. 4703 * The function returns a refheld ill. 4704 */ 4705 if ((ire->ire_type == IRE_CACHE) && 4706 IS_VNI(ire->ire_ipif->ipif_ill)) 4707 dst_ill = ip_newroute_get_dst_ill_v6( 4708 ire->ire_stq->q_ptr); 4709 else 4710 dst_ill = ip_newroute_get_dst_ill_v6( 4711 ire->ire_ipif->ipif_ill); 4712 } 4713 if (dst_ill == NULL) { 4714 if (ip_debug > 2) { 4715 pr_addr_dbg("ip_newroute_v6 : no dst " 4716 "ill for dst %s\n", 4717 AF_INET6, v6dstp); 4718 } 4719 goto icmp_err_ret; 4720 } else if (dst_ill->ill_group == NULL && ill != NULL && 4721 dst_ill != ill) { 4722 /* 4723 * If "ill" is not part of any group, we should 4724 * have found a route matching "ill" as we 4725 * called ire_ftable_lookup_v6 with 4726 * MATCH_IRE_ILL_GROUP. 4727 * Rather than asserting when there is a 4728 * mismatch, we just drop the packet. 4729 */ 4730 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4731 "dst_ill %s ill %s\n", 4732 dst_ill->ill_name, 4733 ill->ill_name)); 4734 goto icmp_err_ret; 4735 } 4736 } else { 4737 dst_ill = ire->ire_ipif->ipif_ill; 4738 /* For uniformity do refhold */ 4739 ill_refhold(dst_ill); 4740 /* 4741 * We should have found a route matching ill as we 4742 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4743 * Rather than asserting, while there is a mismatch, 4744 * we just drop the packet. 4745 */ 4746 if (dst_ill != ill) { 4747 ip0dbg(("ip_newroute_v6: Packet dropped as " 4748 "IP6I_ATTACH_IF ill is %s, " 4749 "ire->ire_ipif->ipif_ill is %s\n", 4750 ill->ill_name, 4751 dst_ill->ill_name)); 4752 goto icmp_err_ret; 4753 } 4754 } 4755 /* 4756 * Pick a source address which matches the scope of the 4757 * destination address. 4758 * For RTF_SETSRC routes, the source address is imposed by the 4759 * parent ire (sire). 
4760 */ 4761 ASSERT(src_ipif == NULL); 4762 if (ire->ire_type == IRE_IF_RESOLVER && 4763 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4764 ip6_asp_can_lookup()) { 4765 /* 4766 * The ire cache entry we're adding is for the 4767 * gateway itself. The source address in this case 4768 * is relative to the gateway's address. 4769 */ 4770 ip6_asp_table_held = B_TRUE; 4771 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4772 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4773 if (src_ipif != NULL) 4774 ire_marks |= IRE_MARK_USESRC_CHECK; 4775 } else { 4776 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4777 /* 4778 * Check that the ipif matching the requested 4779 * source address still exists. 4780 */ 4781 src_ipif = ipif_lookup_addr_v6( 4782 &sire->ire_src_addr_v6, NULL, zoneid, 4783 NULL, NULL, NULL, NULL); 4784 } 4785 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4786 uint_t restrict_ill = RESTRICT_TO_NONE; 4787 4788 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4789 & IP6I_ATTACH_IF) 4790 restrict_ill = RESTRICT_TO_ILL; 4791 ip6_asp_table_held = B_TRUE; 4792 src_ipif = ipif_select_source_v6(dst_ill, 4793 v6dstp, restrict_ill, 4794 IPV6_PREFER_SRC_DEFAULT, zoneid); 4795 if (src_ipif != NULL) 4796 ire_marks |= IRE_MARK_USESRC_CHECK; 4797 } 4798 } 4799 4800 if (src_ipif == NULL) { 4801 if (ip_debug > 2) { 4802 /* ip1dbg */ 4803 pr_addr_dbg("ip_newroute_v6: no src for " 4804 "dst %s\n, ", AF_INET6, v6dstp); 4805 printf("ip_newroute_v6: interface name %s\n", 4806 dst_ill->ill_name); 4807 } 4808 goto icmp_err_ret; 4809 } 4810 4811 if (ip_debug > 3) { 4812 /* ip2dbg */ 4813 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4814 AF_INET6, &v6gw); 4815 } 4816 ip2dbg(("\tire type %s (%d)\n", 4817 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4818 4819 /* 4820 * At this point in ip_newroute_v6(), ire is either the 4821 * IRE_CACHE of the next-hop gateway for an off-subnet 4822 * destination or an IRE_INTERFACE type that should be used 4823 * to resolve an 
on-subnet destination or an on-subnet 4824 * next-hop gateway. 4825 * 4826 * In the IRE_CACHE case, we have the following : 4827 * 4828 * 1) src_ipif - used for getting a source address. 4829 * 4830 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4831 * means packets using this IRE_CACHE will go out on dst_ill. 4832 * 4833 * 3) The IRE sire will point to the prefix that is the longest 4834 * matching route for the destination. These prefix types 4835 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST, and 4836 * IRE_HOST_REDIRECT. 4837 * 4838 * The newly created IRE_CACHE entry for the off-subnet 4839 * destination is tied to both the prefix route and the 4840 * interface route used to resolve the next-hop gateway 4841 * via the ire_phandle and ire_ihandle fields, respectively. 4842 * 4843 * In the IRE_INTERFACE case, we have the following : 4844 * 4845 * 1) src_ipif - used for getting a source address. 4846 * 4847 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4848 * means packets using the IRE_CACHE that we will build 4849 * here will go out on dst_ill. 4850 * 4851 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4852 * to be created will only be tied to the IRE_INTERFACE that 4853 * was derived from the ire_ihandle field. 4854 * 4855 * If sire is non-NULL, it means the destination is off-link 4856 * and we will first create the IRE_CACHE for the gateway. 4857 * Next time through ip_newroute_v6, we will create the 4858 * IRE_CACHE for the final destination as described above. 4859 */ 4860 save_ire = ire; 4861 switch (ire->ire_type) { 4862 case IRE_CACHE: { 4863 ire_t *ipif_ire; 4864 4865 ASSERT(sire != NULL); 4866 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4867 mutex_enter(&ire->ire_lock); 4868 v6gw = ire->ire_gateway_addr_v6; 4869 mutex_exit(&ire->ire_lock); 4870 } 4871 /* 4872 * We need 3 ire's to create a new cache ire for an 4873 * off-link destination from the cache ire of the 4874 * gateway. 4875 * 4876 * 1. 
The prefix ire 'sire' 4877 * 2. The cache ire of the gateway 'ire' 4878 * 3. The interface ire 'ipif_ire' 4879 * 4880 * We have (1) and (2). We lookup (3) below. 4881 * 4882 * If there is no interface route to the gateway, 4883 * it is a race condition, where we found the cache 4884 * but the inteface route has been deleted. 4885 */ 4886 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4887 if (ipif_ire == NULL) { 4888 ip1dbg(("ip_newroute_v6:" 4889 "ire_ihandle_lookup_offlink_v6 failed\n")); 4890 goto icmp_err_ret; 4891 } 4892 /* 4893 * Assume DL_UNITDATA_REQ is same for all physical 4894 * interfaces in the ifgrp. If it isn't, this code will 4895 * have to be seriously rewhacked to allow the 4896 * fastpath probing (such that I cache the link 4897 * header in the IRE_CACHE) to work over ifgrps. 4898 * We have what we need to build an IRE_CACHE. 4899 */ 4900 /* 4901 * Note: the new ire inherits RTF_SETSRC 4902 * and RTF_MULTIRT to propagate these flags from prefix 4903 * to cache. 4904 */ 4905 4906 /* 4907 * Check cached gateway IRE for any security 4908 * attributes; if found, associate the gateway 4909 * credentials group to the destination IRE. 
4910 */ 4911 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4912 mutex_enter(&attrp->igsa_lock); 4913 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4914 GCGRP_REFHOLD(gcgrp); 4915 mutex_exit(&attrp->igsa_lock); 4916 } 4917 4918 ire = ire_create_v6( 4919 v6dstp, /* dest address */ 4920 &ipv6_all_ones, /* mask */ 4921 &src_ipif->ipif_v6src_addr, /* source address */ 4922 &v6gw, /* gateway address */ 4923 &save_ire->ire_max_frag, 4924 NULL, /* Fast Path header */ 4925 dst_ill->ill_rq, /* recv-from queue */ 4926 dst_ill->ill_wq, /* send-to queue */ 4927 IRE_CACHE, 4928 NULL, 4929 src_ipif, 4930 &sire->ire_mask_v6, /* Parent mask */ 4931 sire->ire_phandle, /* Parent handle */ 4932 ipif_ire->ire_ihandle, /* Interface handle */ 4933 sire->ire_flags & /* flags if any */ 4934 (RTF_SETSRC | RTF_MULTIRT), 4935 &(sire->ire_uinfo), 4936 NULL, 4937 gcgrp); 4938 4939 if (ire == NULL) { 4940 if (gcgrp != NULL) { 4941 GCGRP_REFRELE(gcgrp); 4942 gcgrp = NULL; 4943 } 4944 ire_refrele(save_ire); 4945 ire_refrele(ipif_ire); 4946 break; 4947 } 4948 4949 /* reference now held by IRE */ 4950 gcgrp = NULL; 4951 4952 ire->ire_marks |= ire_marks; 4953 4954 /* 4955 * Prevent sire and ipif_ire from getting deleted. The 4956 * newly created ire is tied to both of them via the 4957 * phandle and ihandle respectively. 4958 */ 4959 IRB_REFHOLD(sire->ire_bucket); 4960 /* Has it been removed already ? */ 4961 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4962 IRB_REFRELE(sire->ire_bucket); 4963 ire_refrele(ipif_ire); 4964 ire_refrele(save_ire); 4965 break; 4966 } 4967 4968 IRB_REFHOLD(ipif_ire->ire_bucket); 4969 /* Has it been removed already ? 
*/ 4970 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4971 IRB_REFRELE(ipif_ire->ire_bucket); 4972 IRB_REFRELE(sire->ire_bucket); 4973 ire_refrele(ipif_ire); 4974 ire_refrele(save_ire); 4975 break; 4976 } 4977 4978 xmit_mp = first_mp; 4979 if (ire->ire_flags & RTF_MULTIRT) { 4980 copy_mp = copymsg(first_mp); 4981 if (copy_mp != NULL) { 4982 xmit_mp = copy_mp; 4983 MULTIRT_DEBUG_TAG(first_mp); 4984 } 4985 } 4986 ire_add_then_send(q, ire, xmit_mp); 4987 if (ip6_asp_table_held) { 4988 ip6_asp_table_refrele(); 4989 ip6_asp_table_held = B_FALSE; 4990 } 4991 ire_refrele(save_ire); 4992 4993 /* Assert that sire is not deleted yet. */ 4994 ASSERT(sire->ire_ptpn != NULL); 4995 IRB_REFRELE(sire->ire_bucket); 4996 4997 /* Assert that ipif_ire is not deleted yet. */ 4998 ASSERT(ipif_ire->ire_ptpn != NULL); 4999 IRB_REFRELE(ipif_ire->ire_bucket); 5000 ire_refrele(ipif_ire); 5001 5002 if (copy_mp != NULL) { 5003 /* 5004 * Search for the next unresolved 5005 * multirt route. 5006 */ 5007 copy_mp = NULL; 5008 ipif_ire = NULL; 5009 ire = NULL; 5010 /* re-enter the loop */ 5011 multirt_resolve_next = B_TRUE; 5012 continue; 5013 } 5014 ire_refrele(sire); 5015 ill_refrele(dst_ill); 5016 ipif_refrele(src_ipif); 5017 return; 5018 } 5019 case IRE_IF_NORESOLVER: 5020 /* 5021 * We have what we need to build an IRE_CACHE. 5022 * 5023 * Create a new dlureq_mp with the IPv6 gateway 5024 * address in destination address in the DLPI hdr 5025 * if the physical length is exactly 16 bytes. 
5026 */ 5027 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5028 const in6_addr_t *addr; 5029 5030 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5031 addr = &v6gw; 5032 else 5033 addr = v6dstp; 5034 5035 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 5036 dst_ill->ill_phys_addr_length, 5037 dst_ill->ill_sap, 5038 dst_ill->ill_sap_length); 5039 } else { 5040 dlureq_mp = ill_dlur_gen(NULL, 5041 dst_ill->ill_phys_addr_length, 5042 dst_ill->ill_sap, 5043 dst_ill->ill_sap_length); 5044 } 5045 if (dlureq_mp == NULL) 5046 break; 5047 /* 5048 * TSol note: We are creating the ire cache for the 5049 * destination 'dst'. If 'dst' is offlink, going 5050 * through the first hop 'gw', the security attributes 5051 * of 'dst' must be set to point to the gateway 5052 * credentials of gateway 'gw'. If 'dst' is onlink, it 5053 * is possible that 'dst' is a potential gateway that is 5054 * referenced by some route that has some security 5055 * attributes. Thus in the former case, we need to do a 5056 * gcgrp_lookup of 'gw' while in the latter case we 5057 * need to do gcgrp_lookup of 'dst' itself. 5058 */ 5059 ga.ga_af = AF_INET6; 5060 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5061 ga.ga_addr = v6gw; 5062 else 5063 ga.ga_addr = *v6dstp; 5064 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5065 5066 /* 5067 * Note: the new ire inherits sire flags RTF_SETSRC 5068 * and RTF_MULTIRT to propagate those rules from prefix 5069 * to cache. 5070 */ 5071 ire = ire_create_v6( 5072 v6dstp, /* dest address */ 5073 &ipv6_all_ones, /* mask */ 5074 &src_ipif->ipif_v6src_addr, /* source address */ 5075 &v6gw, /* gateway address */ 5076 &save_ire->ire_max_frag, 5077 NULL, /* Fast Path header */ 5078 dst_ill->ill_rq, /* recv-from queue */ 5079 dst_ill->ill_wq, /* send-to queue */ 5080 IRE_CACHE, 5081 dlureq_mp, 5082 src_ipif, 5083 &save_ire->ire_mask_v6, /* Parent mask */ 5084 (sire != NULL) ? /* Parent handle */ 5085 sire->ire_phandle : 0, 5086 save_ire->ire_ihandle, /* Interface handle */ 5087 (sire != NULL) ? 
/* flags if any */ 5088 sire->ire_flags & 5089 (RTF_SETSRC | RTF_MULTIRT) : 0, 5090 &(save_ire->ire_uinfo), 5091 NULL, 5092 gcgrp); 5093 5094 freeb(dlureq_mp); 5095 5096 if (ire == NULL) { 5097 if (gcgrp != NULL) { 5098 GCGRP_REFRELE(gcgrp); 5099 gcgrp = NULL; 5100 } 5101 ire_refrele(save_ire); 5102 break; 5103 } 5104 5105 /* reference now held by IRE */ 5106 gcgrp = NULL; 5107 5108 ire->ire_marks |= ire_marks; 5109 5110 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5111 dst = v6gw; 5112 else 5113 dst = *v6dstp; 5114 err = ndp_noresolver(dst_ill, &dst); 5115 if (err != 0) { 5116 ire_refrele(save_ire); 5117 break; 5118 } 5119 5120 /* Prevent save_ire from getting deleted */ 5121 IRB_REFHOLD(save_ire->ire_bucket); 5122 /* Has it been removed already ? */ 5123 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5124 IRB_REFRELE(save_ire->ire_bucket); 5125 ire_refrele(save_ire); 5126 break; 5127 } 5128 5129 xmit_mp = first_mp; 5130 /* 5131 * In case of MULTIRT, a copy of the current packet 5132 * to send is made to further re-enter the 5133 * loop and attempt another route resolution 5134 */ 5135 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5136 copy_mp = copymsg(first_mp); 5137 if (copy_mp != NULL) { 5138 xmit_mp = copy_mp; 5139 MULTIRT_DEBUG_TAG(first_mp); 5140 } 5141 } 5142 ire_add_then_send(q, ire, xmit_mp); 5143 if (ip6_asp_table_held) { 5144 ip6_asp_table_refrele(); 5145 ip6_asp_table_held = B_FALSE; 5146 } 5147 5148 /* Assert that it is not deleted yet. */ 5149 ASSERT(save_ire->ire_ptpn != NULL); 5150 IRB_REFRELE(save_ire->ire_bucket); 5151 ire_refrele(save_ire); 5152 5153 if (copy_mp != NULL) { 5154 /* 5155 * If we found a (no)resolver, we ignore any 5156 * trailing top priority IRE_CACHE in 5157 * further loops. This ensures that we do not 5158 * omit any (no)resolver despite the priority 5159 * in this call. 5160 * IRE_CACHE, if any, will be processed 5161 * by another thread entering ip_newroute(), 5162 * (on resolver response, for example). 
5163 * We use this to force multiple parallel 5164 * resolution as soon as a packet needs to be 5165 * sent. The result is, after one packet 5166 * emission all reachable routes are generally 5167 * resolved. 5168 * Otherwise, complete resolution of MULTIRT 5169 * routes would require several emissions as 5170 * side effect. 5171 */ 5172 multirt_flags &= ~MULTIRT_CACHEGW; 5173 5174 /* 5175 * Search for the next unresolved multirt 5176 * route. 5177 */ 5178 copy_mp = NULL; 5179 save_ire = NULL; 5180 ire = NULL; 5181 /* re-enter the loop */ 5182 multirt_resolve_next = B_TRUE; 5183 continue; 5184 } 5185 5186 /* Don't need sire anymore */ 5187 if (sire != NULL) 5188 ire_refrele(sire); 5189 ill_refrele(dst_ill); 5190 ipif_refrele(src_ipif); 5191 return; 5192 5193 case IRE_IF_RESOLVER: 5194 /* 5195 * We can't build an IRE_CACHE yet, but at least we 5196 * found a resolver that can help. 5197 */ 5198 dst = *v6dstp; 5199 5200 /* 5201 * To be at this point in the code with a non-zero gw 5202 * means that dst is reachable through a gateway that 5203 * we have never resolved. By changing dst to the gw 5204 * addr we resolve the gateway first. When 5205 * ire_add_then_send() tries to put the IP dg to dst, 5206 * it will reenter ip_newroute() at which time we will 5207 * find the IRE_CACHE for the gw and create another 5208 * IRE_CACHE above (for dst itself). 5209 */ 5210 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5211 save_dst = dst; 5212 dst = v6gw; 5213 v6gw = ipv6_all_zeros; 5214 } 5215 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5216 /* 5217 * Ask the external resolver to do its thing. 
5218 * Make an mblk chain in the following form: 5219 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5220 */ 5221 mblk_t *ire_mp; 5222 mblk_t *areq_mp; 5223 areq_t *areq; 5224 in6_addr_t *addrp; 5225 5226 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5227 if (ip6_asp_table_held) { 5228 ip6_asp_table_refrele(); 5229 ip6_asp_table_held = B_FALSE; 5230 } 5231 ire = ire_create_mp_v6( 5232 &dst, /* dest address */ 5233 &ipv6_all_ones, /* mask */ 5234 &src_ipif->ipif_v6src_addr, 5235 /* source address */ 5236 &v6gw, /* gateway address */ 5237 NULL, /* Fast Path header */ 5238 dst_ill->ill_rq, /* recv-from queue */ 5239 dst_ill->ill_wq, /* send-to queue */ 5240 IRE_CACHE, 5241 NULL, 5242 src_ipif, 5243 &save_ire->ire_mask_v6, 5244 /* Parent mask */ 5245 0, 5246 save_ire->ire_ihandle, 5247 /* Interface handle */ 5248 0, /* flags if any */ 5249 &(save_ire->ire_uinfo), 5250 NULL, 5251 NULL); 5252 5253 ire_refrele(save_ire); 5254 if (ire == NULL) { 5255 ip1dbg(("ip_newroute_v6:" 5256 "ire is NULL\n")); 5257 break; 5258 } 5259 5260 if ((sire != NULL) && 5261 (sire->ire_flags & RTF_MULTIRT)) { 5262 /* 5263 * processing a copy of the packet to 5264 * send for further resolution loops 5265 */ 5266 copy_mp = copymsg(first_mp); 5267 if (copy_mp != NULL) 5268 MULTIRT_DEBUG_TAG(copy_mp); 5269 } 5270 ire->ire_marks |= ire_marks; 5271 ire_mp = ire->ire_mp; 5272 /* 5273 * Now create or find an nce for this interface. 5274 * The hw addr will need to to be set from 5275 * the reply to the AR_ENTRY_QUERY that 5276 * we're about to send. This will be done in 5277 * ire_add_v6(). 5278 */ 5279 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5280 switch (err) { 5281 case 0: 5282 /* 5283 * New cache entry created. 5284 * Break, then ask the external 5285 * resolver. 5286 */ 5287 break; 5288 case EINPROGRESS: 5289 /* 5290 * Resolution in progress; 5291 * packet has been queued by 5292 * ndp_resolver(). 
5293 */ 5294 ire_delete(ire); 5295 ire = NULL; 5296 /* 5297 * Check if another multirt 5298 * route must be resolved. 5299 */ 5300 if (copy_mp != NULL) { 5301 /* 5302 * If we found a resolver, we 5303 * ignore any trailing top 5304 * priority IRE_CACHE in 5305 * further loops. The reason is 5306 * the same as for noresolver. 5307 */ 5308 multirt_flags &= 5309 ~MULTIRT_CACHEGW; 5310 /* 5311 * Search for the next 5312 * unresolved multirt route. 5313 */ 5314 first_mp = copy_mp; 5315 copy_mp = NULL; 5316 mp = first_mp; 5317 if (mp->b_datap->db_type == 5318 M_CTL) { 5319 mp = mp->b_cont; 5320 } 5321 ASSERT(sire != NULL); 5322 dst = save_dst; 5323 /* 5324 * re-enter the loop 5325 */ 5326 multirt_resolve_next = 5327 B_TRUE; 5328 continue; 5329 } 5330 5331 if (sire != NULL) 5332 ire_refrele(sire); 5333 ill_refrele(dst_ill); 5334 ipif_refrele(src_ipif); 5335 return; 5336 default: 5337 /* 5338 * Transient error; packet will be 5339 * freed. 5340 */ 5341 ire_delete(ire); 5342 ire = NULL; 5343 break; 5344 } 5345 if (err != 0) 5346 break; 5347 /* 5348 * Now set up the AR_ENTRY_QUERY and send it. 5349 */ 5350 areq_mp = ill_arp_alloc(dst_ill, 5351 (uchar_t *)&ipv6_areq_template, 5352 (caddr_t)&dst); 5353 if (areq_mp == NULL) { 5354 ip1dbg(("ip_newroute_v6:" 5355 "areq_mp is NULL\n")); 5356 freemsg(ire_mp); 5357 break; 5358 } 5359 areq = (areq_t *)areq_mp->b_rptr; 5360 addrp = (in6_addr_t *)((char *)areq + 5361 areq->areq_target_addr_offset); 5362 *addrp = dst; 5363 addrp = (in6_addr_t *)((char *)areq + 5364 areq->areq_sender_addr_offset); 5365 *addrp = src_ipif->ipif_v6src_addr; 5366 /* 5367 * link the chain, then send up to the resolver. 5368 */ 5369 linkb(areq_mp, ire_mp); 5370 linkb(areq_mp, mp); 5371 ip1dbg(("ip_newroute_v6:" 5372 "putnext to resolver\n")); 5373 putnext(dst_ill->ill_rq, areq_mp); 5374 /* 5375 * Check if another multirt route 5376 * must be resolved. 
5377 */ 5378 ire = NULL; 5379 if (copy_mp != NULL) { 5380 /* 5381 * If we find a resolver, we ignore any 5382 * trailing top priority IRE_CACHE in 5383 * further loops. The reason is the 5384 * same as for noresolver. 5385 */ 5386 multirt_flags &= ~MULTIRT_CACHEGW; 5387 /* 5388 * Search for the next unresolved 5389 * multirt route. 5390 */ 5391 first_mp = copy_mp; 5392 copy_mp = NULL; 5393 mp = first_mp; 5394 if (mp->b_datap->db_type == M_CTL) { 5395 mp = mp->b_cont; 5396 } 5397 ASSERT(sire != NULL); 5398 dst = save_dst; 5399 /* 5400 * re-enter the loop 5401 */ 5402 multirt_resolve_next = B_TRUE; 5403 continue; 5404 } 5405 5406 if (sire != NULL) 5407 ire_refrele(sire); 5408 ill_refrele(dst_ill); 5409 ipif_refrele(src_ipif); 5410 return; 5411 } 5412 /* 5413 * Non-external resolver case. 5414 * 5415 * TSol note: Please see the note above the 5416 * IRE_IF_NORESOLVER case. 5417 */ 5418 ga.ga_af = AF_INET6; 5419 ga.ga_addr = dst; 5420 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5421 5422 ire = ire_create_v6( 5423 &dst, /* dest address */ 5424 &ipv6_all_ones, /* mask */ 5425 &src_ipif->ipif_v6src_addr, /* source address */ 5426 &v6gw, /* gateway address */ 5427 &save_ire->ire_max_frag, 5428 NULL, /* Fast Path header */ 5429 dst_ill->ill_rq, /* recv-from queue */ 5430 dst_ill->ill_wq, /* send-to queue */ 5431 IRE_CACHE, 5432 NULL, 5433 src_ipif, 5434 &save_ire->ire_mask_v6, /* Parent mask */ 5435 0, 5436 save_ire->ire_ihandle, /* Interface handle */ 5437 0, /* flags if any */ 5438 &(save_ire->ire_uinfo), 5439 NULL, 5440 gcgrp); 5441 5442 if (ire == NULL) { 5443 if (gcgrp != NULL) { 5444 GCGRP_REFRELE(gcgrp); 5445 gcgrp = NULL; 5446 } 5447 ire_refrele(save_ire); 5448 break; 5449 } 5450 5451 /* reference now held by IRE */ 5452 gcgrp = NULL; 5453 5454 if ((sire != NULL) && 5455 (sire->ire_flags & RTF_MULTIRT)) { 5456 copy_mp = copymsg(first_mp); 5457 if (copy_mp != NULL) 5458 MULTIRT_DEBUG_TAG(copy_mp); 5459 } 5460 5461 ire->ire_marks |= ire_marks; 5462 err = ndp_resolver(dst_ill, 
&dst, first_mp, zoneid); 5463 switch (err) { 5464 case 0: 5465 /* Prevent save_ire from getting deleted */ 5466 IRB_REFHOLD(save_ire->ire_bucket); 5467 /* Has it been removed already ? */ 5468 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5469 IRB_REFRELE(save_ire->ire_bucket); 5470 ire_refrele(save_ire); 5471 break; 5472 } 5473 5474 /* 5475 * We have a resolved cache entry, 5476 * add in the IRE. 5477 */ 5478 ire_add_then_send(q, ire, first_mp); 5479 if (ip6_asp_table_held) { 5480 ip6_asp_table_refrele(); 5481 ip6_asp_table_held = B_FALSE; 5482 } 5483 5484 /* Assert that it is not deleted yet. */ 5485 ASSERT(save_ire->ire_ptpn != NULL); 5486 IRB_REFRELE(save_ire->ire_bucket); 5487 ire_refrele(save_ire); 5488 /* 5489 * Check if another multirt route 5490 * must be resolved. 5491 */ 5492 ire = NULL; 5493 if (copy_mp != NULL) { 5494 /* 5495 * If we find a resolver, we ignore any 5496 * trailing top priority IRE_CACHE in 5497 * further loops. The reason is the 5498 * same as for noresolver. 5499 */ 5500 multirt_flags &= ~MULTIRT_CACHEGW; 5501 /* 5502 * Search for the next unresolved 5503 * multirt route. 5504 */ 5505 first_mp = copy_mp; 5506 copy_mp = NULL; 5507 mp = first_mp; 5508 if (mp->b_datap->db_type == M_CTL) { 5509 mp = mp->b_cont; 5510 } 5511 ASSERT(sire != NULL); 5512 dst = save_dst; 5513 /* 5514 * re-enter the loop 5515 */ 5516 multirt_resolve_next = B_TRUE; 5517 continue; 5518 } 5519 5520 if (sire != NULL) 5521 ire_refrele(sire); 5522 ill_refrele(dst_ill); 5523 ipif_refrele(src_ipif); 5524 return; 5525 5526 case EINPROGRESS: 5527 /* 5528 * mp was consumed - presumably queued. 5529 * No need for ire, presumably resolution is 5530 * in progress, and ire will be added when the 5531 * address is resolved. 
5532 */ 5533 if (ip6_asp_table_held) { 5534 ip6_asp_table_refrele(); 5535 ip6_asp_table_held = B_FALSE; 5536 } 5537 ASSERT(ire->ire_nce == NULL); 5538 ire_delete(ire); 5539 ire_refrele(save_ire); 5540 /* 5541 * Check if another multirt route 5542 * must be resolved. 5543 */ 5544 ire = NULL; 5545 if (copy_mp != NULL) { 5546 /* 5547 * If we find a resolver, we ignore any 5548 * trailing top priority IRE_CACHE in 5549 * further loops. The reason is the 5550 * same as for noresolver. 5551 */ 5552 multirt_flags &= ~MULTIRT_CACHEGW; 5553 /* 5554 * Search for the next unresolved 5555 * multirt route. 5556 */ 5557 first_mp = copy_mp; 5558 copy_mp = NULL; 5559 mp = first_mp; 5560 if (mp->b_datap->db_type == M_CTL) { 5561 mp = mp->b_cont; 5562 } 5563 ASSERT(sire != NULL); 5564 dst = save_dst; 5565 /* 5566 * re-enter the loop 5567 */ 5568 multirt_resolve_next = B_TRUE; 5569 continue; 5570 } 5571 if (sire != NULL) 5572 ire_refrele(sire); 5573 ill_refrele(dst_ill); 5574 ipif_refrele(src_ipif); 5575 return; 5576 default: 5577 /* Some transient error */ 5578 ASSERT(ire->ire_nce == NULL); 5579 ire_refrele(save_ire); 5580 break; 5581 } 5582 break; 5583 default: 5584 break; 5585 } 5586 if (ip6_asp_table_held) { 5587 ip6_asp_table_refrele(); 5588 ip6_asp_table_held = B_FALSE; 5589 } 5590 } while (multirt_resolve_next); 5591 5592 err_ret: 5593 ip1dbg(("ip_newroute_v6: dropped\n")); 5594 if (src_ipif != NULL) 5595 ipif_refrele(src_ipif); 5596 if (dst_ill != NULL) { 5597 need_rele = B_TRUE; 5598 ill = dst_ill; 5599 } 5600 if (ill != NULL) { 5601 if (mp->b_prev != NULL) { 5602 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 5603 } else { 5604 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 5605 } 5606 5607 if (need_rele) 5608 ill_refrele(ill); 5609 } else { 5610 if (mp->b_prev != NULL) { 5611 BUMP_MIB(&ip6_mib, ipv6InDiscards); 5612 } else { 5613 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 5614 } 5615 } 5616 /* Did this packet originate externally? 
*/ 5617 if (mp->b_prev) { 5618 mp->b_next = NULL; 5619 mp->b_prev = NULL; 5620 } 5621 if (copy_mp != NULL) { 5622 MULTIRT_DEBUG_UNTAG(copy_mp); 5623 freemsg(copy_mp); 5624 } 5625 MULTIRT_DEBUG_UNTAG(first_mp); 5626 freemsg(first_mp); 5627 if (ire != NULL) 5628 ire_refrele(ire); 5629 if (sire != NULL) 5630 ire_refrele(sire); 5631 return; 5632 5633 icmp_err_ret: 5634 if (ip6_asp_table_held) 5635 ip6_asp_table_refrele(); 5636 if (src_ipif != NULL) 5637 ipif_refrele(src_ipif); 5638 if (dst_ill != NULL) { 5639 need_rele = B_TRUE; 5640 ill = dst_ill; 5641 } 5642 ip1dbg(("ip_newroute_v6: no route\n")); 5643 if (sire != NULL) 5644 ire_refrele(sire); 5645 /* 5646 * We need to set sire to NULL to avoid double freeing if we 5647 * ever goto err_ret from below. 5648 */ 5649 sire = NULL; 5650 ip6h = (ip6_t *)mp->b_rptr; 5651 /* Skip ip6i_t header if present */ 5652 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5653 /* Make sure the IPv6 header is present */ 5654 if ((mp->b_wptr - (uchar_t *)ip6h) < 5655 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5656 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5657 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5658 goto err_ret; 5659 } 5660 } 5661 mp->b_rptr += sizeof (ip6i_t); 5662 ip6h = (ip6_t *)mp->b_rptr; 5663 } 5664 /* Did this packet originate externally? 
*/ 5665 if (mp->b_prev) { 5666 if (ill != NULL) { 5667 BUMP_MIB(ill->ill_ip6_mib, ipv6InNoRoutes); 5668 } else { 5669 BUMP_MIB(&ip6_mib, ipv6InNoRoutes); 5670 } 5671 mp->b_next = NULL; 5672 mp->b_prev = NULL; 5673 q = WR(q); 5674 } else { 5675 if (ill != NULL) { 5676 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 5677 } else { 5678 BUMP_MIB(&ip6_mib, ipv6OutNoRoutes); 5679 } 5680 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5681 /* Failed */ 5682 if (copy_mp != NULL) { 5683 MULTIRT_DEBUG_UNTAG(copy_mp); 5684 freemsg(copy_mp); 5685 } 5686 MULTIRT_DEBUG_UNTAG(first_mp); 5687 freemsg(first_mp); 5688 if (ire != NULL) 5689 ire_refrele(ire); 5690 if (need_rele) 5691 ill_refrele(ill); 5692 return; 5693 } 5694 } 5695 5696 if (need_rele) 5697 ill_refrele(ill); 5698 5699 /* 5700 * At this point we will have ire only if RTF_BLACKHOLE 5701 * or RTF_REJECT flags are set on the IRE. It will not 5702 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5703 */ 5704 if (ire != NULL) { 5705 if (ire->ire_flags & RTF_BLACKHOLE) { 5706 ire_refrele(ire); 5707 if (copy_mp != NULL) { 5708 MULTIRT_DEBUG_UNTAG(copy_mp); 5709 freemsg(copy_mp); 5710 } 5711 MULTIRT_DEBUG_UNTAG(first_mp); 5712 freemsg(first_mp); 5713 return; 5714 } 5715 ire_refrele(ire); 5716 } 5717 if (ip_debug > 3) { 5718 /* ip2dbg */ 5719 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5720 AF_INET6, v6dstp); 5721 } 5722 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5723 B_FALSE, B_FALSE, zoneid); 5724 } 5725 5726 /* 5727 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5728 * we need to send out a packet to a destination address for which we do not 5729 * have specific routing information. It is only used for multicast packets. 5730 * 5731 * If unspec_src we allow creating an IRE with source address zero. 5732 * ire_send_v6() will delete it after the packet is sent. 
5733 */ 5734 void 5735 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5736 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5737 { 5738 ire_t *ire = NULL; 5739 ipif_t *src_ipif = NULL; 5740 int err = 0; 5741 ill_t *dst_ill = NULL; 5742 ire_t *save_ire; 5743 ushort_t ire_marks = 0; 5744 ipsec_out_t *io; 5745 ill_t *attach_ill = NULL; 5746 ill_t *ill; 5747 ip6_t *ip6h; 5748 mblk_t *first_mp; 5749 boolean_t ip6i_present; 5750 ire_t *fire = NULL; 5751 mblk_t *copy_mp = NULL; 5752 boolean_t multirt_resolve_next; 5753 in6_addr_t *v6dstp = &v6dst; 5754 boolean_t ipif_held = B_FALSE; 5755 boolean_t ill_held = B_FALSE; 5756 boolean_t ip6_asp_table_held = B_FALSE; 5757 5758 /* 5759 * This loop is run only once in most cases. 5760 * We loop to resolve further routes only when the destination 5761 * can be reached through multiple RTF_MULTIRT-flagged ires. 5762 */ 5763 do { 5764 multirt_resolve_next = B_FALSE; 5765 if (dst_ill != NULL) { 5766 ill_refrele(dst_ill); 5767 dst_ill = NULL; 5768 } 5769 5770 if (src_ipif != NULL) { 5771 ipif_refrele(src_ipif); 5772 src_ipif = NULL; 5773 } 5774 ASSERT(ipif != NULL); 5775 ill = ipif->ipif_ill; 5776 5777 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5778 if (ip_debug > 2) { 5779 /* ip1dbg */ 5780 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5781 AF_INET6, v6dstp); 5782 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5783 ill->ill_name, ipif->ipif_isv6); 5784 } 5785 5786 first_mp = mp; 5787 if (mp->b_datap->db_type == M_CTL) { 5788 mp = mp->b_cont; 5789 io = (ipsec_out_t *)first_mp->b_rptr; 5790 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5791 } else { 5792 io = NULL; 5793 } 5794 5795 /* 5796 * If the interface is a pt-pt interface we look for an 5797 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5798 * local_address and the pt-pt destination address. 5799 * Otherwise we just match the local address. 
5800 */ 5801 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5802 goto err_ret; 5803 } 5804 /* 5805 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5806 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5807 * as it could be NULL. 5808 * 5809 * This information can appear either in an ip6i_t or an 5810 * IPSEC_OUT message. 5811 */ 5812 ip6h = (ip6_t *)mp->b_rptr; 5813 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5814 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5815 if (!ip6i_present || 5816 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5817 attach_ill = ip_grab_attach_ill(ill, first_mp, 5818 (ip6i_present ? 5819 ((ip6i_t *)ip6h)->ip6i_ifindex : 5820 io->ipsec_out_ill_index), B_TRUE); 5821 /* Failure case frees things for us. */ 5822 if (attach_ill == NULL) 5823 return; 5824 5825 /* 5826 * Check if we need an ire that will not be 5827 * looked up by anybody else i.e. HIDDEN. 5828 */ 5829 if (ill_is_probeonly(attach_ill)) 5830 ire_marks = IRE_MARK_HIDDEN; 5831 } 5832 } 5833 5834 /* 5835 * We check if an IRE_OFFSUBNET for the addr that goes through 5836 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5837 * RTF_MULTIRT flags must be honored. 5838 */ 5839 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5840 ip2dbg(("ip_newroute_ipif_v6: " 5841 "ipif_lookup_multi_ire_v6(" 5842 "ipif %p, dst %08x) = fire %p\n", 5843 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5844 (void *)fire)); 5845 5846 /* 5847 * If the application specified the ill (ifindex), we still 5848 * load spread. Only if the packets needs to go out specifically 5849 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5850 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5851 * multirouting, then we don't try to use a different ill for 5852 * load spreading. 
5853 */ 5854 if (attach_ill == NULL) { 5855 /* 5856 * If the interface belongs to an interface group, 5857 * make sure the next possible interface in the group 5858 * is used. This encourages load spreading among peers 5859 * in an interface group. 5860 * 5861 * Note: While we pick a dst_ill we are really only 5862 * interested in the ill for load spreading. The source 5863 * ipif is determined by source address selection below. 5864 */ 5865 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5866 dst_ill = ipif->ipif_ill; 5867 /* For uniformity do a refhold */ 5868 ill_refhold(dst_ill); 5869 } else { 5870 /* refheld by ip_newroute_get_dst_ill_v6 */ 5871 dst_ill = 5872 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5873 } 5874 if (dst_ill == NULL) { 5875 if (ip_debug > 2) { 5876 pr_addr_dbg("ip_newroute_ipif_v6: " 5877 "no dst ill for dst %s\n", 5878 AF_INET6, v6dstp); 5879 } 5880 goto err_ret; 5881 } 5882 } else { 5883 dst_ill = ipif->ipif_ill; 5884 /* 5885 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5886 * and IPV6_BOUND_PIF case. 5887 */ 5888 ASSERT(dst_ill == attach_ill); 5889 /* attach_ill is already refheld */ 5890 } 5891 /* 5892 * Pick a source address which matches the scope of the 5893 * destination address. 5894 * For RTF_SETSRC routes, the source address is imposed by the 5895 * parent ire (fire). 5896 */ 5897 ASSERT(src_ipif == NULL); 5898 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5899 /* 5900 * Check that the ipif matching the requested source 5901 * address still exists. 
5902 */ 5903 src_ipif = 5904 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5905 NULL, zoneid, NULL, NULL, NULL, NULL); 5906 } 5907 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5908 ip6_asp_table_held = B_TRUE; 5909 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5910 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5911 } 5912 5913 if (src_ipif == NULL) { 5914 if (!unspec_src) { 5915 if (ip_debug > 2) { 5916 /* ip1dbg */ 5917 pr_addr_dbg("ip_newroute_ipif_v6: " 5918 "no src for dst %s\n,", 5919 AF_INET6, v6dstp); 5920 printf(" through interface %s\n", 5921 dst_ill->ill_name); 5922 } 5923 goto err_ret; 5924 } 5925 src_ipif = ipif; 5926 ipif_refhold(src_ipif); 5927 } 5928 ire = ipif_to_ire_v6(ipif); 5929 if (ire == NULL) { 5930 if (ip_debug > 2) { 5931 /* ip1dbg */ 5932 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5933 AF_INET6, &ipif->ipif_v6lcl_addr); 5934 printf("ip_newroute_ipif_v6: " 5935 "if %s\n", dst_ill->ill_name); 5936 } 5937 goto err_ret; 5938 } 5939 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5940 goto err_ret; 5941 5942 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5943 5944 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5945 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5946 if (ip_debug > 2) { 5947 /* ip1dbg */ 5948 pr_addr_dbg(" address %s\n", 5949 AF_INET6, &ire->ire_src_addr_v6); 5950 } 5951 save_ire = ire; 5952 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5953 (void *)ire, (void *)ipif)); 5954 5955 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5956 /* 5957 * an IRE_OFFSUBET was looked up 5958 * on that interface. 5959 * this ire has RTF_MULTIRT flag, 5960 * so the resolution loop 5961 * will be re-entered to resolve 5962 * additional routes on other 5963 * interfaces. For that purpose, 5964 * a copy of the packet is 5965 * made at this point. 
5966 */ 5967 fire->ire_last_used_time = lbolt; 5968 copy_mp = copymsg(first_mp); 5969 if (copy_mp) { 5970 MULTIRT_DEBUG_TAG(copy_mp); 5971 } 5972 } 5973 5974 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5975 switch (ire->ire_type) { 5976 case IRE_IF_NORESOLVER: { 5977 /* We have what we need to build an IRE_CACHE. */ 5978 mblk_t *dlureq_mp; 5979 5980 /* 5981 * Create a new dlureq_mp with the 5982 * IPv6 gateway address in destination address in the 5983 * DLPI hdr if the physical length is exactly 16 bytes. 5984 */ 5985 ASSERT(dst_ill->ill_isv6); 5986 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5987 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5988 dst_ill->ill_phys_addr_length, 5989 dst_ill->ill_sap, 5990 dst_ill->ill_sap_length); 5991 } else { 5992 dlureq_mp = ill_dlur_gen(NULL, 5993 dst_ill->ill_phys_addr_length, 5994 dst_ill->ill_sap, 5995 dst_ill->ill_sap_length); 5996 } 5997 5998 if (dlureq_mp == NULL) 5999 break; 6000 /* 6001 * The newly created ire will inherit the flags of the 6002 * parent ire, if any. 6003 */ 6004 ire = ire_create_v6( 6005 v6dstp, /* dest address */ 6006 &ipv6_all_ones, /* mask */ 6007 &src_ipif->ipif_v6src_addr, /* source address */ 6008 NULL, /* gateway address */ 6009 &save_ire->ire_max_frag, 6010 NULL, /* Fast Path header */ 6011 dst_ill->ill_rq, /* recv-from queue */ 6012 dst_ill->ill_wq, /* send-to queue */ 6013 IRE_CACHE, 6014 dlureq_mp, 6015 src_ipif, 6016 NULL, 6017 (fire != NULL) ? /* Parent handle */ 6018 fire->ire_phandle : 0, 6019 save_ire->ire_ihandle, /* Interface handle */ 6020 (fire != NULL) ? 
6021 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6022 0, 6023 &ire_uinfo_null, 6024 NULL, 6025 NULL); 6026 6027 freeb(dlureq_mp); 6028 6029 if (ire == NULL) { 6030 ire_refrele(save_ire); 6031 break; 6032 } 6033 6034 ire->ire_marks |= ire_marks; 6035 6036 err = ndp_noresolver(dst_ill, v6dstp); 6037 if (err != 0) { 6038 ire_refrele(save_ire); 6039 break; 6040 } 6041 6042 /* Prevent save_ire from getting deleted */ 6043 IRB_REFHOLD(save_ire->ire_bucket); 6044 /* Has it been removed already ? */ 6045 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6046 IRB_REFRELE(save_ire->ire_bucket); 6047 ire_refrele(save_ire); 6048 break; 6049 } 6050 6051 ire_add_then_send(q, ire, first_mp); 6052 if (ip6_asp_table_held) { 6053 ip6_asp_table_refrele(); 6054 ip6_asp_table_held = B_FALSE; 6055 } 6056 6057 /* Assert that it is not deleted yet. */ 6058 ASSERT(save_ire->ire_ptpn != NULL); 6059 IRB_REFRELE(save_ire->ire_bucket); 6060 ire_refrele(save_ire); 6061 if (fire != NULL) { 6062 ire_refrele(fire); 6063 fire = NULL; 6064 } 6065 6066 /* 6067 * The resolution loop is re-entered if we 6068 * actually are in a multirouting case. 6069 */ 6070 if (copy_mp != NULL) { 6071 boolean_t need_resolve = 6072 ire_multirt_need_resolve_v6(v6dstp, 6073 MBLK_GETLABEL(copy_mp)); 6074 if (!need_resolve) { 6075 MULTIRT_DEBUG_UNTAG(copy_mp); 6076 freemsg(copy_mp); 6077 copy_mp = NULL; 6078 } else { 6079 /* 6080 * ipif_lookup_group_v6() calls 6081 * ire_lookup_multi_v6() that uses 6082 * ire_ftable_lookup_v6() to find 6083 * an IRE_INTERFACE for the group. 6084 * In the multirt case, 6085 * ire_lookup_multi_v6() then invokes 6086 * ire_multirt_lookup_v6() to find 6087 * the next resolvable ire. 6088 * As a result, we obtain a new 6089 * interface, derived from the 6090 * next ire. 
6091 */ 6092 if (ipif_held) { 6093 ipif_refrele(ipif); 6094 ipif_held = B_FALSE; 6095 } 6096 ipif = ipif_lookup_group_v6(v6dstp, 6097 zoneid); 6098 ip2dbg(("ip_newroute_ipif: " 6099 "multirt dst %08x, ipif %p\n", 6100 ntohl(V4_PART_OF_V6((*v6dstp))), 6101 (void *)ipif)); 6102 if (ipif != NULL) { 6103 ipif_held = B_TRUE; 6104 mp = copy_mp; 6105 copy_mp = NULL; 6106 multirt_resolve_next = 6107 B_TRUE; 6108 continue; 6109 } else { 6110 freemsg(copy_mp); 6111 } 6112 } 6113 } 6114 ill_refrele(dst_ill); 6115 if (ipif_held) { 6116 ipif_refrele(ipif); 6117 ipif_held = B_FALSE; 6118 } 6119 if (src_ipif != NULL) 6120 ipif_refrele(src_ipif); 6121 return; 6122 } 6123 case IRE_IF_RESOLVER: { 6124 6125 ASSERT(dst_ill->ill_isv6); 6126 6127 /* 6128 * We obtain a partial IRE_CACHE which we will pass 6129 * along with the resolver query. When the response 6130 * comes back it will be there ready for us to add. 6131 */ 6132 /* 6133 * the newly created ire will inherit the flags of the 6134 * parent ire, if any. 6135 */ 6136 ire = ire_create_v6( 6137 v6dstp, /* dest address */ 6138 &ipv6_all_ones, /* mask */ 6139 &src_ipif->ipif_v6src_addr, /* source address */ 6140 NULL, /* gateway address */ 6141 &save_ire->ire_max_frag, 6142 NULL, /* Fast Path header */ 6143 dst_ill->ill_rq, /* recv-from queue */ 6144 dst_ill->ill_wq, /* send-to queue */ 6145 IRE_CACHE, 6146 NULL, 6147 src_ipif, 6148 NULL, 6149 (fire != NULL) ? /* Parent handle */ 6150 fire->ire_phandle : 0, 6151 save_ire->ire_ihandle, /* Interface handle */ 6152 (fire != NULL) ? 
6153 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6154 0, 6155 &ire_uinfo_null, 6156 NULL, 6157 NULL); 6158 6159 if (ire == NULL) { 6160 ire_refrele(save_ire); 6161 break; 6162 } 6163 6164 ire->ire_marks |= ire_marks; 6165 6166 /* Resolve and add ire to the ctable */ 6167 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6168 switch (err) { 6169 case 0: 6170 /* Prevent save_ire from getting deleted */ 6171 IRB_REFHOLD(save_ire->ire_bucket); 6172 /* Has it been removed already ? */ 6173 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6174 IRB_REFRELE(save_ire->ire_bucket); 6175 ire_refrele(save_ire); 6176 break; 6177 } 6178 /* 6179 * We have a resolved cache entry, 6180 * add in the IRE. 6181 */ 6182 ire_add_then_send(q, ire, first_mp); 6183 if (ip6_asp_table_held) { 6184 ip6_asp_table_refrele(); 6185 ip6_asp_table_held = B_FALSE; 6186 } 6187 6188 /* Assert that it is not deleted yet. */ 6189 ASSERT(save_ire->ire_ptpn != NULL); 6190 IRB_REFRELE(save_ire->ire_bucket); 6191 ire_refrele(save_ire); 6192 if (fire != NULL) { 6193 ire_refrele(fire); 6194 fire = NULL; 6195 } 6196 6197 /* 6198 * The resolution loop is re-entered if we 6199 * actually are in a multirouting case. 6200 */ 6201 if (copy_mp != NULL) { 6202 boolean_t need_resolve = 6203 ire_multirt_need_resolve_v6(v6dstp, 6204 MBLK_GETLABEL(copy_mp)); 6205 if (!need_resolve) { 6206 MULTIRT_DEBUG_UNTAG(copy_mp); 6207 freemsg(copy_mp); 6208 copy_mp = NULL; 6209 } else { 6210 /* 6211 * ipif_lookup_group_v6() calls 6212 * ire_lookup_multi_v6() that 6213 * uses ire_ftable_lookup_v6() 6214 * to find an IRE_INTERFACE for 6215 * the group. In the multirt 6216 * case, ire_lookup_multi_v6() 6217 * then invokes 6218 * ire_multirt_lookup_v6() to 6219 * find the next resolvable ire. 6220 * As a result, we obtain a new 6221 * interface, derived from the 6222 * next ire. 
6223 */ 6224 if (ipif_held) { 6225 ipif_refrele(ipif); 6226 ipif_held = B_FALSE; 6227 } 6228 ipif = ipif_lookup_group_v6( 6229 v6dstp, zoneid); 6230 ip2dbg(("ip_newroute_ipif: " 6231 "multirt dst %08x, " 6232 "ipif %p\n", 6233 ntohl(V4_PART_OF_V6( 6234 (*v6dstp))), 6235 (void *)ipif)); 6236 if (ipif != NULL) { 6237 ipif_held = B_TRUE; 6238 mp = copy_mp; 6239 copy_mp = NULL; 6240 multirt_resolve_next = 6241 B_TRUE; 6242 continue; 6243 } else { 6244 freemsg(copy_mp); 6245 } 6246 } 6247 } 6248 ill_refrele(dst_ill); 6249 if (ipif_held) { 6250 ipif_refrele(ipif); 6251 ipif_held = B_FALSE; 6252 } 6253 if (src_ipif != NULL) 6254 ipif_refrele(src_ipif); 6255 return; 6256 6257 case EINPROGRESS: 6258 /* 6259 * mp was consumed - presumably queued. 6260 * No need for ire, presumably resolution is 6261 * in progress, and ire will be added when the 6262 * address is resolved. 6263 */ 6264 if (ip6_asp_table_held) { 6265 ip6_asp_table_refrele(); 6266 ip6_asp_table_held = B_FALSE; 6267 } 6268 ire_delete(ire); 6269 ire_refrele(save_ire); 6270 if (fire != NULL) { 6271 ire_refrele(fire); 6272 fire = NULL; 6273 } 6274 6275 /* 6276 * The resolution loop is re-entered if we 6277 * actually are in a multirouting case. 6278 */ 6279 if (copy_mp != NULL) { 6280 boolean_t need_resolve = 6281 ire_multirt_need_resolve_v6(v6dstp, 6282 MBLK_GETLABEL(copy_mp)); 6283 if (!need_resolve) { 6284 MULTIRT_DEBUG_UNTAG(copy_mp); 6285 freemsg(copy_mp); 6286 copy_mp = NULL; 6287 } else { 6288 /* 6289 * ipif_lookup_group_v6() calls 6290 * ire_lookup_multi_v6() that 6291 * uses ire_ftable_lookup_v6() 6292 * to find an IRE_INTERFACE for 6293 * the group. In the multirt 6294 * case, ire_lookup_multi_v6() 6295 * then invokes 6296 * ire_multirt_lookup_v6() to 6297 * find the next resolvable ire. 6298 * As a result, we obtain a new 6299 * interface, derived from the 6300 * next ire. 
6301 */ 6302 if (ipif_held) { 6303 ipif_refrele(ipif); 6304 ipif_held = B_FALSE; 6305 } 6306 ipif = ipif_lookup_group_v6( 6307 v6dstp, zoneid); 6308 ip2dbg(("ip_newroute_ipif: " 6309 "multirt dst %08x, " 6310 "ipif %p\n", 6311 ntohl(V4_PART_OF_V6( 6312 (*v6dstp))), 6313 (void *)ipif)); 6314 if (ipif != NULL) { 6315 ipif_held = B_TRUE; 6316 mp = copy_mp; 6317 copy_mp = NULL; 6318 multirt_resolve_next = 6319 B_TRUE; 6320 continue; 6321 } else { 6322 freemsg(copy_mp); 6323 } 6324 } 6325 } 6326 ill_refrele(dst_ill); 6327 if (ipif_held) { 6328 ipif_refrele(ipif); 6329 ipif_held = B_FALSE; 6330 } 6331 if (src_ipif != NULL) 6332 ipif_refrele(src_ipif); 6333 return; 6334 default: 6335 /* Some transient error */ 6336 ire_refrele(save_ire); 6337 break; 6338 } 6339 break; 6340 } 6341 default: 6342 break; 6343 } 6344 if (ip6_asp_table_held) { 6345 ip6_asp_table_refrele(); 6346 ip6_asp_table_held = B_FALSE; 6347 } 6348 } while (multirt_resolve_next); 6349 6350 err_ret: 6351 if (ip6_asp_table_held) 6352 ip6_asp_table_refrele(); 6353 if (ire != NULL) 6354 ire_refrele(ire); 6355 if (fire != NULL) 6356 ire_refrele(fire); 6357 if (ipif != NULL && ipif_held) 6358 ipif_refrele(ipif); 6359 if (src_ipif != NULL) 6360 ipif_refrele(src_ipif); 6361 /* Multicast - no point in trying to generate ICMP error */ 6362 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6363 if (dst_ill != NULL) { 6364 ill = dst_ill; 6365 ill_held = B_TRUE; 6366 } 6367 if (mp->b_prev || mp->b_next) { 6368 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6369 } else { 6370 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 6371 } 6372 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6373 mp->b_next = NULL; 6374 mp->b_prev = NULL; 6375 freemsg(first_mp); 6376 if (ill_held) 6377 ill_refrele(ill); 6378 } 6379 6380 /* 6381 * Parse and process any hop-by-hop or destination options. 6382 * 6383 * Assumes that q is an ill read queue so that ICMP errors for link-local 6384 * destinations are sent out the correct interface. 
6385 * 6386 * Returns -1 if there was an error and mp has been consumed. 6387 * Returns 0 if no special action is needed. 6388 * Returns 1 if the packet contained a router alert option for this node 6389 * which is verified to be "interesting/known" for our implementation. 6390 * 6391 * XXX Note: In future as more hbh or dest options are defined, 6392 * it may be better to have different routines for hbh and dest 6393 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6394 * may have same value in different namespaces. Or is it same namespace ?? 6395 * Current code checks for each opt_type (other than pads) if it is in 6396 * the expected nexthdr (hbh or dest) 6397 */ 6398 static int 6399 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6400 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6401 { 6402 uint8_t opt_type; 6403 uint_t optused; 6404 int ret = 0; 6405 mblk_t *first_mp; 6406 const char *errtype; 6407 zoneid_t zoneid; 6408 ill_t *ill = q->q_ptr; 6409 6410 first_mp = mp; 6411 if (mp->b_datap->db_type == M_CTL) { 6412 mp = mp->b_cont; 6413 } 6414 6415 while (optlen != 0) { 6416 opt_type = *optptr; 6417 if (opt_type == IP6OPT_PAD1) { 6418 optused = 1; 6419 } else { 6420 if (optlen < 2) 6421 goto bad_opt; 6422 errtype = "malformed"; 6423 if (opt_type == ip6opt_ls) { 6424 optused = 2 + optptr[1]; 6425 if (optused > optlen) 6426 goto bad_opt; 6427 } else switch (opt_type) { 6428 case IP6OPT_PADN: 6429 /* 6430 * Note:We don't verify that (N-2) pad octets 6431 * are zero as required by spec. Adhere to 6432 * "be liberal in what you accept..." part of 6433 * implementation philosophy (RFC791,RFC1122) 6434 */ 6435 optused = 2 + optptr[1]; 6436 if (optused > optlen) 6437 goto bad_opt; 6438 break; 6439 6440 case IP6OPT_JUMBO: 6441 if (hdr_type != IPPROTO_HOPOPTS) 6442 goto opt_error; 6443 goto opt_error; /* XXX Not implemented! 
*/ 6444 6445 case IP6OPT_ROUTER_ALERT: { 6446 struct ip6_opt_router *or; 6447 6448 if (hdr_type != IPPROTO_HOPOPTS) 6449 goto opt_error; 6450 optused = 2 + optptr[1]; 6451 if (optused > optlen) 6452 goto bad_opt; 6453 or = (struct ip6_opt_router *)optptr; 6454 /* Check total length and alignment */ 6455 if (optused != sizeof (*or) || 6456 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6457 goto opt_error; 6458 /* Check value */ 6459 switch (*((uint16_t *)or->ip6or_value)) { 6460 case IP6_ALERT_MLD: 6461 case IP6_ALERT_RSVP: 6462 ret = 1; 6463 } 6464 break; 6465 } 6466 case IP6OPT_HOME_ADDRESS: { 6467 /* 6468 * Minimal support for the home address option 6469 * (which is required by all IPv6 nodes). 6470 * Implement by just swapping the home address 6471 * and source address. 6472 * XXX Note: this has IPsec implications since 6473 * AH needs to take this into account. 6474 * Also, when IPsec is used we need to ensure 6475 * that this is only processed once 6476 * in the received packet (to avoid swapping 6477 * back and forth). 6478 * NOTE:This option processing is considered 6479 * to be unsafe and prone to a denial of 6480 * service attack. 6481 * The current processing is not safe even with 6482 * IPsec secured IP packets. Since the home 6483 * address option processing requirement still 6484 * is in the IETF draft and in the process of 6485 * being redefined for its usage, it has been 6486 * decided to turn off the option by default. 6487 * If this section of code needs to be executed, 6488 * ndd variable ip6_ignore_home_address_opt 6489 * should be set to 0 at the user's own risk. 6490 */ 6491 struct ip6_opt_home_address *oh; 6492 in6_addr_t tmp; 6493 6494 if (ipv6_ignore_home_address_opt) 6495 goto opt_error; 6496 6497 if (hdr_type != IPPROTO_DSTOPTS) 6498 goto opt_error; 6499 optused = 2 + optptr[1]; 6500 if (optused > optlen) 6501 goto bad_opt; 6502 6503 /* 6504 * We did this dest. opt the first time 6505 * around (i.e. before AH processing). 
6506 * If we've done AH... stop now. 6507 */ 6508 if (first_mp != mp) { 6509 ipsec_in_t *ii; 6510 6511 ii = (ipsec_in_t *)first_mp->b_rptr; 6512 if (ii->ipsec_in_ah_sa != NULL) 6513 break; 6514 } 6515 6516 oh = (struct ip6_opt_home_address *)optptr; 6517 /* Check total length and alignment */ 6518 if (optused < sizeof (*oh) || 6519 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6520 goto opt_error; 6521 /* Swap ip6_src and the home address */ 6522 tmp = ip6h->ip6_src; 6523 /* XXX Note: only 8 byte alignment option */ 6524 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6525 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6526 break; 6527 } 6528 6529 case IP6OPT_TUNNEL_LIMIT: 6530 if (hdr_type != IPPROTO_DSTOPTS) { 6531 goto opt_error; 6532 } 6533 optused = 2 + optptr[1]; 6534 if (optused > optlen) { 6535 goto bad_opt; 6536 } 6537 if (optused != 3) { 6538 goto opt_error; 6539 } 6540 break; 6541 6542 default: 6543 errtype = "unknown"; 6544 /* FALLTHROUGH */ 6545 opt_error: 6546 /* Determine which zone should send error */ 6547 zoneid = ipif_lookup_addr_zoneid_v6( 6548 &ip6h->ip6_dst, ill); 6549 switch (IP6OPT_TYPE(opt_type)) { 6550 case IP6OPT_TYPE_SKIP: 6551 optused = 2 + optptr[1]; 6552 if (optused > optlen) 6553 goto bad_opt; 6554 ip1dbg(("ip_process_options_v6: %s " 6555 "opt 0x%x skipped\n", 6556 errtype, opt_type)); 6557 break; 6558 case IP6OPT_TYPE_DISCARD: 6559 ip1dbg(("ip_process_options_v6: %s " 6560 "opt 0x%x; packet dropped\n", 6561 errtype, opt_type)); 6562 freemsg(first_mp); 6563 return (-1); 6564 case IP6OPT_TYPE_ICMP: 6565 if (zoneid == ALL_ZONES) { 6566 freemsg(first_mp); 6567 return (-1); 6568 } 6569 icmp_param_problem_v6(WR(q), first_mp, 6570 ICMP6_PARAMPROB_OPTION, 6571 (uint32_t)(optptr - 6572 (uint8_t *)ip6h), 6573 B_FALSE, B_FALSE, zoneid); 6574 return (-1); 6575 case IP6OPT_TYPE_FORCEICMP: 6576 if (zoneid == ALL_ZONES) { 6577 freemsg(first_mp); 6578 return (-1); 6579 } 6580 icmp_param_problem_v6(WR(q), first_mp, 6581 ICMP6_PARAMPROB_OPTION, 6582 
(uint32_t)(optptr - 6583 (uint8_t *)ip6h), 6584 B_FALSE, B_TRUE, zoneid); 6585 return (-1); 6586 default: 6587 ASSERT(0); 6588 } 6589 } 6590 } 6591 optlen -= optused; 6592 optptr += optused; 6593 } 6594 return (ret); 6595 6596 bad_opt: 6597 /* Determine which zone should send error */ 6598 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 6599 if (zoneid == ALL_ZONES) { 6600 freemsg(first_mp); 6601 } else { 6602 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6603 (uint32_t)(optptr - (uint8_t *)ip6h), 6604 B_FALSE, B_FALSE, zoneid); 6605 } 6606 return (-1); 6607 } 6608 6609 /* 6610 * Process a routing header that is not yet empty. 6611 * Only handles type 0 routing headers. 6612 */ 6613 static void 6614 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6615 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6616 { 6617 ip6_rthdr0_t *rthdr; 6618 uint_t ehdrlen; 6619 uint_t numaddr; 6620 in6_addr_t *addrptr; 6621 in6_addr_t tmp; 6622 6623 ASSERT(rth->ip6r_segleft != 0); 6624 6625 if (!ipv6_forward_src_routed) { 6626 /* XXX Check for source routed out same interface? 
*/ 6627 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 6628 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 6629 freemsg(hada_mp); 6630 freemsg(mp); 6631 return; 6632 } 6633 6634 if (rth->ip6r_type != 0) { 6635 if (hada_mp != NULL) 6636 goto hada_drop; 6637 /* Sent by forwarding path, and router is global zone */ 6638 icmp_param_problem_v6(WR(q), mp, 6639 ICMP6_PARAMPROB_HEADER, 6640 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6641 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6642 return; 6643 } 6644 rthdr = (ip6_rthdr0_t *)rth; 6645 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6646 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6647 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6648 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6649 if (rthdr->ip6r0_len & 0x1) { 6650 /* An odd length is impossible */ 6651 if (hada_mp != NULL) 6652 goto hada_drop; 6653 /* Sent by forwarding path, and router is global zone */ 6654 icmp_param_problem_v6(WR(q), mp, 6655 ICMP6_PARAMPROB_HEADER, 6656 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6657 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6658 return; 6659 } 6660 numaddr = rthdr->ip6r0_len / 2; 6661 if (rthdr->ip6r0_segleft > numaddr) { 6662 /* segleft exceeds number of addresses in routing header */ 6663 if (hada_mp != NULL) 6664 goto hada_drop; 6665 /* Sent by forwarding path, and router is global zone */ 6666 icmp_param_problem_v6(WR(q), mp, 6667 ICMP6_PARAMPROB_HEADER, 6668 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6669 (uchar_t *)ip6h), 6670 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6671 return; 6672 } 6673 addrptr += (numaddr - rthdr->ip6r0_segleft); 6674 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6675 IN6_IS_ADDR_MULTICAST(addrptr)) { 6676 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6677 freemsg(hada_mp); 6678 freemsg(mp); 6679 return; 6680 } 6681 /* Swap */ 6682 tmp = *addrptr; 6683 *addrptr = ip6h->ip6_dst; 6684 ip6h->ip6_dst = tmp; 6685 rthdr->ip6r0_segleft--; 6686 /* Don't allow any mapped 
addresses - ip_wput_v6 can't handle them */ 6687 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6688 if (hada_mp != NULL) 6689 goto hada_drop; 6690 /* Sent by forwarding path, and router is global zone */ 6691 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6692 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6693 return; 6694 } 6695 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6696 return; 6697 hada_drop: 6698 /* IPsec kstats: bean counter? */ 6699 freemsg(hada_mp); 6700 freemsg(mp); 6701 } 6702 6703 /* 6704 * Read side put procedure for IPv6 module. 6705 */ 6706 static void 6707 ip_rput_v6(queue_t *q, mblk_t *mp) 6708 { 6709 mblk_t *first_mp; 6710 mblk_t *hada_mp = NULL; 6711 ip6_t *ip6h; 6712 boolean_t ll_multicast = B_FALSE; 6713 boolean_t mctl_present = B_FALSE; 6714 ill_t *ill; 6715 struct iocblk *iocp; 6716 uint_t flags = 0; 6717 mblk_t *dl_mp; 6718 6719 ill = (ill_t *)q->q_ptr; 6720 if (ill->ill_state_flags & ILL_CONDEMNED) { 6721 union DL_primitives *dl; 6722 6723 dl = (union DL_primitives *)mp->b_rptr; 6724 /* 6725 * Things are opening or closing - only accept DLPI 6726 * ack messages. If the stream is closing and ip_wsrv 6727 * has completed, ip_close is out of the qwait, but has 6728 * not yet completed qprocsoff. Don't proceed any further 6729 * because the ill has been cleaned up and things hanging 6730 * off the ill have been freed. 6731 */ 6732 if ((mp->b_datap->db_type != M_PCPROTO) || 6733 (dl->dl_primitive == DL_UNITDATA_IND)) { 6734 inet_freemsg(mp); 6735 return; 6736 } 6737 } 6738 6739 dl_mp = NULL; 6740 switch (mp->b_datap->db_type) { 6741 case M_DATA: { 6742 int hlen; 6743 uchar_t *ucp; 6744 struct ether_header *eh; 6745 dl_unitdata_ind_t *dui; 6746 6747 /* 6748 * This is a work-around for CR 6451644, a bug in Nemo. It 6749 * should be removed when that problem is fixed. 
6750 */ 6751 if (ill->ill_mactype == DL_ETHER && 6752 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6753 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6754 ucp[-2] == (IP6_DL_SAP >> 8)) { 6755 if (hlen >= sizeof (struct ether_vlan_header) && 6756 ucp[-5] == 0 && ucp[-6] == 0x81) 6757 ucp -= sizeof (struct ether_vlan_header); 6758 else 6759 ucp -= sizeof (struct ether_header); 6760 /* 6761 * If it's a group address, then fabricate a 6762 * DL_UNITDATA_IND message. 6763 */ 6764 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6765 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6766 BPRI_HI)) != NULL) { 6767 eh = (struct ether_header *)ucp; 6768 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6769 DB_TYPE(dl_mp) = M_PROTO; 6770 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6771 dui->dl_primitive = DL_UNITDATA_IND; 6772 dui->dl_dest_addr_length = 8; 6773 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6774 dui->dl_src_addr_length = 8; 6775 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6776 8; 6777 dui->dl_group_address = 1; 6778 ucp = (uchar_t *)(dui + 1); 6779 if (ill->ill_sap_length > 0) 6780 ucp += ill->ill_sap_length; 6781 bcopy(&eh->ether_dhost, ucp, 6); 6782 bcopy(&eh->ether_shost, ucp + 8, 6); 6783 ucp = (uchar_t *)(dui + 1); 6784 if (ill->ill_sap_length < 0) 6785 ucp += 8 + ill->ill_sap_length; 6786 bcopy(&eh->ether_type, ucp, 2); 6787 bcopy(&eh->ether_type, ucp + 8, 2); 6788 } 6789 } 6790 break; 6791 } 6792 6793 case M_PROTO: 6794 case M_PCPROTO: 6795 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6796 DL_UNITDATA_IND) { 6797 /* Go handle anything other than data elsewhere. */ 6798 ip_rput_dlpi(q, mp); 6799 return; 6800 } 6801 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6802 ll_multicast = dlur->dl_group_address; 6803 #undef dlur 6804 /* Save the DLPI header. 
*/ 6805 dl_mp = mp; 6806 mp = mp->b_cont; 6807 dl_mp->b_cont = NULL; 6808 break; 6809 case M_BREAK: 6810 panic("ip_rput_v6: got an M_BREAK"); 6811 /*NOTREACHED*/ 6812 case M_IOCACK: 6813 iocp = (struct iocblk *)mp->b_rptr; 6814 switch (iocp->ioc_cmd) { 6815 case DL_IOC_HDR_INFO: 6816 ill = (ill_t *)q->q_ptr; 6817 ill_fastpath_ack(ill, mp); 6818 return; 6819 case SIOCSTUNPARAM: 6820 case SIOCGTUNPARAM: 6821 case OSIOCSTUNPARAM: 6822 case OSIOCGTUNPARAM: 6823 /* Go through qwriter */ 6824 break; 6825 default: 6826 putnext(q, mp); 6827 return; 6828 } 6829 /* FALLTHRU */ 6830 case M_ERROR: 6831 case M_HANGUP: 6832 mutex_enter(&ill->ill_lock); 6833 if (ill->ill_state_flags & ILL_CONDEMNED) { 6834 mutex_exit(&ill->ill_lock); 6835 freemsg(mp); 6836 return; 6837 } 6838 ill_refhold_locked(ill); 6839 mutex_exit(&ill->ill_lock); 6840 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6841 return; 6842 case M_CTL: 6843 if ((MBLKL(mp) > sizeof (int)) && 6844 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6845 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6846 mctl_present = B_TRUE; 6847 break; 6848 } 6849 putnext(q, mp); 6850 return; 6851 case M_IOCNAK: 6852 iocp = (struct iocblk *)mp->b_rptr; 6853 switch (iocp->ioc_cmd) { 6854 case DL_IOC_HDR_INFO: 6855 case SIOCSTUNPARAM: 6856 case SIOCGTUNPARAM: 6857 case OSIOCSTUNPARAM: 6858 case OSIOCGTUNPARAM: 6859 mutex_enter(&ill->ill_lock); 6860 if (ill->ill_state_flags & ILL_CONDEMNED) { 6861 mutex_exit(&ill->ill_lock); 6862 freemsg(mp); 6863 return; 6864 } 6865 ill_refhold_locked(ill); 6866 mutex_exit(&ill->ill_lock); 6867 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6868 B_FALSE); 6869 return; 6870 default: 6871 break; 6872 } 6873 /* FALLTHRU */ 6874 default: 6875 putnext(q, mp); 6876 return; 6877 } 6878 6879 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 6880 /* 6881 * if db_ref > 1 then copymsg and free original. 
Packet may be 6882 * changed and do not want other entity who has a reference to this 6883 * message to trip over the changes. This is a blind change because 6884 * trying to catch all places that might change packet is too 6885 * difficult (since it may be a module above this one). 6886 */ 6887 if (mp->b_datap->db_ref > 1) { 6888 mblk_t *mp1; 6889 6890 mp1 = copymsg(mp); 6891 freemsg(mp); 6892 if (mp1 == NULL) { 6893 first_mp = NULL; 6894 goto discard; 6895 } 6896 mp = mp1; 6897 } 6898 first_mp = mp; 6899 if (mctl_present) { 6900 hada_mp = first_mp; 6901 mp = first_mp->b_cont; 6902 } 6903 6904 ip6h = (ip6_t *)mp->b_rptr; 6905 6906 /* check for alignment and full IPv6 header */ 6907 if (!OK_32PTR((uchar_t *)ip6h) || 6908 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6909 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6910 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6911 goto discard; 6912 } 6913 ip6h = (ip6_t *)mp->b_rptr; 6914 } 6915 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6916 IPV6_DEFAULT_VERS_AND_FLOW) { 6917 /* 6918 * It may be a bit too expensive to do this mapped address 6919 * check here, but in the interest of robustness, it seems 6920 * like the correct place. 6921 * TODO: Avoid this check for e.g. connected TCP sockets 6922 */ 6923 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6924 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6925 goto discard; 6926 } 6927 6928 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6929 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6930 goto discard; 6931 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6932 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6933 goto discard; 6934 } 6935 6936 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6937 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6938 } else { 6939 BUMP_MIB(ill->ill_ip6_mib, ipv6InIPv4); 6940 goto discard; 6941 } 6942 freemsg(dl_mp); 6943 return; 6944 6945 discard: 6946 if (dl_mp != NULL) 6947 freeb(dl_mp); 6948 freemsg(first_mp); 6949 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 6950 } 6951 6952 /* 6953 * Walk through the IPv6 packet in mp and see if there's an AH header 6954 * in it. See if the AH header needs to get done before other headers in 6955 * the packet. (Worker function for ipsec_early_ah_v6().) 6956 */ 6957 #define IPSEC_HDR_DONT_PROCESS 0 6958 #define IPSEC_HDR_PROCESS 1 6959 #define IPSEC_MEMORY_ERROR 2 6960 static int 6961 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6962 { 6963 uint_t length; 6964 uint_t ehdrlen; 6965 uint8_t *whereptr; 6966 uint8_t *endptr; 6967 uint8_t *nexthdrp; 6968 ip6_dest_t *desthdr; 6969 ip6_rthdr_t *rthdr; 6970 ip6_t *ip6h; 6971 6972 /* 6973 * For now just pullup everything. In general, the less pullups, 6974 * the better, but there's so much squirrelling through anyway, 6975 * it's just easier this way. 6976 */ 6977 if (!pullupmsg(mp, -1)) { 6978 return (IPSEC_MEMORY_ERROR); 6979 } 6980 6981 ip6h = (ip6_t *)mp->b_rptr; 6982 length = IPV6_HDR_LEN; 6983 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6984 endptr = mp->b_wptr; 6985 6986 /* 6987 * We can't just use the argument nexthdr in the place 6988 * of nexthdrp becaue we don't dereference nexthdrp 6989 * till we confirm whether it is a valid address. 6990 */ 6991 nexthdrp = &ip6h->ip6_nxt; 6992 while (whereptr < endptr) { 6993 /* Is there enough left for len + nexthdr? 
*/ 6994 if (whereptr + MIN_EHDR_LEN > endptr) 6995 return (IPSEC_MEMORY_ERROR); 6996 6997 switch (*nexthdrp) { 6998 case IPPROTO_HOPOPTS: 6999 case IPPROTO_DSTOPTS: 7000 /* Assumes the headers are identical for hbh and dst */ 7001 desthdr = (ip6_dest_t *)whereptr; 7002 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7003 if ((uchar_t *)desthdr + ehdrlen > endptr) 7004 return (IPSEC_MEMORY_ERROR); 7005 /* 7006 * Return DONT_PROCESS because of potential Mobile IPv6 7007 * cruft for destination options. 7008 */ 7009 if (*nexthdrp == IPPROTO_DSTOPTS) 7010 return (IPSEC_HDR_DONT_PROCESS); 7011 nexthdrp = &desthdr->ip6d_nxt; 7012 break; 7013 case IPPROTO_ROUTING: 7014 rthdr = (ip6_rthdr_t *)whereptr; 7015 7016 /* 7017 * If there's more hops left on the routing header, 7018 * return now with DON'T PROCESS. 7019 */ 7020 if (rthdr->ip6r_segleft > 0) 7021 return (IPSEC_HDR_DONT_PROCESS); 7022 7023 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7024 if ((uchar_t *)rthdr + ehdrlen > endptr) 7025 return (IPSEC_MEMORY_ERROR); 7026 nexthdrp = &rthdr->ip6r_nxt; 7027 break; 7028 case IPPROTO_FRAGMENT: 7029 /* Wait for reassembly */ 7030 return (IPSEC_HDR_DONT_PROCESS); 7031 case IPPROTO_AH: 7032 *nexthdr = IPPROTO_AH; 7033 return (IPSEC_HDR_PROCESS); 7034 case IPPROTO_NONE: 7035 /* No next header means we're finished */ 7036 default: 7037 return (IPSEC_HDR_DONT_PROCESS); 7038 } 7039 length += ehdrlen; 7040 whereptr += ehdrlen; 7041 } 7042 panic("ipsec_needs_processing_v6"); 7043 /*NOTREACHED*/ 7044 } 7045 7046 /* 7047 * Path for AH if options are present. If this is the first time we are 7048 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7049 * Otherwise, just fanout. Return value answers the boolean question: 7050 * "Did I consume the mblk you sent me?" 7051 * 7052 * Sometimes AH needs to be done before other IPv6 headers for security 7053 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 7054 * indicates if that is so, and fans out to the appropriate IPsec protocol 7055 * for the datagram passed in. 7056 */ 7057 static boolean_t 7058 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7059 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 7060 { 7061 mblk_t *mp; 7062 uint8_t nexthdr; 7063 ipsec_in_t *ii = NULL; 7064 ah_t *ah; 7065 ipsec_status_t ipsec_rc; 7066 7067 ASSERT((hada_mp == NULL) || (!mctl_present)); 7068 7069 switch (ipsec_needs_processing_v6( 7070 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 7071 case IPSEC_MEMORY_ERROR: 7072 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7073 freemsg(hada_mp); 7074 freemsg(first_mp); 7075 return (B_TRUE); 7076 case IPSEC_HDR_DONT_PROCESS: 7077 return (B_FALSE); 7078 } 7079 7080 /* Default means send it to AH! */ 7081 ASSERT(nexthdr == IPPROTO_AH); 7082 if (!mctl_present) { 7083 mp = first_mp; 7084 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 7085 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7086 "allocation failure.\n")); 7087 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7088 freemsg(hada_mp); 7089 freemsg(mp); 7090 return (B_TRUE); 7091 } 7092 /* 7093 * Store the ill_index so that when we come back 7094 * from IPSEC we ride on the same queue. 7095 */ 7096 ii = (ipsec_in_t *)first_mp->b_rptr; 7097 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7098 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7099 first_mp->b_cont = mp; 7100 } 7101 /* 7102 * Cache hardware acceleration info. 
7103 */ 7104 if (hada_mp != NULL) { 7105 ASSERT(ii != NULL); 7106 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7107 "caching data attr.\n")); 7108 ii->ipsec_in_accelerated = B_TRUE; 7109 ii->ipsec_in_da = hada_mp; 7110 } 7111 7112 if (!ipsec_loaded()) { 7113 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 7114 return (B_TRUE); 7115 } 7116 7117 ah = ipsec_inbound_ah_sa(first_mp); 7118 if (ah == NULL) 7119 return (B_TRUE); 7120 ASSERT(ii->ipsec_in_ah_sa != NULL); 7121 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7122 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7123 7124 switch (ipsec_rc) { 7125 case IPSEC_STATUS_SUCCESS: 7126 /* we're done with IPsec processing, send it up */ 7127 ip_fanout_proto_again(first_mp, ill, ill, ire); 7128 break; 7129 case IPSEC_STATUS_FAILED: 7130 BUMP_MIB(&ip6_mib, ipv6InDiscards); 7131 break; 7132 case IPSEC_STATUS_PENDING: 7133 /* no action needed */ 7134 break; 7135 } 7136 return (B_TRUE); 7137 } 7138 7139 /* 7140 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7141 * ip_rput_v6 has already verified alignment, the min length, the version, 7142 * and db_ref = 1. 7143 * 7144 * The ill passed in (the arg named inill) is the ill that the packet 7145 * actually arrived on. We need to remember this when saving the 7146 * input interface index into potential IPV6_PKTINFO data in 7147 * ip_add_info_v6(). 7148 * 7149 * This routine doesn't free dl_mp; that's the caller's responsibility on 7150 * return. (Note that the callers are complex enough that there's no tail 7151 * recursion here anyway.) 
7152 */ 7153 void 7154 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7155 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7156 { 7157 ire_t *ire = NULL; 7158 queue_t *rq; 7159 ill_t *ill = inill; 7160 ipif_t *ipif; 7161 uint8_t *whereptr; 7162 uint8_t nexthdr; 7163 uint16_t remlen; 7164 uint_t prev_nexthdr_offset; 7165 uint_t used; 7166 size_t pkt_len; 7167 uint16_t ip6_len; 7168 uint_t hdr_len; 7169 boolean_t mctl_present; 7170 mblk_t *first_mp; 7171 mblk_t *first_mp1; 7172 boolean_t no_forward; 7173 ip6_hbh_t *hbhhdr; 7174 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7175 conn_t *connp; 7176 ilm_t *ilm; 7177 uint32_t ports; 7178 uint_t ipif_id = 0; 7179 zoneid_t zoneid = GLOBAL_ZONEID; 7180 uint16_t hck_flags, reass_hck_flags; 7181 uint32_t reass_sum; 7182 boolean_t cksum_err; 7183 mblk_t *mp1; 7184 7185 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7186 7187 if (hada_mp != NULL) { 7188 /* 7189 * It's an IPsec accelerated packet. 7190 * Keep a pointer to the data attributes around until 7191 * we allocate the ipsecinfo structure. 7192 */ 7193 IPSECHW_DEBUG(IPSECHW_PKT, 7194 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7195 hada_mp->b_cont = NULL; 7196 /* 7197 * Since it is accelerated, it came directly from 7198 * the ill. 7199 */ 7200 ASSERT(mctl_present == B_FALSE); 7201 ASSERT(mp->b_datap->db_type != M_CTL); 7202 } 7203 7204 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7205 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7206 7207 if (mp->b_cont == NULL) 7208 pkt_len = mp->b_wptr - mp->b_rptr; 7209 else 7210 pkt_len = msgdsize(mp); 7211 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7212 7213 /* 7214 * Check for bogus (too short packet) and packet which 7215 * was padded by the link layer. 
7216 */ 7217 if (ip6_len != pkt_len) { 7218 ssize_t diff; 7219 7220 if (ip6_len > pkt_len) { 7221 ip1dbg(("ip_rput_data_v6: packet too short %d %lu\n", 7222 ip6_len, pkt_len)); 7223 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 7224 freemsg(hada_mp); 7225 freemsg(first_mp); 7226 return; 7227 } 7228 diff = (ssize_t)(pkt_len - ip6_len); 7229 7230 if (!adjmsg(mp, -diff)) { 7231 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7232 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7233 freemsg(hada_mp); 7234 freemsg(first_mp); 7235 return; 7236 } 7237 pkt_len -= diff; 7238 } 7239 7240 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7241 hck_flags = DB_CKSUMFLAGS(mp); 7242 else 7243 hck_flags = 0; 7244 7245 /* Clear checksum flags in case we need to forward */ 7246 DB_CKSUMFLAGS(mp) = 0; 7247 reass_sum = reass_hck_flags = 0; 7248 7249 nexthdr = ip6h->ip6_nxt; 7250 7251 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7252 (uchar_t *)ip6h); 7253 whereptr = (uint8_t *)&ip6h[1]; 7254 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7255 7256 /* Process hop by hop header options */ 7257 if (nexthdr == IPPROTO_HOPOPTS) { 7258 uint_t ehdrlen; 7259 uint8_t *optptr; 7260 7261 if (remlen < MIN_EHDR_LEN) 7262 goto pkt_too_short; 7263 if (mp->b_cont != NULL && 7264 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7265 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7266 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7267 freemsg(hada_mp); 7268 freemsg(first_mp); 7269 return; 7270 } 7271 ip6h = (ip6_t *)mp->b_rptr; 7272 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7273 } 7274 hbhhdr = (ip6_hbh_t *)whereptr; 7275 nexthdr = hbhhdr->ip6h_nxt; 7276 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7277 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7278 7279 if (remlen < ehdrlen) 7280 goto pkt_too_short; 7281 if (mp->b_cont != NULL && 7282 whereptr + ehdrlen > mp->b_wptr) { 7283 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7284 BUMP_MIB(ill->ill_ip6_mib, 
ipv6InDiscards); 7285 freemsg(hada_mp); 7286 freemsg(first_mp); 7287 return; 7288 } 7289 ip6h = (ip6_t *)mp->b_rptr; 7290 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7291 hbhhdr = (ip6_hbh_t *)whereptr; 7292 } 7293 7294 optptr = whereptr + 2; 7295 whereptr += ehdrlen; 7296 remlen -= ehdrlen; 7297 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7298 ehdrlen - 2, IPPROTO_HOPOPTS)) { 7299 case -1: 7300 /* 7301 * Packet has been consumed and any 7302 * needed ICMP messages sent. 7303 */ 7304 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7305 freemsg(hada_mp); 7306 return; 7307 case 0: 7308 /* no action needed */ 7309 break; 7310 case 1: 7311 /* Known router alert */ 7312 goto ipv6forus; 7313 } 7314 } 7315 7316 /* 7317 * Attach any necessary label information to this packet. 7318 */ 7319 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7320 if (ip6opt_ls != 0) 7321 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7322 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 7323 freemsg(hada_mp); 7324 freemsg(first_mp); 7325 return; 7326 } 7327 7328 /* 7329 * On incoming v6 multicast packets we will bypass the ire table, 7330 * and assume that the read queue corresponds to the targetted 7331 * interface. 7332 * 7333 * The effect of this is the same as the IPv4 original code, but is 7334 * much cleaner I think. See ip_rput for how that was done. 7335 */ 7336 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7337 BUMP_MIB(ill->ill_ip6_mib, ipv6InMcastPkts); 7338 /* 7339 * XXX TODO Give to mrouted to for multicast forwarding. 
7340 */ 7341 ILM_WALKER_HOLD(ill); 7342 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7343 ILM_WALKER_RELE(ill); 7344 if (ilm == NULL) { 7345 if (ip_debug > 3) { 7346 /* ip2dbg */ 7347 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7348 " which is not for us: %s\n", AF_INET6, 7349 &ip6h->ip6_dst); 7350 } 7351 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7352 freemsg(hada_mp); 7353 freemsg(first_mp); 7354 return; 7355 } 7356 if (ip_debug > 3) { 7357 /* ip2dbg */ 7358 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7359 AF_INET6, &ip6h->ip6_dst); 7360 } 7361 rq = ill->ill_rq; 7362 zoneid = GLOBAL_ZONEID; 7363 goto ipv6forus; 7364 } 7365 7366 ipif = ill->ill_ipif; 7367 7368 /* 7369 * If a packet was received on an interface that is a 6to4 tunnel, 7370 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7371 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7372 * the 6to4 prefix of the address configured on the receiving interface. 7373 * Otherwise, the packet was delivered to this interface in error and 7374 * the packet must be dropped. 7375 */ 7376 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7377 7378 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7379 &ip6h->ip6_dst)) { 7380 if (ip_debug > 2) { 7381 /* ip1dbg */ 7382 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7383 "addressed packet which is not for us: " 7384 "%s\n", AF_INET6, &ip6h->ip6_dst); 7385 } 7386 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 7387 freemsg(first_mp); 7388 return; 7389 } 7390 } 7391 7392 /* 7393 * Find an ire that matches destination. For link-local addresses 7394 * we have to match the ill. 7395 * TBD for site local addresses. 
7396 */ 7397 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7398 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7399 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7400 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7401 } else { 7402 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7403 MBLK_GETLABEL(mp)); 7404 } 7405 if (ire == NULL) { 7406 /* 7407 * No matching IRE found. Mark this packet as having 7408 * originated externally. 7409 */ 7410 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7411 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7412 if (!(ill->ill_flags & ILLF_ROUTER)) 7413 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7414 freemsg(hada_mp); 7415 freemsg(first_mp); 7416 return; 7417 } 7418 if (ip6h->ip6_hops <= 1) { 7419 if (hada_mp != NULL) 7420 goto hada_drop; 7421 /* Sent by forwarding path, and router is global zone */ 7422 icmp_time_exceeded_v6(WR(q), first_mp, 7423 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7424 GLOBAL_ZONEID); 7425 return; 7426 } 7427 /* 7428 * Per RFC 3513 section 2.5.2, we must not forward packets with 7429 * an unspecified source address. 7430 */ 7431 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7432 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7433 freemsg(hada_mp); 7434 freemsg(first_mp); 7435 return; 7436 } 7437 mp->b_prev = (mblk_t *)(uintptr_t) 7438 ill->ill_phyint->phyint_ifindex; 7439 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7440 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7441 ALL_ZONES); 7442 return; 7443 } 7444 ipif_id = ire->ire_ipif->ipif_seqid; 7445 /* we have a matching IRE */ 7446 if (ire->ire_stq != NULL) { 7447 ill_group_t *ill_group; 7448 ill_group_t *ire_group; 7449 7450 /* 7451 * To be quicker, we may wish not to chase pointers 7452 * (ire->ire_ipif->ipif_ill...) and instead store the 7453 * forwarding policy in the ire. An unfortunate side- 7454 * effect of this would be requiring an ire flush whenever 7455 * the ILLF_ROUTER flag changes. 
For now, chase pointers 7456 * once and store in the boolean no_forward. 7457 * 7458 * This appears twice to keep it out of the non-forwarding, 7459 * yes-it's-for-us-on-the-right-interface case. 7460 */ 7461 no_forward = ((ill->ill_flags & 7462 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7463 7464 7465 ASSERT(first_mp == mp); 7466 /* 7467 * This ire has a send-to queue - forward the packet. 7468 */ 7469 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7470 freemsg(hada_mp); 7471 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7472 if (no_forward) 7473 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7474 freemsg(mp); 7475 ire_refrele(ire); 7476 return; 7477 } 7478 if (ip6h->ip6_hops <= 1) { 7479 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7480 /* Sent by forwarding path, and router is global zone */ 7481 icmp_time_exceeded_v6(WR(q), mp, 7482 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7483 GLOBAL_ZONEID); 7484 ire_refrele(ire); 7485 return; 7486 } 7487 /* 7488 * Per RFC 3513 section 2.5.2, we must not forward packets with 7489 * an unspecified source address. 
7490 */ 7491 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7492 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7493 freemsg(mp); 7494 ire_refrele(ire); 7495 return; 7496 } 7497 7498 if (is_system_labeled()) { 7499 mblk_t *mp1; 7500 7501 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7502 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7503 freemsg(mp); 7504 ire_refrele(ire); 7505 return; 7506 } 7507 /* Size may have changed */ 7508 mp = mp1; 7509 ip6h = (ip6_t *)mp->b_rptr; 7510 pkt_len = msgdsize(mp); 7511 } 7512 7513 if (pkt_len > ire->ire_max_frag) { 7514 BUMP_MIB(ill->ill_ip6_mib, ipv6InTooBigErrors); 7515 /* Sent by forwarding path, and router is global zone */ 7516 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7517 ll_multicast, B_TRUE, GLOBAL_ZONEID); 7518 ire_refrele(ire); 7519 return; 7520 } 7521 7522 /* 7523 * Check to see if we're forwarding the packet to a 7524 * different link from which it came. If so, check the 7525 * source and destination addresses since routers must not 7526 * forward any packets with link-local source or 7527 * destination addresses to other links. Otherwise (if 7528 * we're forwarding onto the same link), conditionally send 7529 * a redirect message. 7530 */ 7531 ill_group = ill->ill_group; 7532 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7533 if (ire->ire_rfq != q && (ill_group == NULL || 7534 ill_group != ire_group)) { 7535 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7536 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7537 BUMP_MIB(ill->ill_ip6_mib, ipv6InAddrErrors); 7538 freemsg(mp); 7539 ire_refrele(ire); 7540 return; 7541 } 7542 /* TBD add site-local check at site boundary? */ 7543 } else if (ipv6_send_redirects) { 7544 in6_addr_t *v6targ; 7545 in6_addr_t gw_addr_v6; 7546 ire_t *src_ire_v6 = NULL; 7547 7548 /* 7549 * Don't send a redirect when forwarding a source 7550 * routed packet. 
7551 */ 7552 if (ip_source_routed_v6(ip6h, mp)) 7553 goto forward; 7554 7555 mutex_enter(&ire->ire_lock); 7556 gw_addr_v6 = ire->ire_gateway_addr_v6; 7557 mutex_exit(&ire->ire_lock); 7558 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7559 v6targ = &gw_addr_v6; 7560 /* 7561 * We won't send redirects to a router 7562 * that doesn't have a link local 7563 * address, but will forward. 7564 */ 7565 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7566 BUMP_MIB(ill->ill_ip6_mib, 7567 ipv6InAddrErrors); 7568 goto forward; 7569 } 7570 } else { 7571 v6targ = &ip6h->ip6_dst; 7572 } 7573 7574 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7575 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7576 ALL_ZONES, 0, NULL, 7577 MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7578 7579 if (src_ire_v6 != NULL) { 7580 /* 7581 * The source is directly connected. 7582 */ 7583 mp1 = copymsg(mp); 7584 if (mp1 != NULL) { 7585 icmp_send_redirect_v6(WR(q), 7586 mp1, v6targ, &ip6h->ip6_dst, 7587 ill, B_FALSE); 7588 } 7589 ire_refrele(src_ire_v6); 7590 } 7591 } 7592 7593 forward: 7594 /* Hoplimit verified above */ 7595 ip6h->ip6_hops--; 7596 UPDATE_IB_PKT_COUNT(ire); 7597 ire->ire_last_used_time = lbolt; 7598 BUMP_MIB(ill->ill_ip6_mib, ipv6OutForwDatagrams); 7599 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7600 IRE_REFRELE(ire); 7601 return; 7602 } 7603 rq = ire->ire_rfq; 7604 7605 /* 7606 * Need to put on correct queue for reassembly to find it. 7607 * No need to use put() since reassembly has its own locks. 7608 * Note: multicast packets and packets destined to addresses 7609 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7610 * the arriving ill. 7611 */ 7612 if (rq != q) { 7613 boolean_t check_multi = B_TRUE; 7614 ill_group_t *ill_group = NULL; 7615 ill_group_t *ire_group = NULL; 7616 ill_t *ire_ill = NULL; 7617 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7618 7619 /* 7620 * To be quicker, we may wish not to chase pointers 7621 * (ire->ire_ipif->ipif_ill...) 
and instead store the 7622 * forwarding policy in the ire. An unfortunate side- 7623 * effect of this would be requiring an ire flush whenever 7624 * the ILLF_ROUTER flag changes. For now, chase pointers 7625 * once and store in the boolean no_forward. 7626 */ 7627 no_forward = ((ill->ill_flags & 7628 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7629 7630 ill_group = ill->ill_group; 7631 if (rq != NULL) { 7632 ire_ill = (ill_t *)(rq->q_ptr); 7633 ire_group = ire_ill->ill_group; 7634 } 7635 7636 /* 7637 * If it's part of the same IPMP group, or if it's a legal 7638 * address on the 'usesrc' interface, then bypass strict 7639 * checks. 7640 */ 7641 if (ill_group != NULL && ill_group == ire_group) { 7642 check_multi = B_FALSE; 7643 } else if (ill_ifindex != 0 && ire_ill != NULL && 7644 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7645 check_multi = B_FALSE; 7646 } 7647 7648 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7649 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7650 /* 7651 * This packet came in on an interface other than the 7652 * one associated with the destination address 7653 * and we are strict about matches. 7654 * 7655 * As long as the ills belong to the same group, 7656 * we don't consider them to arriving on the wrong 7657 * interface. Thus, when the switch is doing inbound 7658 * load spreading, we won't drop packets when we 7659 * are doing strict multihoming checks. 7660 */ 7661 BUMP_MIB(ill->ill_ip6_mib, ipv6ForwProhibits); 7662 freemsg(hada_mp); 7663 freemsg(first_mp); 7664 ire_refrele(ire); 7665 return; 7666 } 7667 7668 if (rq != NULL) 7669 q = rq; 7670 7671 ill = (ill_t *)q->q_ptr; 7672 ASSERT(ill); 7673 } 7674 7675 zoneid = ire->ire_zoneid; 7676 UPDATE_IB_PKT_COUNT(ire); 7677 ire->ire_last_used_time = lbolt; 7678 /* Don't use the ire after this point. */ 7679 ire_refrele(ire); 7680 ipv6forus: 7681 /* 7682 * Looks like this packet is for us one way or another. 
7683 * This is where we'll process destination headers etc. 7684 */ 7685 for (; ; ) { 7686 switch (nexthdr) { 7687 case IPPROTO_TCP: { 7688 uint16_t *up; 7689 uint32_t sum; 7690 int offset; 7691 7692 hdr_len = pkt_len - remlen; 7693 7694 if (hada_mp != NULL) { 7695 ip0dbg(("tcp hada drop\n")); 7696 goto hada_drop; 7697 } 7698 7699 7700 /* TCP needs all of the TCP header */ 7701 if (remlen < TCP_MIN_HEADER_LENGTH) 7702 goto pkt_too_short; 7703 if (mp->b_cont != NULL && 7704 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7705 if (!pullupmsg(mp, 7706 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7707 BUMP_MIB(ill->ill_ip6_mib, 7708 ipv6InDiscards); 7709 freemsg(first_mp); 7710 return; 7711 } 7712 hck_flags = 0; 7713 ip6h = (ip6_t *)mp->b_rptr; 7714 whereptr = (uint8_t *)ip6h + hdr_len; 7715 } 7716 /* 7717 * Extract the offset field from the TCP header. 7718 */ 7719 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7720 if (offset != 5) { 7721 if (offset < 5) { 7722 ip1dbg(("ip_rput_data_v6: short " 7723 "TCP data offset")); 7724 BUMP_MIB(ill->ill_ip6_mib, 7725 ipv6InDiscards); 7726 freemsg(first_mp); 7727 return; 7728 } 7729 /* 7730 * There must be TCP options. 7731 * Make sure we can grab them. 7732 */ 7733 offset <<= 2; 7734 if (remlen < offset) 7735 goto pkt_too_short; 7736 if (mp->b_cont != NULL && 7737 whereptr + offset > mp->b_wptr) { 7738 if (!pullupmsg(mp, 7739 hdr_len + offset)) { 7740 BUMP_MIB(ill->ill_ip6_mib, 7741 ipv6InDiscards); 7742 freemsg(first_mp); 7743 return; 7744 } 7745 hck_flags = 0; 7746 ip6h = (ip6_t *)mp->b_rptr; 7747 whereptr = (uint8_t *)ip6h + hdr_len; 7748 } 7749 } 7750 7751 up = (uint16_t *)&ip6h->ip6_src; 7752 /* 7753 * TCP checksum calculation. 
First sum up the 7754 * pseudo-header fields: 7755 * - Source IPv6 address 7756 * - Destination IPv6 address 7757 * - TCP payload length 7758 * - TCP protocol ID 7759 */ 7760 sum = htons(IPPROTO_TCP + remlen) + 7761 up[0] + up[1] + up[2] + up[3] + 7762 up[4] + up[5] + up[6] + up[7] + 7763 up[8] + up[9] + up[10] + up[11] + 7764 up[12] + up[13] + up[14] + up[15]; 7765 7766 /* Fold initial sum */ 7767 sum = (sum & 0xffff) + (sum >> 16); 7768 7769 mp1 = mp->b_cont; 7770 7771 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7772 IP6_STAT(ip6_in_sw_cksum); 7773 7774 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7775 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7776 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7777 mp, mp1, cksum_err); 7778 7779 if (cksum_err) { 7780 BUMP_MIB(&ip_mib, tcpInErrs); 7781 7782 if (hck_flags & HCK_FULLCKSUM) 7783 IP6_STAT(ip6_tcp_in_full_hw_cksum_err); 7784 else if (hck_flags & HCK_PARTIALCKSUM) 7785 IP6_STAT(ip6_tcp_in_part_hw_cksum_err); 7786 else 7787 IP6_STAT(ip6_tcp_in_sw_cksum_err); 7788 7789 freemsg(first_mp); 7790 return; 7791 } 7792 tcp_fanout: 7793 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7794 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7795 IP_FF_IP6INFO), hdr_len, mctl_present, zoneid); 7796 return; 7797 } 7798 case IPPROTO_SCTP: 7799 { 7800 sctp_hdr_t *sctph; 7801 uint32_t calcsum, pktsum; 7802 uint_t hdr_len = pkt_len - remlen; 7803 7804 /* SCTP needs all of the SCTP header */ 7805 if (remlen < sizeof (*sctph)) { 7806 goto pkt_too_short; 7807 } 7808 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7809 ASSERT(mp->b_cont != NULL); 7810 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7811 BUMP_MIB(ill->ill_ip6_mib, 7812 ipv6InDiscards); 7813 freemsg(mp); 7814 return; 7815 } 7816 ip6h = (ip6_t *)mp->b_rptr; 7817 whereptr = (uint8_t *)ip6h + hdr_len; 7818 } 7819 7820 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7821 /* checksum */ 7822 pktsum = sctph->sh_chksum; 7823 sctph->sh_chksum = 0; 7824 calcsum = sctp_cksum(mp, 
hdr_len); 7825 if (calcsum != pktsum) { 7826 BUMP_MIB(&sctp_mib, sctpChecksumError); 7827 freemsg(mp); 7828 return; 7829 } 7830 sctph->sh_chksum = pktsum; 7831 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7832 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7833 ports, ipif_id, zoneid, mp)) == NULL) { 7834 ip_fanout_sctp_raw(first_mp, ill, 7835 (ipha_t *)ip6h, B_FALSE, ports, 7836 mctl_present, 7837 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), 7838 B_TRUE, ipif_id, zoneid); 7839 return; 7840 } 7841 BUMP_MIB(&ip_mib, ipInDelivers); 7842 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7843 B_FALSE, mctl_present); 7844 return; 7845 } 7846 case IPPROTO_UDP: { 7847 uint16_t *up; 7848 uint32_t sum; 7849 7850 hdr_len = pkt_len - remlen; 7851 7852 if (hada_mp != NULL) { 7853 ip0dbg(("udp hada drop\n")); 7854 goto hada_drop; 7855 } 7856 7857 /* Verify that at least the ports are present */ 7858 if (remlen < UDPH_SIZE) 7859 goto pkt_too_short; 7860 if (mp->b_cont != NULL && 7861 whereptr + UDPH_SIZE > mp->b_wptr) { 7862 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7863 BUMP_MIB(ill->ill_ip6_mib, 7864 ipv6InDiscards); 7865 freemsg(first_mp); 7866 return; 7867 } 7868 hck_flags = 0; 7869 ip6h = (ip6_t *)mp->b_rptr; 7870 whereptr = (uint8_t *)ip6h + hdr_len; 7871 } 7872 7873 /* 7874 * Before going through the regular checksum 7875 * calculation, make sure the received checksum 7876 * is non-zero. RFC 2460 says, a 0x0000 checksum 7877 * in a UDP packet (within IPv6 packet) is invalid 7878 * and should be replaced by 0xffff. This makes 7879 * sense as regular checksum calculation will 7880 * pass for both the cases i.e. 0x0000 and 0xffff. 7881 * Removing one of the case makes error detection 7882 * stronger. 
7883 */ 7884 7885 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7886 /* 0x0000 checksum is invalid */ 7887 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7888 "checksum value 0x0000\n")); 7889 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7890 freemsg(first_mp); 7891 return; 7892 } 7893 7894 up = (uint16_t *)&ip6h->ip6_src; 7895 7896 /* 7897 * UDP checksum calculation. First sum up the 7898 * pseudo-header fields: 7899 * - Source IPv6 address 7900 * - Destination IPv6 address 7901 * - UDP payload length 7902 * - UDP protocol ID 7903 */ 7904 7905 sum = htons(IPPROTO_UDP + remlen) + 7906 up[0] + up[1] + up[2] + up[3] + 7907 up[4] + up[5] + up[6] + up[7] + 7908 up[8] + up[9] + up[10] + up[11] + 7909 up[12] + up[13] + up[14] + up[15]; 7910 7911 /* Fold initial sum */ 7912 sum = (sum & 0xffff) + (sum >> 16); 7913 7914 if (reass_hck_flags != 0) { 7915 hck_flags = reass_hck_flags; 7916 7917 IP_CKSUM_RECV_REASS(hck_flags, 7918 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7919 sum, reass_sum, cksum_err); 7920 } else { 7921 mp1 = mp->b_cont; 7922 7923 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7924 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7925 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7926 mp, mp1, cksum_err); 7927 } 7928 7929 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7930 IP6_STAT(ip6_in_sw_cksum); 7931 7932 if (cksum_err) { 7933 BUMP_MIB(ill->ill_ip6_mib, udpInCksumErrs); 7934 7935 if (hck_flags & HCK_FULLCKSUM) 7936 IP6_STAT(ip6_udp_in_full_hw_cksum_err); 7937 else if (hck_flags & HCK_PARTIALCKSUM) 7938 IP6_STAT(ip6_udp_in_part_hw_cksum_err); 7939 else 7940 IP6_STAT(ip6_udp_in_sw_cksum_err); 7941 7942 freemsg(first_mp); 7943 return; 7944 } 7945 goto udp_fanout; 7946 } 7947 case IPPROTO_ICMPV6: { 7948 uint16_t *up; 7949 uint32_t sum; 7950 uint_t hdr_len = pkt_len - remlen; 7951 7952 if (hada_mp != NULL) { 7953 ip0dbg(("icmp hada drop\n")); 7954 goto hada_drop; 7955 } 7956 7957 up = (uint16_t *)&ip6h->ip6_src; 7958 sum = htons(IPPROTO_ICMPV6 + remlen) + 7959 
up[0] + up[1] + up[2] + up[3] + 7960 up[4] + up[5] + up[6] + up[7] + 7961 up[8] + up[9] + up[10] + up[11] + 7962 up[12] + up[13] + up[14] + up[15]; 7963 sum = (sum & 0xffff) + (sum >> 16); 7964 sum = IP_CSUM(mp, hdr_len, sum); 7965 if (sum != 0) { 7966 /* IPv6 ICMP checksum failed */ 7967 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7968 "failed %x\n", 7969 sum)); 7970 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7971 BUMP_MIB(ill->ill_icmp6_mib, 7972 ipv6IfIcmpInErrors); 7973 freemsg(first_mp); 7974 return; 7975 } 7976 7977 icmp_fanout: 7978 /* Check variable for testing applications */ 7979 if (ipv6_drop_inbound_icmpv6) { 7980 freemsg(first_mp); 7981 return; 7982 } 7983 /* 7984 * Assume that there is always at least one conn for 7985 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7986 * where there is no conn. 7987 */ 7988 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7989 ASSERT(!(ill->ill_phyint->phyint_flags & 7990 PHYI_LOOPBACK)); 7991 /* 7992 * In the multicast case, applications may have 7993 * joined the group from different zones, so we 7994 * need to deliver the packet to each of them. 7995 * Loop through the multicast memberships 7996 * structures (ilm) on the receive ill and send 7997 * a copy of the packet up each matching one. 
7998 */ 7999 ILM_WALKER_HOLD(ill); 8000 for (ilm = ill->ill_ilm; ilm != NULL; 8001 ilm = ilm->ilm_next) { 8002 if (ilm->ilm_flags & ILM_DELETED) 8003 continue; 8004 if (!IN6_ARE_ADDR_EQUAL( 8005 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 8006 continue; 8007 if (!ipif_lookup_zoneid(ill, 8008 ilm->ilm_zoneid, IPIF_UP, NULL)) 8009 continue; 8010 8011 first_mp1 = ip_copymsg(first_mp); 8012 if (first_mp1 == NULL) 8013 continue; 8014 icmp_inbound_v6(q, first_mp1, ill, 8015 hdr_len, mctl_present, 0, 8016 ilm->ilm_zoneid, dl_mp); 8017 } 8018 ILM_WALKER_RELE(ill); 8019 } else { 8020 first_mp1 = ip_copymsg(first_mp); 8021 if (first_mp1 != NULL) 8022 icmp_inbound_v6(q, first_mp1, ill, 8023 hdr_len, mctl_present, 0, zoneid, 8024 dl_mp); 8025 } 8026 } 8027 /* FALLTHRU */ 8028 default: { 8029 /* 8030 * Handle protocols with which IPv6 is less intimate. 8031 */ 8032 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IP6INFO; 8033 8034 if (hada_mp != NULL) { 8035 ip0dbg(("default hada drop\n")); 8036 goto hada_drop; 8037 } 8038 8039 /* 8040 * Enable sending ICMP for "Unknown" nexthdr 8041 * case. i.e. where we did not FALLTHRU from 8042 * IPPROTO_ICMPV6 processing case above. 8043 * If we did FALLTHRU, then the packet has already been 8044 * processed for IPPF, don't process it again in 8045 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8046 * flags 8047 */ 8048 if (nexthdr != IPPROTO_ICMPV6) 8049 proto_flags |= IP_FF_SEND_ICMP; 8050 else 8051 proto_flags |= IP6_NO_IPPOLICY; 8052 8053 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8054 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8055 mctl_present, zoneid); 8056 return; 8057 } 8058 8059 case IPPROTO_DSTOPTS: { 8060 uint_t ehdrlen; 8061 uint8_t *optptr; 8062 ip6_dest_t *desthdr; 8063 8064 /* Check if AH is present. 
*/ 8065 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8066 ire, hada_mp, zoneid)) { 8067 ip0dbg(("dst early hada drop\n")); 8068 return; 8069 } 8070 8071 /* 8072 * Reinitialize pointers, as ipsec_early_ah_v6() does 8073 * complete pullups. We don't have to do more pullups 8074 * as a result. 8075 */ 8076 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8077 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8078 ip6h = (ip6_t *)mp->b_rptr; 8079 8080 if (remlen < MIN_EHDR_LEN) 8081 goto pkt_too_short; 8082 8083 desthdr = (ip6_dest_t *)whereptr; 8084 nexthdr = desthdr->ip6d_nxt; 8085 prev_nexthdr_offset = (uint_t)(whereptr - 8086 (uint8_t *)ip6h); 8087 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8088 if (remlen < ehdrlen) 8089 goto pkt_too_short; 8090 optptr = whereptr + 2; 8091 /* 8092 * Note: XXX This code does not seem to make 8093 * distinction between Destination Options Header 8094 * being before/after Routing Header which can 8095 * happen if we are at the end of source route. 8096 * This may become significant in future. 8097 * (No real significant Destination Options are 8098 * defined/implemented yet ). 8099 */ 8100 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8101 ehdrlen - 2, IPPROTO_DSTOPTS)) { 8102 case -1: 8103 /* 8104 * Packet has been consumed and any needed 8105 * ICMP errors sent. 
8106 */ 8107 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8108 freemsg(hada_mp); 8109 return; 8110 case 0: 8111 /* No action needed continue */ 8112 break; 8113 case 1: 8114 /* 8115 * Unnexpected return value 8116 * (Router alert is a Hop-by-Hop option) 8117 */ 8118 #ifdef DEBUG 8119 panic("ip_rput_data_v6: router " 8120 "alert hbh opt indication in dest opt"); 8121 /*NOTREACHED*/ 8122 #else 8123 freemsg(hada_mp); 8124 freemsg(first_mp); 8125 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8126 return; 8127 #endif 8128 } 8129 used = ehdrlen; 8130 break; 8131 } 8132 case IPPROTO_FRAGMENT: { 8133 ip6_frag_t *fraghdr; 8134 size_t no_frag_hdr_len; 8135 8136 if (hada_mp != NULL) { 8137 ip0dbg(("frag hada drop\n")); 8138 goto hada_drop; 8139 } 8140 8141 ASSERT(first_mp == mp); 8142 if (remlen < sizeof (ip6_frag_t)) 8143 goto pkt_too_short; 8144 8145 if (mp->b_cont != NULL && 8146 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8147 if (!pullupmsg(mp, 8148 pkt_len - remlen + sizeof (ip6_frag_t))) { 8149 BUMP_MIB(ill->ill_ip6_mib, 8150 ipv6InDiscards); 8151 freemsg(mp); 8152 return; 8153 } 8154 hck_flags = 0; 8155 ip6h = (ip6_t *)mp->b_rptr; 8156 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8157 } 8158 8159 fraghdr = (ip6_frag_t *)whereptr; 8160 used = (uint_t)sizeof (ip6_frag_t); 8161 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmReqds); 8162 8163 /* 8164 * Invoke the CGTP (multirouting) filtering module to 8165 * process the incoming packet. Packets identified as 8166 * duplicates must be discarded. Filtering is active 8167 * only if the the ip_cgtp_filter ndd variable is 8168 * non-zero. 
8169 */ 8170 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 8171 int cgtp_flt_pkt = 8172 ip_cgtp_filter_ops->cfo_filter_v6( 8173 inill->ill_rq, ip6h, fraghdr); 8174 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8175 freemsg(mp); 8176 return; 8177 } 8178 } 8179 8180 /* Restore the flags */ 8181 DB_CKSUMFLAGS(mp) = hck_flags; 8182 8183 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8184 remlen - used, &prev_nexthdr_offset, 8185 &reass_sum, &reass_hck_flags); 8186 if (mp == NULL) { 8187 /* Reassembly is still pending */ 8188 return; 8189 } 8190 /* The first mblk are the headers before the frag hdr */ 8191 BUMP_MIB(ill->ill_ip6_mib, ipv6ReasmOKs); 8192 8193 first_mp = mp; /* mp has most likely changed! */ 8194 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8195 ip6h = (ip6_t *)mp->b_rptr; 8196 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8197 whereptr = mp->b_rptr + no_frag_hdr_len; 8198 remlen = ntohs(ip6h->ip6_plen) + 8199 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8200 pkt_len = msgdsize(mp); 8201 used = 0; 8202 break; 8203 } 8204 case IPPROTO_HOPOPTS: 8205 if (hada_mp != NULL) { 8206 ip0dbg(("hop hada drop\n")); 8207 goto hada_drop; 8208 } 8209 /* 8210 * Illegal header sequence. 8211 * (Hop-by-hop headers are processed above 8212 * and required to immediately follow IPv6 header) 8213 */ 8214 icmp_param_problem_v6(WR(q), first_mp, 8215 ICMP6_PARAMPROB_NEXTHEADER, 8216 prev_nexthdr_offset, 8217 B_FALSE, B_FALSE, zoneid); 8218 return; 8219 8220 case IPPROTO_ROUTING: { 8221 uint_t ehdrlen; 8222 ip6_rthdr_t *rthdr; 8223 8224 /* Check if AH is present. */ 8225 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8226 ire, hada_mp, zoneid)) { 8227 ip0dbg(("routing hada drop\n")); 8228 return; 8229 } 8230 8231 /* 8232 * Reinitialize pointers, as ipsec_early_ah_v6() does 8233 * complete pullups. We don't have to do more pullups 8234 * as a result. 
8235 */ 8236 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8237 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8238 ip6h = (ip6_t *)mp->b_rptr; 8239 8240 if (remlen < MIN_EHDR_LEN) 8241 goto pkt_too_short; 8242 rthdr = (ip6_rthdr_t *)whereptr; 8243 nexthdr = rthdr->ip6r_nxt; 8244 prev_nexthdr_offset = (uint_t)(whereptr - 8245 (uint8_t *)ip6h); 8246 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8247 if (remlen < ehdrlen) 8248 goto pkt_too_short; 8249 if (rthdr->ip6r_segleft != 0) { 8250 /* Not end of source route */ 8251 if (ll_multicast) { 8252 BUMP_MIB(ill->ill_ip6_mib, 8253 ipv6ForwProhibits); 8254 freemsg(hada_mp); 8255 freemsg(mp); 8256 return; 8257 } 8258 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8259 flags, hada_mp, dl_mp); 8260 return; 8261 } 8262 used = ehdrlen; 8263 break; 8264 } 8265 case IPPROTO_AH: 8266 case IPPROTO_ESP: { 8267 /* 8268 * Fast path for AH/ESP. If this is the first time 8269 * we are sending a datagram to AH/ESP, allocate 8270 * a IPSEC_IN message and prepend it. Otherwise, 8271 * just fanout. 8272 */ 8273 8274 ipsec_in_t *ii; 8275 int ipsec_rc; 8276 8277 if (!mctl_present) { 8278 ASSERT(first_mp == mp); 8279 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 8280 NULL) { 8281 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8282 "allocation failure.\n")); 8283 BUMP_MIB(ill->ill_ip6_mib, 8284 ipv6InDiscards); 8285 freemsg(mp); 8286 return; 8287 } 8288 /* 8289 * Store the ill_index so that when we come back 8290 * from IPSEC we ride on the same queue. 8291 */ 8292 ii = (ipsec_in_t *)first_mp->b_rptr; 8293 ii->ipsec_in_ill_index = 8294 ill->ill_phyint->phyint_ifindex; 8295 ii->ipsec_in_rill_index = 8296 ii->ipsec_in_ill_index; 8297 first_mp->b_cont = mp; 8298 /* 8299 * Cache hardware acceleration info. 
8300 */ 8301 if (hada_mp != NULL) { 8302 IPSECHW_DEBUG(IPSECHW_PKT, 8303 ("ip_rput_data_v6: " 8304 "caching data attr.\n")); 8305 ii->ipsec_in_accelerated = B_TRUE; 8306 ii->ipsec_in_da = hada_mp; 8307 hada_mp = NULL; 8308 } 8309 } else { 8310 ii = (ipsec_in_t *)first_mp->b_rptr; 8311 } 8312 8313 if (!ipsec_loaded()) { 8314 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8315 ire->ire_zoneid); 8316 return; 8317 } 8318 8319 /* select inbound SA and have IPsec process the pkt */ 8320 if (nexthdr == IPPROTO_ESP) { 8321 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 8322 if (esph == NULL) 8323 return; 8324 ASSERT(ii->ipsec_in_esp_sa != NULL); 8325 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8326 NULL); 8327 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8328 first_mp, esph); 8329 } else { 8330 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 8331 if (ah == NULL) 8332 return; 8333 ASSERT(ii->ipsec_in_ah_sa != NULL); 8334 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8335 NULL); 8336 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8337 first_mp, ah); 8338 } 8339 8340 switch (ipsec_rc) { 8341 case IPSEC_STATUS_SUCCESS: 8342 break; 8343 case IPSEC_STATUS_FAILED: 8344 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8345 /* FALLTHRU */ 8346 case IPSEC_STATUS_PENDING: 8347 return; 8348 } 8349 /* we're done with IPsec processing, send it up */ 8350 ip_fanout_proto_again(first_mp, ill, inill, ire); 8351 return; 8352 } 8353 case IPPROTO_NONE: 8354 /* All processing is done. Count as "delivered". 
*/ 8355 freemsg(hada_mp); 8356 freemsg(first_mp); 8357 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8358 return; 8359 } 8360 whereptr += used; 8361 ASSERT(remlen >= used); 8362 remlen -= used; 8363 } 8364 /* NOTREACHED */ 8365 8366 pkt_too_short: 8367 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8368 ip6_len, pkt_len, remlen)); 8369 BUMP_MIB(ill->ill_ip6_mib, ipv6InTruncatedPkts); 8370 freemsg(hada_mp); 8371 freemsg(first_mp); 8372 return; 8373 udp_fanout: 8374 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8375 connp = NULL; 8376 } else { 8377 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8378 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8379 CONN_DEC_REF(connp); 8380 connp = NULL; 8381 } 8382 } 8383 8384 if (connp == NULL) { 8385 uint32_t ports; 8386 8387 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8388 UDP_PORTS_OFFSET); 8389 IP6_STAT(ip6_udp_slow_path); 8390 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8391 (flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO), mctl_present, 8392 zoneid); 8393 return; 8394 } 8395 8396 if (CONN_UDP_FLOWCTLD(connp)) { 8397 freemsg(first_mp); 8398 BUMP_MIB(ill->ill_ip6_mib, udpInOverflows); 8399 CONN_DEC_REF(connp); 8400 return; 8401 } 8402 8403 /* Initiate IPPF processing */ 8404 if (IP6_IN_IPP(flags)) { 8405 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8406 if (mp == NULL) { 8407 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8408 CONN_DEC_REF(connp); 8409 return; 8410 } 8411 } 8412 8413 if (connp->conn_ipv6_recvpktinfo || 8414 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8415 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8416 if (mp == NULL) { 8417 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8418 CONN_DEC_REF(connp); 8419 return; 8420 } 8421 } 8422 8423 IP6_STAT(ip6_udp_fast_path); 8424 BUMP_MIB(ill->ill_ip6_mib, ipv6InReceives); 8425 BUMP_MIB(ill->ill_ip6_mib, ipv6InDelivers); 8426 8427 /* Send it upstream */ 8428 CONN_UDP_RECV(connp, mp); 8429 8430 CONN_DEC_REF(connp); 
8431 freemsg(hada_mp); 8432 return; 8433 8434 hada_drop: 8435 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8436 /* IPsec kstats: bump counter here */ 8437 freemsg(hada_mp); 8438 freemsg(first_mp); 8439 } 8440 8441 /* 8442 * Reassemble fragment. 8443 * When it returns a completed message the first mblk will only contain 8444 * the headers prior to the fragment header. 8445 * 8446 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8447 * of the preceding header. This is needed to patch the previous header's 8448 * nexthdr field when reassembly completes. 8449 */ 8450 static mblk_t * 8451 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8452 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8453 uint32_t *cksum_val, uint16_t *cksum_flags) 8454 { 8455 ill_t *ill = (ill_t *)q->q_ptr; 8456 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8457 uint16_t offset; 8458 boolean_t more_frags; 8459 uint8_t nexthdr = fraghdr->ip6f_nxt; 8460 in6_addr_t *v6dst_ptr; 8461 in6_addr_t *v6src_ptr; 8462 uint_t end; 8463 uint_t hdr_length; 8464 size_t count; 8465 ipf_t *ipf; 8466 ipf_t **ipfp; 8467 ipfb_t *ipfb; 8468 mblk_t *mp1; 8469 uint8_t ecn_info = 0; 8470 size_t msg_len; 8471 mblk_t *tail_mp; 8472 mblk_t *t_mp; 8473 boolean_t pruned = B_FALSE; 8474 uint32_t sum_val; 8475 uint16_t sum_flags; 8476 8477 8478 if (cksum_val != NULL) 8479 *cksum_val = 0; 8480 if (cksum_flags != NULL) 8481 *cksum_flags = 0; 8482 8483 /* 8484 * We utilize hardware computed checksum info only for UDP since 8485 * IP fragmentation is a normal occurence for the protocol. In 8486 * addition, checksum offload support for IP fragments carrying 8487 * UDP payload is commonly implemented across network adapters. 
8488 */ 8489 ASSERT(ill != NULL); 8490 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8491 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8492 mblk_t *mp1 = mp->b_cont; 8493 int32_t len; 8494 8495 /* Record checksum information from the packet */ 8496 sum_val = (uint32_t)DB_CKSUM16(mp); 8497 sum_flags = DB_CKSUMFLAGS(mp); 8498 8499 /* fragmented payload offset from beginning of mblk */ 8500 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8501 8502 if ((sum_flags & HCK_PARTIALCKSUM) && 8503 (mp1 == NULL || mp1->b_cont == NULL) && 8504 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8505 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8506 uint32_t adj; 8507 /* 8508 * Partial checksum has been calculated by hardware 8509 * and attached to the packet; in addition, any 8510 * prepended extraneous data is even byte aligned. 8511 * If any such data exists, we adjust the checksum; 8512 * this would also handle any postpended data. 8513 */ 8514 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8515 mp, mp1, len, adj); 8516 8517 /* One's complement subtract extraneous checksum */ 8518 if (adj >= sum_val) 8519 sum_val = ~(adj - sum_val) & 0xFFFF; 8520 else 8521 sum_val -= adj; 8522 } 8523 } else { 8524 sum_val = 0; 8525 sum_flags = 0; 8526 } 8527 8528 /* Clear hardware checksumming flag */ 8529 DB_CKSUMFLAGS(mp) = 0; 8530 8531 /* 8532 * Note: Fragment offset in header is in 8-octet units. 8533 * Clearing least significant 3 bits not only extracts 8534 * it but also gets it in units of octets. 8535 */ 8536 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8537 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8538 8539 /* 8540 * Is the more frags flag on and the payload length not a multiple 8541 * of eight? 
8542 */ 8543 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8544 zoneid_t zoneid; 8545 8546 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8547 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 8548 if (zoneid == ALL_ZONES) { 8549 freemsg(mp); 8550 return (NULL); 8551 } 8552 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8553 (uint32_t)((char *)&ip6h->ip6_plen - 8554 (char *)ip6h), B_FALSE, B_FALSE, zoneid); 8555 return (NULL); 8556 } 8557 8558 v6src_ptr = &ip6h->ip6_src; 8559 v6dst_ptr = &ip6h->ip6_dst; 8560 end = remlen; 8561 8562 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8563 end += offset; 8564 8565 /* 8566 * Would fragment cause reassembled packet to have a payload length 8567 * greater than IP_MAXPACKET - the max payload size? 8568 */ 8569 if (end > IP_MAXPACKET) { 8570 zoneid_t zoneid; 8571 8572 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8573 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 8574 if (zoneid == ALL_ZONES) { 8575 freemsg(mp); 8576 return (NULL); 8577 } 8578 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8579 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8580 (char *)ip6h), B_FALSE, B_FALSE, zoneid); 8581 return (NULL); 8582 } 8583 8584 /* 8585 * This packet just has one fragment. Reassembly not 8586 * needed. 8587 */ 8588 if (!more_frags && offset == 0) { 8589 goto reass_done; 8590 } 8591 8592 /* 8593 * Drop the fragmented as early as possible, if 8594 * we don't have resource(s) to re-assemble. 8595 */ 8596 if (ip_reass_queue_bytes == 0) { 8597 freemsg(mp); 8598 return (NULL); 8599 } 8600 8601 /* Record the ECN field info. */ 8602 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8603 /* 8604 * If this is not the first fragment, dump the unfragmentable 8605 * portion of the packet. 8606 */ 8607 if (offset) 8608 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8609 8610 /* 8611 * Fragmentation reassembly. 
Each ILL has a hash table for 8612 * queueing packets undergoing reassembly for all IPIFs 8613 * associated with the ILL. The hash is based on the packet 8614 * IP ident field. The ILL frag hash table was allocated 8615 * as a timer block at the time the ILL was created. Whenever 8616 * there is anything on the reassembly queue, the timer will 8617 * be running. 8618 */ 8619 msg_len = MBLKSIZE(mp); 8620 tail_mp = mp; 8621 while (tail_mp->b_cont != NULL) { 8622 tail_mp = tail_mp->b_cont; 8623 msg_len += MBLKSIZE(tail_mp); 8624 } 8625 /* 8626 * If the reassembly list for this ILL will get too big 8627 * prune it. 8628 */ 8629 8630 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8631 ip_reass_queue_bytes) { 8632 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8633 : (ip_reass_queue_bytes - msg_len)); 8634 pruned = B_TRUE; 8635 } 8636 8637 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8638 mutex_enter(&ipfb->ipfb_lock); 8639 8640 ipfp = &ipfb->ipfb_ipf; 8641 /* Try to find an existing fragment queue for this packet. */ 8642 for (;;) { 8643 ipf = ipfp[0]; 8644 if (ipf) { 8645 /* 8646 * It has to match on ident, source address, and 8647 * dest address. 8648 */ 8649 if (ipf->ipf_ident == ident && 8650 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8651 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8652 8653 /* 8654 * If we have received too many 8655 * duplicate fragments for this packet 8656 * free it. 8657 */ 8658 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8659 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8660 freemsg(mp); 8661 mutex_exit(&ipfb->ipfb_lock); 8662 return (NULL); 8663 } 8664 8665 break; 8666 } 8667 ipfp = &ipf->ipf_hash_next; 8668 continue; 8669 } 8670 8671 8672 /* 8673 * If we pruned the list, do we want to store this new 8674 * fragment?. We apply an optimization here based on the 8675 * fact that most fragments will be received in order. 
8676 * So if the offset of this incoming fragment is zero, 8677 * it is the first fragment of a new packet. We will 8678 * keep it. Otherwise drop the fragment, as we have 8679 * probably pruned the packet already (since the 8680 * packet cannot be found). 8681 */ 8682 8683 if (pruned && offset != 0) { 8684 mutex_exit(&ipfb->ipfb_lock); 8685 freemsg(mp); 8686 return (NULL); 8687 } 8688 8689 /* New guy. Allocate a frag message. */ 8690 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8691 if (!mp1) { 8692 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8693 freemsg(mp); 8694 partial_reass_done: 8695 mutex_exit(&ipfb->ipfb_lock); 8696 return (NULL); 8697 } 8698 8699 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8700 /* 8701 * Too many fragmented packets in this hash bucket. 8702 * Free the oldest. 8703 */ 8704 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8705 } 8706 8707 mp1->b_cont = mp; 8708 8709 /* Initialize the fragment header. */ 8710 ipf = (ipf_t *)mp1->b_rptr; 8711 ipf->ipf_mp = mp1; 8712 ipf->ipf_ptphn = ipfp; 8713 ipfp[0] = ipf; 8714 ipf->ipf_hash_next = NULL; 8715 ipf->ipf_ident = ident; 8716 ipf->ipf_v6src = *v6src_ptr; 8717 ipf->ipf_v6dst = *v6dst_ptr; 8718 /* Record reassembly start time. 
*/ 8719 ipf->ipf_timestamp = gethrestime_sec(); 8720 /* Record ipf generation and account for frag header */ 8721 ipf->ipf_gen = ill->ill_ipf_gen++; 8722 ipf->ipf_count = MBLKSIZE(mp1); 8723 ipf->ipf_protocol = nexthdr; 8724 ipf->ipf_nf_hdr_len = 0; 8725 ipf->ipf_prev_nexthdr_offset = 0; 8726 ipf->ipf_last_frag_seen = B_FALSE; 8727 ipf->ipf_ecn = ecn_info; 8728 ipf->ipf_num_dups = 0; 8729 ipfb->ipfb_frag_pkts++; 8730 ipf->ipf_checksum = 0; 8731 ipf->ipf_checksum_flags = 0; 8732 8733 /* Store checksum value in fragment header */ 8734 if (sum_flags != 0) { 8735 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8736 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8737 ipf->ipf_checksum = sum_val; 8738 ipf->ipf_checksum_flags = sum_flags; 8739 } 8740 8741 /* 8742 * We handle reassembly two ways. In the easy case, 8743 * where all the fragments show up in order, we do 8744 * minimal bookkeeping, and just clip new pieces on 8745 * the end. If we ever see a hole, then we go off 8746 * to ip_reassemble which has to mark the pieces and 8747 * keep track of the number of holes, etc. Obviously, 8748 * the point of having both mechanisms is so we can 8749 * handle the easy case as efficiently as possible. 8750 */ 8751 if (offset == 0) { 8752 /* Easy case, in-order reassembly so far. */ 8753 /* Update the byte count */ 8754 ipf->ipf_count += msg_len; 8755 ipf->ipf_tail_mp = tail_mp; 8756 /* 8757 * Keep track of next expected offset in 8758 * ipf_end. 8759 */ 8760 ipf->ipf_end = end; 8761 ipf->ipf_nf_hdr_len = hdr_length; 8762 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8763 } else { 8764 /* Hard case, hole at the beginning. */ 8765 ipf->ipf_tail_mp = NULL; 8766 /* 8767 * ipf_end == 0 means that we have given up 8768 * on easy reassembly. 8769 */ 8770 ipf->ipf_end = 0; 8771 8772 /* Forget checksum offload from now on */ 8773 ipf->ipf_checksum_flags = 0; 8774 8775 /* 8776 * ipf_hole_cnt is set by ip_reassemble. 8777 * ipf_count is updated by ip_reassemble. 
8778 * No need to check for return value here 8779 * as we don't expect reassembly to complete or 8780 * fail for the first fragment itself. 8781 */ 8782 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8783 msg_len); 8784 } 8785 /* Update per ipfb and ill byte counts */ 8786 ipfb->ipfb_count += ipf->ipf_count; 8787 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8788 ill->ill_frag_count += ipf->ipf_count; 8789 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8790 /* If the frag timer wasn't already going, start it. */ 8791 mutex_enter(&ill->ill_lock); 8792 ill_frag_timer_start(ill); 8793 mutex_exit(&ill->ill_lock); 8794 goto partial_reass_done; 8795 } 8796 8797 /* 8798 * If the packet's flag has changed (it could be coming up 8799 * from an interface different than the previous, therefore 8800 * possibly different checksum capability), then forget about 8801 * any stored checksum states. Otherwise add the value to 8802 * the existing one stored in the fragment header. 8803 */ 8804 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8805 sum_val += ipf->ipf_checksum; 8806 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8807 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8808 ipf->ipf_checksum = sum_val; 8809 } else if (ipf->ipf_checksum_flags != 0) { 8810 /* Forget checksum offload from now on */ 8811 ipf->ipf_checksum_flags = 0; 8812 } 8813 8814 /* 8815 * We have a new piece of a datagram which is already being 8816 * reassembled. Update the ECN info if all IP fragments 8817 * are ECN capable. If there is one which is not, clear 8818 * all the info. If there is at least one which has CE 8819 * code point, IP needs to report that up to transport. 
8820 */ 8821 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8822 if (ecn_info == IPH_ECN_CE) 8823 ipf->ipf_ecn = IPH_ECN_CE; 8824 } else { 8825 ipf->ipf_ecn = IPH_ECN_NECT; 8826 } 8827 8828 if (offset && ipf->ipf_end == offset) { 8829 /* The new fragment fits at the end */ 8830 ipf->ipf_tail_mp->b_cont = mp; 8831 /* Update the byte count */ 8832 ipf->ipf_count += msg_len; 8833 /* Update per ipfb and ill byte counts */ 8834 ipfb->ipfb_count += msg_len; 8835 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8836 ill->ill_frag_count += msg_len; 8837 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8838 if (more_frags) { 8839 /* More to come. */ 8840 ipf->ipf_end = end; 8841 ipf->ipf_tail_mp = tail_mp; 8842 goto partial_reass_done; 8843 } 8844 } else { 8845 /* 8846 * Go do the hard cases. 8847 * Call ip_reassemble(). 8848 */ 8849 int ret; 8850 8851 if (offset == 0) { 8852 if (ipf->ipf_prev_nexthdr_offset == 0) { 8853 ipf->ipf_nf_hdr_len = hdr_length; 8854 ipf->ipf_prev_nexthdr_offset = 8855 *prev_nexthdr_offset; 8856 } 8857 } 8858 /* Save current byte count */ 8859 count = ipf->ipf_count; 8860 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8861 8862 /* Count of bytes added and subtracted (freeb()ed) */ 8863 count = ipf->ipf_count - count; 8864 if (count) { 8865 /* Update per ipfb and ill byte counts */ 8866 ipfb->ipfb_count += count; 8867 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8868 ill->ill_frag_count += count; 8869 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8870 } 8871 if (ret == IP_REASS_PARTIAL) { 8872 goto partial_reass_done; 8873 } else if (ret == IP_REASS_FAILED) { 8874 /* Reassembly failed. 
Free up all resources */ 8875 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8876 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8877 IP_REASS_SET_START(t_mp, 0); 8878 IP_REASS_SET_END(t_mp, 0); 8879 } 8880 freemsg(mp); 8881 goto partial_reass_done; 8882 } 8883 8884 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8885 } 8886 /* 8887 * We have completed reassembly. Unhook the frag header from 8888 * the reassembly list. 8889 * 8890 * Grab the unfragmentable header length next header value out 8891 * of the first fragment 8892 */ 8893 ASSERT(ipf->ipf_nf_hdr_len != 0); 8894 hdr_length = ipf->ipf_nf_hdr_len; 8895 8896 /* 8897 * Before we free the frag header, record the ECN info 8898 * to report back to the transport. 8899 */ 8900 ecn_info = ipf->ipf_ecn; 8901 8902 /* 8903 * Store the nextheader field in the header preceding the fragment 8904 * header 8905 */ 8906 nexthdr = ipf->ipf_protocol; 8907 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8908 ipfp = ipf->ipf_ptphn; 8909 8910 /* We need to supply these to caller */ 8911 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8912 sum_val = ipf->ipf_checksum; 8913 else 8914 sum_val = 0; 8915 8916 mp1 = ipf->ipf_mp; 8917 count = ipf->ipf_count; 8918 ipf = ipf->ipf_hash_next; 8919 if (ipf) 8920 ipf->ipf_ptphn = ipfp; 8921 ipfp[0] = ipf; 8922 ill->ill_frag_count -= count; 8923 ASSERT(ipfb->ipfb_count >= count); 8924 ipfb->ipfb_count -= count; 8925 ipfb->ipfb_frag_pkts--; 8926 mutex_exit(&ipfb->ipfb_lock); 8927 /* Ditch the frag header. */ 8928 mp = mp1->b_cont; 8929 freeb(mp1); 8930 8931 /* 8932 * Make sure the packet is good by doing some sanity 8933 * check. If bad we can silentely drop the packet. 
8934 */ 8935 reass_done: 8936 if (hdr_length < sizeof (ip6_frag_t)) { 8937 BUMP_MIB(ill->ill_ip6_mib, ipv6InHdrErrors); 8938 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8939 freemsg(mp); 8940 return (NULL); 8941 } 8942 8943 /* 8944 * Remove the fragment header from the initial header by 8945 * splitting the mblk into the non-fragmentable header and 8946 * everthing after the fragment extension header. This has the 8947 * side effect of putting all the headers that need destination 8948 * processing into the b_cont block-- on return this fact is 8949 * used in order to avoid having to look at the extensions 8950 * already processed. 8951 * 8952 * Note that this code assumes that the unfragmentable portion 8953 * of the header is in the first mblk and increments 8954 * the read pointer past it. If this assumption is broken 8955 * this code fails badly. 8956 */ 8957 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8958 mblk_t *nmp; 8959 8960 if (!(nmp = dupb(mp))) { 8961 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 8962 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8963 freemsg(mp); 8964 return (NULL); 8965 } 8966 nmp->b_cont = mp->b_cont; 8967 mp->b_cont = nmp; 8968 nmp->b_rptr += hdr_length; 8969 } 8970 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8971 8972 ip6h = (ip6_t *)mp->b_rptr; 8973 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8974 8975 /* Restore original IP length in header. */ 8976 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8977 /* Record the ECN info. */ 8978 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8979 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8980 8981 /* Reassembly is successful; return checksum information if needed */ 8982 if (cksum_val != NULL) 8983 *cksum_val = sum_val; 8984 if (cksum_flags != NULL) 8985 *cksum_flags = sum_flags; 8986 8987 return (mp); 8988 } 8989 8990 /* 8991 * Walk through the options to see if there is a routing header. 
8992 * If present get the destination which is the last address of 8993 * the option. 8994 */ 8995 in6_addr_t 8996 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8997 { 8998 uint8_t nexthdr; 8999 uint8_t *whereptr; 9000 ip6_hbh_t *hbhhdr; 9001 ip6_dest_t *dsthdr; 9002 ip6_rthdr0_t *rthdr; 9003 ip6_frag_t *fraghdr; 9004 int ehdrlen; 9005 int left; 9006 in6_addr_t *ap, rv; 9007 9008 if (is_fragment != NULL) 9009 *is_fragment = B_FALSE; 9010 9011 rv = ip6h->ip6_dst; 9012 9013 nexthdr = ip6h->ip6_nxt; 9014 whereptr = (uint8_t *)&ip6h[1]; 9015 for (;;) { 9016 9017 ASSERT(nexthdr != IPPROTO_RAW); 9018 switch (nexthdr) { 9019 case IPPROTO_HOPOPTS: 9020 hbhhdr = (ip6_hbh_t *)whereptr; 9021 nexthdr = hbhhdr->ip6h_nxt; 9022 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9023 break; 9024 case IPPROTO_DSTOPTS: 9025 dsthdr = (ip6_dest_t *)whereptr; 9026 nexthdr = dsthdr->ip6d_nxt; 9027 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9028 break; 9029 case IPPROTO_ROUTING: 9030 rthdr = (ip6_rthdr0_t *)whereptr; 9031 nexthdr = rthdr->ip6r0_nxt; 9032 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9033 9034 left = rthdr->ip6r0_segleft; 9035 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9036 rv = *(ap + left - 1); 9037 /* 9038 * If the caller doesn't care whether the packet 9039 * is a fragment or not, we can stop here since 9040 * we have our destination. 9041 */ 9042 if (is_fragment == NULL) 9043 goto done; 9044 break; 9045 case IPPROTO_FRAGMENT: 9046 fraghdr = (ip6_frag_t *)whereptr; 9047 nexthdr = fraghdr->ip6f_nxt; 9048 ehdrlen = sizeof (ip6_frag_t); 9049 if (is_fragment != NULL) 9050 *is_fragment = B_TRUE; 9051 goto done; 9052 default : 9053 goto done; 9054 } 9055 whereptr += ehdrlen; 9056 } 9057 9058 done: 9059 return (rv); 9060 } 9061 9062 /* 9063 * ip_source_routed_v6: 9064 * This function is called by redirect code in ip_rput_data_v6 to 9065 * know whether this packet is source routed through this node i.e 9066 * whether this node (router) is part of the journey. 
This 9067 * function is called under two cases : 9068 * 9069 * case 1 : Routing header was processed by this node and 9070 * ip_process_rthdr replaced ip6_dst with the next hop 9071 * and we are forwarding the packet to the next hop. 9072 * 9073 * case 2 : Routing header was not processed by this node and we 9074 * are just forwarding the packet. 9075 * 9076 * For case (1) we don't want to send redirects. For case(2) we 9077 * want to send redirects. 9078 */ 9079 static boolean_t 9080 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 9081 { 9082 uint8_t nexthdr; 9083 in6_addr_t *addrptr; 9084 ip6_rthdr0_t *rthdr; 9085 uint8_t numaddr; 9086 ip6_hbh_t *hbhhdr; 9087 uint_t ehdrlen; 9088 uint8_t *byteptr; 9089 9090 ip2dbg(("ip_source_routed_v6\n")); 9091 nexthdr = ip6h->ip6_nxt; 9092 ehdrlen = IPV6_HDR_LEN; 9093 9094 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9095 while (nexthdr == IPPROTO_HOPOPTS || 9096 nexthdr == IPPROTO_DSTOPTS) { 9097 byteptr = (uint8_t *)ip6h + ehdrlen; 9098 /* 9099 * Check if we have already processed 9100 * packets or we are just a forwarding 9101 * router which only pulled up msgs up 9102 * to IPV6HDR and one HBH ext header 9103 */ 9104 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9105 ip2dbg(("ip_source_routed_v6: Extension" 9106 " headers not processed\n")); 9107 return (B_FALSE); 9108 } 9109 hbhhdr = (ip6_hbh_t *)byteptr; 9110 nexthdr = hbhhdr->ip6h_nxt; 9111 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9112 } 9113 switch (nexthdr) { 9114 case IPPROTO_ROUTING: 9115 byteptr = (uint8_t *)ip6h + ehdrlen; 9116 /* 9117 * If for some reason, we haven't pulled up 9118 * the routing hdr data mblk, then we must 9119 * not have processed it at all. So for sure 9120 * we are not part of the source routed journey. 
9121 */ 9122 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9123 ip2dbg(("ip_source_routed_v6: Routing" 9124 " header not processed\n")); 9125 return (B_FALSE); 9126 } 9127 rthdr = (ip6_rthdr0_t *)byteptr; 9128 /* 9129 * Either we are an intermediate router or the 9130 * last hop before destination and we have 9131 * already processed the routing header. 9132 * If segment_left is greater than or equal to zero, 9133 * then we must be the (numaddr - segleft) entry 9134 * of the routing header. Although ip6r0_segleft 9135 * is a unit8_t variable, we still check for zero 9136 * or greater value, if in case the data type 9137 * is changed someday in future. 9138 */ 9139 if (rthdr->ip6r0_segleft > 0 || 9140 rthdr->ip6r0_segleft == 0) { 9141 ire_t *ire = NULL; 9142 9143 numaddr = rthdr->ip6r0_len / 2; 9144 addrptr = (in6_addr_t *)((char *)rthdr + 9145 sizeof (*rthdr)); 9146 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9147 if (addrptr != NULL) { 9148 ire = ire_ctable_lookup_v6(addrptr, NULL, 9149 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9150 MATCH_IRE_TYPE); 9151 if (ire != NULL) { 9152 ire_refrele(ire); 9153 return (B_TRUE); 9154 } 9155 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9156 } 9157 } 9158 /* FALLTHRU */ 9159 default: 9160 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9161 return (B_FALSE); 9162 } 9163 } 9164 9165 /* 9166 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9167 * Assumes that the following set of headers appear in the first 9168 * mblk: 9169 * ip6i_t (if present) CAN also appear as a separate mblk. 9170 * ip6_t 9171 * Any extension headers 9172 * TCP/UDP/SCTP header (if present) 9173 * The routine can handle an ICMPv6 header that is not in the first mblk. 9174 * 9175 * The order to determine the outgoing interface is as follows: 9176 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9177 * 2. If conn_nofailover_ill is set then use that ill. 9178 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9179 * 4. If q is an ill queue and (link local or multicast destination) then 9180 * use that ill. 9181 * 5. If IPV6_BOUND_IF has been set use that ill. 9182 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9183 * look for the best IRE match for the unspecified group to determine 9184 * the ill. 9185 * 7. For unicast: Just do an IRE lookup for the best match. 9186 * 9187 * arg2 is always a queue_t *. 9188 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9189 * the zoneid. 9190 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9191 */ 9192 void 9193 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9194 { 9195 conn_t *connp = NULL; 9196 queue_t *q = (queue_t *)arg2; 9197 ire_t *ire = NULL; 9198 ire_t *sctp_ire = NULL; 9199 ip6_t *ip6h; 9200 in6_addr_t *v6dstp; 9201 ill_t *ill = NULL; 9202 ipif_t *ipif; 9203 ip6i_t *ip6i; 9204 int cksum_request; /* -1 => normal. */ 9205 /* 1 => Skip TCP/UDP/SCTP checksum */ 9206 /* Otherwise contains insert offset for checksum */ 9207 int unspec_src; 9208 boolean_t do_outrequests; /* Increment OutRequests? */ 9209 mib2_ipv6IfStatsEntry_t *mibptr; 9210 int match_flags = MATCH_IRE_ILL_GROUP; 9211 boolean_t attach_if = B_FALSE; 9212 mblk_t *first_mp; 9213 boolean_t mctl_present; 9214 ipsec_out_t *io; 9215 boolean_t drop_if_delayed = B_FALSE; 9216 boolean_t multirt_need_resolve = B_FALSE; 9217 mblk_t *copy_mp = NULL; 9218 int err; 9219 int ip6i_flags = 0; 9220 zoneid_t zoneid; 9221 ill_t *saved_ill = NULL; 9222 boolean_t conn_lock_held; 9223 boolean_t need_decref = B_FALSE; 9224 9225 /* 9226 * Highest bit in version field is Reachability Confirmation bit 9227 * used by NUD in ip_xmit_v6(). 
9228 */ 9229 #ifdef _BIG_ENDIAN 9230 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9231 #else 9232 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9233 #endif 9234 9235 /* 9236 * M_CTL comes from 5 places 9237 * 9238 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9239 * both V4 and V6 datagrams. 9240 * 9241 * 2) AH/ESP sends down M_CTL after doing their job with both 9242 * V4 and V6 datagrams. 9243 * 9244 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9245 * attached. 9246 * 9247 * 4) Notifications from an external resolver (for XRESOLV ifs) 9248 * 9249 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9250 * IPsec hardware acceleration support. 9251 * 9252 * We need to handle (1)'s IPv6 case and (3) here. For the 9253 * IPv4 case in (1), and (2), IPSEC processing has already 9254 * started. The code in ip_wput() already knows how to handle 9255 * continuing IPSEC processing (for IPv4 and IPv6). All other 9256 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9257 * for handling. 9258 */ 9259 first_mp = mp; 9260 mctl_present = B_FALSE; 9261 io = NULL; 9262 9263 /* Multidata transmit? */ 9264 if (DB_TYPE(mp) == M_MULTIDATA) { 9265 /* 9266 * We should never get here, since all Multidata messages 9267 * originating from tcp should have been directed over to 9268 * tcp_multisend() in the first place. 9269 */ 9270 BUMP_MIB(&ip6_mib, ipv6OutDiscards); 9271 freemsg(mp); 9272 return; 9273 } else if (DB_TYPE(mp) == M_CTL) { 9274 uint32_t mctltype = 0; 9275 uint32_t mlen = MBLKL(first_mp); 9276 9277 mp = mp->b_cont; 9278 mctl_present = B_TRUE; 9279 io = (ipsec_out_t *)first_mp->b_rptr; 9280 9281 /* 9282 * Validate this M_CTL message. The only three types of 9283 * M_CTL messages we expect to see in this code path are 9284 * ipsec_out_t or ipsec_in_t structures (allocated as 9285 * ipsec_info_t unions), or ipsec_ctl_t structures. 
9286 * The ipsec_out_type and ipsec_in_type overlap in the two 9287 * data structures, and they are either set to IPSEC_OUT 9288 * or IPSEC_IN depending on which data structure it is. 9289 * ipsec_ctl_t is an IPSEC_CTL. 9290 * 9291 * All other M_CTL messages are sent to ip_wput_nondata() 9292 * for handling. 9293 */ 9294 if (mlen >= sizeof (io->ipsec_out_type)) 9295 mctltype = io->ipsec_out_type; 9296 9297 if ((mlen == sizeof (ipsec_ctl_t)) && 9298 (mctltype == IPSEC_CTL)) { 9299 ip_output(arg, first_mp, arg2, caller); 9300 return; 9301 } 9302 9303 if ((mlen < sizeof (ipsec_info_t)) || 9304 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9305 mp == NULL) { 9306 ip_wput_nondata(NULL, q, first_mp, NULL); 9307 return; 9308 } 9309 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9310 if (q->q_next == NULL) { 9311 ip6h = (ip6_t *)mp->b_rptr; 9312 /* 9313 * For a freshly-generated TCP dgram that needs IPV6 9314 * processing, don't call ip_wput immediately. We can 9315 * tell this by the ipsec_out_proc_begin. In-progress 9316 * IPSEC_OUT messages have proc_begin set to TRUE, 9317 * and we want to send all IPSEC_IN messages to 9318 * ip_wput() for IPsec processing or finishing. 9319 */ 9320 if (mctltype == IPSEC_IN || 9321 IPVER(ip6h) != IPV6_VERSION || 9322 io->ipsec_out_proc_begin) { 9323 mibptr = &ip6_mib; 9324 goto notv6; 9325 } 9326 } 9327 } else if (DB_TYPE(mp) != M_DATA) { 9328 ip_wput_nondata(NULL, q, mp, NULL); 9329 return; 9330 } 9331 9332 ip6h = (ip6_t *)mp->b_rptr; 9333 9334 if (IPVER(ip6h) != IPV6_VERSION) { 9335 mibptr = &ip6_mib; 9336 goto notv6; 9337 } 9338 9339 if (q->q_next != NULL) { 9340 ill = (ill_t *)q->q_ptr; 9341 /* 9342 * We don't know if this ill will be used for IPv6 9343 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9344 * ipif_set_values() sets the ill_isv6 flag to true if 9345 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9346 * just drop the packet. 
9347 */ 9348 if (!ill->ill_isv6) { 9349 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9350 "ILLF_IPV6 was set\n")); 9351 freemsg(first_mp); 9352 return; 9353 } 9354 /* For uniformity do a refhold */ 9355 mutex_enter(&ill->ill_lock); 9356 if (!ILL_CAN_LOOKUP(ill)) { 9357 mutex_exit(&ill->ill_lock); 9358 freemsg(first_mp); 9359 return; 9360 } 9361 ill_refhold_locked(ill); 9362 mutex_exit(&ill->ill_lock); 9363 mibptr = ill->ill_ip6_mib; 9364 /* 9365 * ill_ip6_mib is allocated by ipif_set_values() when 9366 * ill_isv6 is set. Thus if ill_isv6 is true, 9367 * ill_ip6_mib had better not be NULL. 9368 */ 9369 ASSERT(mibptr != NULL); 9370 unspec_src = 0; 9371 BUMP_MIB(mibptr, ipv6OutRequests); 9372 do_outrequests = B_FALSE; 9373 zoneid = (zoneid_t)(uintptr_t)arg; 9374 } else { 9375 connp = (conn_t *)arg; 9376 ASSERT(connp != NULL); 9377 zoneid = connp->conn_zoneid; 9378 9379 /* is queue flow controlled? */ 9380 if ((q->q_first || connp->conn_draining) && 9381 (caller == IP_WPUT)) { 9382 /* 9383 * 1) TCP sends down M_CTL for detached connections. 9384 * 2) AH/ESP sends down M_CTL. 9385 * 9386 * We don't flow control either of the above. Only 9387 * UDP and others are flow controlled for which we 9388 * can't have a M_CTL. 9389 */ 9390 ASSERT(first_mp == mp); 9391 (void) putq(q, mp); 9392 return; 9393 } 9394 mibptr = &ip6_mib; 9395 unspec_src = connp->conn_unspec_src; 9396 do_outrequests = B_TRUE; 9397 if (mp->b_flag & MSGHASREF) { 9398 mp->b_flag &= ~MSGHASREF; 9399 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9400 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9401 need_decref = B_TRUE; 9402 } 9403 9404 /* 9405 * If there is a policy, try to attach an ipsec_out in 9406 * the front. At the end, first_mp either points to a 9407 * M_DATA message or IPSEC_OUT message linked to a 9408 * M_DATA message. We have to do it now as we might 9409 * lose the "conn" if we go through ip_newroute. 
9410 */ 9411 if (!mctl_present && 9412 (connp->conn_out_enforce_policy || 9413 connp->conn_latch != NULL)) { 9414 ASSERT(first_mp == mp); 9415 /* XXX Any better way to get the protocol fast ? */ 9416 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9417 connp->conn_ulp)) == NULL)) { 9418 if (need_decref) 9419 CONN_DEC_REF(connp); 9420 return; 9421 } else { 9422 ASSERT(mp->b_datap->db_type == M_CTL); 9423 first_mp = mp; 9424 mp = mp->b_cont; 9425 mctl_present = B_TRUE; 9426 io = (ipsec_out_t *)first_mp->b_rptr; 9427 } 9428 } 9429 } 9430 9431 /* check for alignment and full IPv6 header */ 9432 if (!OK_32PTR((uchar_t *)ip6h) || 9433 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9434 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9435 if (do_outrequests) 9436 BUMP_MIB(mibptr, ipv6OutRequests); 9437 BUMP_MIB(mibptr, ipv6OutDiscards); 9438 freemsg(first_mp); 9439 if (ill != NULL) 9440 ill_refrele(ill); 9441 if (need_decref) 9442 CONN_DEC_REF(connp); 9443 return; 9444 } 9445 v6dstp = &ip6h->ip6_dst; 9446 cksum_request = -1; 9447 ip6i = NULL; 9448 9449 /* 9450 * Once neighbor discovery has completed, ndp_process() will provide 9451 * locally generated packets for which processing can be reattempted. 9452 * In these cases, connp is NULL and the original zone is part of a 9453 * prepended ipsec_out_t. 9454 */ 9455 if (io != NULL) { 9456 /* 9457 * When coming from icmp_input_v6, the zoneid might not match 9458 * for the loopback case, because inside icmp_input_v6 the 9459 * queue_t is a conn queue from the sending side. 9460 */ 9461 zoneid = io->ipsec_out_zoneid; 9462 ASSERT(zoneid != ALL_ZONES); 9463 } 9464 9465 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9466 /* 9467 * This is an ip6i_t header followed by an ip6_hdr. 9468 * Check which fields are set. 9469 * 9470 * When the packet comes from a transport we should have 9471 * all needed headers in the first mblk. 
However, when 9472 * going through ip_newroute*_v6 the ip6i might be in 9473 * a separate mblk when we return here. In that case 9474 * we pullup everything to ensure that extension and transport 9475 * headers "stay" in the first mblk. 9476 */ 9477 ip6i = (ip6i_t *)ip6h; 9478 ip6i_flags = ip6i->ip6i_flags; 9479 9480 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9481 ((mp->b_wptr - (uchar_t *)ip6i) >= 9482 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9483 9484 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9485 if (!pullupmsg(mp, -1)) { 9486 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9487 if (do_outrequests) 9488 BUMP_MIB(mibptr, ipv6OutRequests); 9489 BUMP_MIB(mibptr, ipv6OutDiscards); 9490 freemsg(first_mp); 9491 if (ill != NULL) 9492 ill_refrele(ill); 9493 if (need_decref) 9494 CONN_DEC_REF(connp); 9495 return; 9496 } 9497 ip6h = (ip6_t *)mp->b_rptr; 9498 v6dstp = &ip6h->ip6_dst; 9499 ip6i = (ip6i_t *)ip6h; 9500 } 9501 ip6h = (ip6_t *)&ip6i[1]; 9502 9503 /* 9504 * Advance rptr past the ip6i_t to get ready for 9505 * transmitting the packet. However, if the packet gets 9506 * passed to ip_newroute*_v6 then rptr is moved back so 9507 * that the ip6i_t header can be inspected when the 9508 * packet comes back here after passing through 9509 * ire_add_then_send. 9510 */ 9511 mp->b_rptr = (uchar_t *)ip6h; 9512 9513 /* 9514 * IP6I_ATTACH_IF is set in this function when we had a 9515 * conn and it was either bound to the IPFF_NOFAILOVER address 9516 * or IPV6_BOUND_PIF was set. These options override other 9517 * options that set the ifindex. We come here with 9518 * IP6I_ATTACH_IF set when we can't find the ire and 9519 * ip_newroute_v6 is feeding the packet for second time. 
9520 */ 9521 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9522 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9523 ASSERT(ip6i->ip6i_ifindex != 0); 9524 if (ill != NULL) 9525 ill_refrele(ill); 9526 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9527 NULL, NULL, NULL, NULL); 9528 if (ill == NULL) { 9529 if (do_outrequests) 9530 BUMP_MIB(mibptr, ipv6OutRequests); 9531 BUMP_MIB(mibptr, ipv6OutDiscards); 9532 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9533 ip6i->ip6i_ifindex)); 9534 if (need_decref) 9535 CONN_DEC_REF(connp); 9536 freemsg(first_mp); 9537 return; 9538 } 9539 mibptr = ill->ill_ip6_mib; 9540 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9541 /* 9542 * Preserve the index so that when we return 9543 * from IPSEC processing, we know where to 9544 * send the packet. 9545 */ 9546 if (mctl_present) { 9547 ASSERT(io != NULL); 9548 io->ipsec_out_ill_index = 9549 ip6i->ip6i_ifindex; 9550 } 9551 } 9552 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9553 /* 9554 * This is a multipathing probe packet that has 9555 * been delayed in ND resolution. 
Drop the 9556 * packet for the reasons mentioned in 9557 * nce_queue_mp() 9558 */ 9559 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9560 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9561 freemsg(first_mp); 9562 ill_refrele(ill); 9563 if (need_decref) 9564 CONN_DEC_REF(connp); 9565 return; 9566 } 9567 } 9568 } 9569 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9570 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9571 9572 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9573 if (secpolicy_net_rawaccess(cr) != 0) { 9574 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9575 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9576 NULL, zoneid, NULL, 9577 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9578 if (ire == NULL) { 9579 if (do_outrequests) 9580 BUMP_MIB(mibptr, 9581 ipv6OutRequests); 9582 BUMP_MIB(mibptr, ipv6OutDiscards); 9583 ip1dbg(("ip_wput_v6: bad source " 9584 "addr\n")); 9585 freemsg(first_mp); 9586 if (ill != NULL) 9587 ill_refrele(ill); 9588 if (need_decref) 9589 CONN_DEC_REF(connp); 9590 return; 9591 } 9592 ire_refrele(ire); 9593 } 9594 /* No need to verify again when using ip_newroute */ 9595 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9596 } 9597 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9598 /* 9599 * Make sure they match since ip_newroute*_v6 etc might 9600 * (unknown to them) inspect ip6i_nexthop when 9601 * they think they access ip6_dst. 9602 */ 9603 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9604 } 9605 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9606 cksum_request = 1; 9607 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9608 cksum_request = ip6i->ip6i_checksum_off; 9609 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9610 unspec_src = 1; 9611 9612 if (do_outrequests && ill != NULL) { 9613 BUMP_MIB(mibptr, ipv6OutRequests); 9614 do_outrequests = B_FALSE; 9615 } 9616 /* 9617 * Store ip6i_t info that we need after we come back 9618 * from IPSEC processing. 
9619 */ 9620 if (mctl_present) { 9621 ASSERT(io != NULL); 9622 io->ipsec_out_unspec_src = unspec_src; 9623 } 9624 } 9625 if (connp != NULL && connp->conn_dontroute) 9626 ip6h->ip6_hops = 1; 9627 9628 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9629 goto ipv6multicast; 9630 9631 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9632 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9633 ill_t *conn_outgoing_pill; 9634 9635 conn_outgoing_pill = conn_get_held_ill(connp, 9636 &connp->conn_outgoing_pill, &err); 9637 if (err == ILL_LOOKUP_FAILED) { 9638 if (ill != NULL) 9639 ill_refrele(ill); 9640 if (need_decref) 9641 CONN_DEC_REF(connp); 9642 freemsg(first_mp); 9643 return; 9644 } 9645 if (conn_outgoing_pill != NULL) { 9646 if (ill != NULL) 9647 ill_refrele(ill); 9648 ill = conn_outgoing_pill; 9649 attach_if = B_TRUE; 9650 match_flags = MATCH_IRE_ILL; 9651 mibptr = ill->ill_ip6_mib; 9652 9653 /* 9654 * Check if we need an ire that will not be 9655 * looked up by anybody else i.e. HIDDEN. 9656 */ 9657 if (ill_is_probeonly(ill)) 9658 match_flags |= MATCH_IRE_MARK_HIDDEN; 9659 goto send_from_ill; 9660 } 9661 } 9662 9663 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9664 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9665 ill_t *conn_nofailover_ill; 9666 9667 conn_nofailover_ill = conn_get_held_ill(connp, 9668 &connp->conn_nofailover_ill, &err); 9669 if (err == ILL_LOOKUP_FAILED) { 9670 if (ill != NULL) 9671 ill_refrele(ill); 9672 if (need_decref) 9673 CONN_DEC_REF(connp); 9674 freemsg(first_mp); 9675 return; 9676 } 9677 if (conn_nofailover_ill != NULL) { 9678 if (ill != NULL) 9679 ill_refrele(ill); 9680 ill = conn_nofailover_ill; 9681 attach_if = B_TRUE; 9682 /* 9683 * Assumes that ipc_nofailover_ill is used only for 9684 * multipathing probe packets. These packets are better 9685 * dropped, if they are delayed in ND resolution, for 9686 * the reasons described in nce_queue_mp(). 
9687 * IP6I_DROP_IFDELAYED will be set later on in this 9688 * function for this packet. 9689 */ 9690 drop_if_delayed = B_TRUE; 9691 match_flags = MATCH_IRE_ILL; 9692 mibptr = ill->ill_ip6_mib; 9693 9694 /* 9695 * Check if we need an ire that will not be 9696 * looked up by anybody else i.e. HIDDEN. 9697 */ 9698 if (ill_is_probeonly(ill)) 9699 match_flags |= MATCH_IRE_MARK_HIDDEN; 9700 goto send_from_ill; 9701 } 9702 } 9703 9704 /* 9705 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9706 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9707 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9708 */ 9709 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9710 ASSERT(ip6i->ip6i_ifindex != 0); 9711 attach_if = B_TRUE; 9712 ASSERT(ill != NULL); 9713 match_flags = MATCH_IRE_ILL; 9714 9715 /* 9716 * Check if we need an ire that will not be 9717 * looked up by anybody else i.e. HIDDEN. 9718 */ 9719 if (ill_is_probeonly(ill)) 9720 match_flags |= MATCH_IRE_MARK_HIDDEN; 9721 goto send_from_ill; 9722 } 9723 9724 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9725 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9726 ASSERT(ill != NULL); 9727 goto send_from_ill; 9728 } 9729 9730 /* 9731 * 4. If q is an ill queue and (link local or multicast destination) 9732 * then use that ill. 9733 */ 9734 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9735 goto send_from_ill; 9736 } 9737 9738 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9739 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9740 ill_t *conn_outgoing_ill; 9741 9742 conn_outgoing_ill = conn_get_held_ill(connp, 9743 &connp->conn_outgoing_ill, &err); 9744 if (err == ILL_LOOKUP_FAILED) { 9745 if (ill != NULL) 9746 ill_refrele(ill); 9747 if (need_decref) 9748 CONN_DEC_REF(connp); 9749 freemsg(first_mp); 9750 return; 9751 } 9752 if (ill != NULL) 9753 ill_refrele(ill); 9754 ill = conn_outgoing_ill; 9755 mibptr = ill->ill_ip6_mib; 9756 goto send_from_ill; 9757 } 9758 9759 /* 9760 * 6. For unicast: Just do an IRE lookup for the best match. 9761 * If we get here for a link-local address it is rather random 9762 * what interface we pick on a multihomed host. 9763 * *If* there is an IRE_CACHE (and the link-local address 9764 * isn't duplicated on multi links) this will find the IRE_CACHE. 9765 * Otherwise it will use one of the matching IRE_INTERFACE routes 9766 * for the link-local prefix. Hence, applications 9767 * *should* be encouraged to specify an outgoing interface when sending 9768 * to a link local address. 9769 */ 9770 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9771 !connp->conn_fully_bound)) { 9772 /* 9773 * We cache IRE_CACHEs to avoid lookups. We don't do 9774 * this for the tcp global queue and listen end point 9775 * as it does not really have a real destination to 9776 * talk to. 9777 */ 9778 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp)); 9779 } else { 9780 /* 9781 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9782 * grab a lock here to check for CONDEMNED as it is okay 9783 * to send a packet or two with the IRE_CACHE that is going 9784 * away. 9785 */ 9786 mutex_enter(&connp->conn_lock); 9787 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9788 if (ire != NULL && 9789 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9790 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9791 9792 IRE_REFHOLD(ire); 9793 mutex_exit(&connp->conn_lock); 9794 9795 } else { 9796 boolean_t cached = B_FALSE; 9797 9798 connp->conn_ire_cache = NULL; 9799 mutex_exit(&connp->conn_lock); 9800 /* Release the old ire */ 9801 if (ire != NULL && sctp_ire == NULL) 9802 IRE_REFRELE_NOTR(ire); 9803 9804 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9805 MBLK_GETLABEL(mp)); 9806 if (ire != NULL) { 9807 IRE_REFHOLD_NOTR(ire); 9808 9809 mutex_enter(&connp->conn_lock); 9810 if (!(connp->conn_state_flags & CONN_CLOSING) && 9811 (connp->conn_ire_cache == NULL)) { 9812 rw_enter(&ire->ire_bucket->irb_lock, 9813 RW_READER); 9814 if (!(ire->ire_marks & 9815 IRE_MARK_CONDEMNED)) { 9816 connp->conn_ire_cache = ire; 9817 cached = B_TRUE; 9818 } 9819 rw_exit(&ire->ire_bucket->irb_lock); 9820 } 9821 mutex_exit(&connp->conn_lock); 9822 9823 /* 9824 * We can continue to use the ire but since it 9825 * was not cached, we should drop the extra 9826 * reference. 9827 */ 9828 if (!cached) 9829 IRE_REFRELE_NOTR(ire); 9830 } 9831 } 9832 } 9833 9834 if (ire != NULL) { 9835 if (do_outrequests) { 9836 /* Handle IRE_LOCAL's that might appear here */ 9837 if (ire->ire_type == IRE_CACHE) { 9838 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9839 ill_ip6_mib; 9840 } else { 9841 mibptr = ire->ire_ipif->ipif_ill->ill_ip6_mib; 9842 } 9843 BUMP_MIB(mibptr, ipv6OutRequests); 9844 } 9845 ASSERT(!attach_if); 9846 9847 /* 9848 * Check if the ire has the RTF_MULTIRT flag, inherited 9849 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9850 */ 9851 if (ire->ire_flags & RTF_MULTIRT) { 9852 /* 9853 * Force hop limit of multirouted packets if required. 9854 * The hop limit of such packets is bounded by the 9855 * ip_multirt_ttl ndd variable. 9856 * NDP packets must have a hop limit of 255; don't 9857 * change the hop limit in that case. 
9858 */ 9859 if ((ip_multirt_ttl > 0) && 9860 (ip6h->ip6_hops > ip_multirt_ttl) && 9861 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9862 if (ip_debug > 3) { 9863 ip2dbg(("ip_wput_v6: forcing multirt " 9864 "hop limit to %d (was %d) ", 9865 ip_multirt_ttl, ip6h->ip6_hops)); 9866 pr_addr_dbg("v6dst %s\n", AF_INET6, 9867 &ire->ire_addr_v6); 9868 } 9869 ip6h->ip6_hops = ip_multirt_ttl; 9870 } 9871 9872 /* 9873 * We look at this point if there are pending 9874 * unresolved routes. ire_multirt_need_resolve_v6() 9875 * checks in O(n) that all IRE_OFFSUBNET ire 9876 * entries for the packet's destination and 9877 * flagged RTF_MULTIRT are currently resolved. 9878 * If some remain unresolved, we do a copy 9879 * of the current message. It will be used 9880 * to initiate additional route resolutions. 9881 */ 9882 multirt_need_resolve = 9883 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9884 MBLK_GETLABEL(first_mp)); 9885 ip2dbg(("ip_wput_v6: ire %p, " 9886 "multirt_need_resolve %d, first_mp %p\n", 9887 (void *)ire, multirt_need_resolve, 9888 (void *)first_mp)); 9889 if (multirt_need_resolve) { 9890 copy_mp = copymsg(first_mp); 9891 if (copy_mp != NULL) { 9892 MULTIRT_DEBUG_TAG(copy_mp); 9893 } 9894 } 9895 } 9896 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9897 connp, caller, 0, ip6i_flags, zoneid); 9898 if (need_decref) { 9899 CONN_DEC_REF(connp); 9900 connp = NULL; 9901 } 9902 IRE_REFRELE(ire); 9903 9904 /* 9905 * Try to resolve another multiroute if 9906 * ire_multirt_need_resolve_v6() deemed it necessary. 9907 * copy_mp will be consumed (sent or freed) by 9908 * ip_newroute_v6(). 9909 */ 9910 if (copy_mp != NULL) { 9911 if (mctl_present) { 9912 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9913 } else { 9914 ip6h = (ip6_t *)copy_mp->b_rptr; 9915 } 9916 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9917 &ip6h->ip6_src, NULL, zoneid); 9918 } 9919 if (ill != NULL) 9920 ill_refrele(ill); 9921 return; 9922 } 9923 9924 /* 9925 * No full IRE for this destination. 
Send it to 9926 * ip_newroute_v6 to see if anything else matches. 9927 * Mark this packet as having originated on this 9928 * machine. 9929 * Update rptr if there was an ip6i_t header. 9930 */ 9931 mp->b_prev = NULL; 9932 mp->b_next = NULL; 9933 if (ip6i != NULL) 9934 mp->b_rptr -= sizeof (ip6i_t); 9935 9936 if (unspec_src) { 9937 if (ip6i == NULL) { 9938 /* 9939 * Add ip6i_t header to carry unspec_src 9940 * until the packet comes back in ip_wput_v6. 9941 */ 9942 mp = ip_add_info_v6(mp, NULL, v6dstp); 9943 if (mp == NULL) { 9944 if (do_outrequests) 9945 BUMP_MIB(mibptr, ipv6OutRequests); 9946 BUMP_MIB(mibptr, ipv6OutDiscards); 9947 if (mctl_present) 9948 freeb(first_mp); 9949 if (ill != NULL) 9950 ill_refrele(ill); 9951 if (need_decref) 9952 CONN_DEC_REF(connp); 9953 return; 9954 } 9955 ip6i = (ip6i_t *)mp->b_rptr; 9956 9957 if (mctl_present) { 9958 ASSERT(first_mp != mp); 9959 first_mp->b_cont = mp; 9960 } else { 9961 first_mp = mp; 9962 } 9963 9964 if ((mp->b_wptr - (uchar_t *)ip6i) == 9965 sizeof (ip6i_t)) { 9966 /* 9967 * ndp_resolver called from ip_newroute_v6 9968 * expects pulled up message. 
9969 */ 9970 if (!pullupmsg(mp, -1)) { 9971 ip1dbg(("ip_wput_v6: pullupmsg" 9972 " failed\n")); 9973 if (do_outrequests) { 9974 BUMP_MIB(mibptr, 9975 ipv6OutRequests); 9976 } 9977 BUMP_MIB(mibptr, ipv6OutDiscards); 9978 freemsg(first_mp); 9979 if (ill != NULL) 9980 ill_refrele(ill); 9981 if (need_decref) 9982 CONN_DEC_REF(connp); 9983 return; 9984 } 9985 ip6i = (ip6i_t *)mp->b_rptr; 9986 } 9987 ip6h = (ip6_t *)&ip6i[1]; 9988 v6dstp = &ip6h->ip6_dst; 9989 } 9990 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9991 if (mctl_present) { 9992 ASSERT(io != NULL); 9993 io->ipsec_out_unspec_src = unspec_src; 9994 } 9995 } 9996 if (do_outrequests) 9997 BUMP_MIB(mibptr, ipv6OutRequests); 9998 if (need_decref) 9999 CONN_DEC_REF(connp); 10000 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 10001 if (ill != NULL) 10002 ill_refrele(ill); 10003 return; 10004 10005 10006 /* 10007 * Handle multicast packets with or without an conn. 10008 * Assumes that the transports set ip6_hops taking 10009 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10010 * into account. 10011 */ 10012 ipv6multicast: 10013 ip2dbg(("ip_wput_v6: multicast\n")); 10014 10015 /* 10016 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10017 * 2. If conn_nofailover_ill is set then use that ill. 10018 * 10019 * Hold the conn_lock till we refhold the ill of interest that is 10020 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10021 * while holding any locks, postpone the refrele until after the 10022 * conn_lock is dropped. 
10023 */ 10024 if (connp != NULL) { 10025 mutex_enter(&connp->conn_lock); 10026 conn_lock_held = B_TRUE; 10027 } else { 10028 conn_lock_held = B_FALSE; 10029 } 10030 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10031 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10032 if (err == ILL_LOOKUP_FAILED) { 10033 ip1dbg(("ip_output_v6: multicast" 10034 " conn_outgoing_pill no ipif\n")); 10035 multicast_discard: 10036 ASSERT(saved_ill == NULL); 10037 if (conn_lock_held) 10038 mutex_exit(&connp->conn_lock); 10039 if (ill != NULL) 10040 ill_refrele(ill); 10041 freemsg(first_mp); 10042 if (do_outrequests) 10043 BUMP_MIB(mibptr, ipv6OutDiscards); 10044 if (need_decref) 10045 CONN_DEC_REF(connp); 10046 return; 10047 } 10048 saved_ill = ill; 10049 ill = connp->conn_outgoing_pill; 10050 attach_if = B_TRUE; 10051 match_flags = MATCH_IRE_ILL; 10052 mibptr = ill->ill_ip6_mib; 10053 10054 /* 10055 * Check if we need an ire that will not be 10056 * looked up by anybody else i.e. HIDDEN. 10057 */ 10058 if (ill_is_probeonly(ill)) 10059 match_flags |= MATCH_IRE_MARK_HIDDEN; 10060 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10061 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10062 if (err == ILL_LOOKUP_FAILED) { 10063 ip1dbg(("ip_output_v6: multicast" 10064 " conn_nofailover_ill no ipif\n")); 10065 goto multicast_discard; 10066 } 10067 saved_ill = ill; 10068 ill = connp->conn_nofailover_ill; 10069 attach_if = B_TRUE; 10070 match_flags = MATCH_IRE_ILL; 10071 10072 /* 10073 * Check if we need an ire that will not be 10074 * looked up by anybody else i.e. HIDDEN. 10075 */ 10076 if (ill_is_probeonly(ill)) 10077 match_flags |= MATCH_IRE_MARK_HIDDEN; 10078 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10079 /* 10080 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10081 * we should have an ip6i_t with IP6I_ATTACH_IF if 10082 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10083 * used on this endpoint. 10084 */ 10085 ASSERT(ip6i->ip6i_ifindex != 0); 10086 attach_if = B_TRUE; 10087 ASSERT(ill != NULL); 10088 match_flags = MATCH_IRE_ILL; 10089 10090 /* 10091 * Check if we need an ire that will not be 10092 * looked up by anybody else i.e. HIDDEN. 10093 */ 10094 if (ill_is_probeonly(ill)) 10095 match_flags |= MATCH_IRE_MARK_HIDDEN; 10096 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10097 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10098 10099 ASSERT(ill != NULL); 10100 } else if (ill != NULL) { 10101 /* 10102 * 4. If q is an ill queue and (link local or multicast 10103 * destination) then use that ill. 10104 * We don't need the ipif initialization here. 10105 * This useless assert below is just to prevent lint from 10106 * reporting a null body if statement. 10107 */ 10108 ASSERT(ill != NULL); 10109 } else if (connp != NULL) { 10110 /* 10111 * 5. If IPV6_BOUND_IF has been set use that ill. 10112 * 10113 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10114 * Otherwise look for the best IRE match for the unspecified 10115 * group to determine the ill. 10116 * 10117 * conn_multicast_ill is used for only IPv6 packets. 10118 * conn_multicast_ipif is used for only IPv4 packets. 10119 * Thus a PF_INET6 socket send both IPv4 and IPv6 10120 * multicast packets using different IP*_MULTICAST_IF 10121 * interfaces. 
10122 */ 10123 if (connp->conn_outgoing_ill != NULL) { 10124 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10125 if (err == ILL_LOOKUP_FAILED) { 10126 ip1dbg(("ip_output_v6: multicast" 10127 " conn_outgoing_ill no ipif\n")); 10128 goto multicast_discard; 10129 } 10130 ill = connp->conn_outgoing_ill; 10131 } else if (connp->conn_multicast_ill != NULL) { 10132 err = ill_check_and_refhold(connp->conn_multicast_ill); 10133 if (err == ILL_LOOKUP_FAILED) { 10134 ip1dbg(("ip_output_v6: multicast" 10135 " conn_multicast_ill no ipif\n")); 10136 goto multicast_discard; 10137 } 10138 ill = connp->conn_multicast_ill; 10139 } else { 10140 mutex_exit(&connp->conn_lock); 10141 conn_lock_held = B_FALSE; 10142 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 10143 if (ipif == NULL) { 10144 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10145 goto multicast_discard; 10146 } 10147 /* 10148 * We have a ref to this ipif, so we can safely 10149 * access ipif_ill. 10150 */ 10151 ill = ipif->ipif_ill; 10152 mutex_enter(&ill->ill_lock); 10153 if (!ILL_CAN_LOOKUP(ill)) { 10154 mutex_exit(&ill->ill_lock); 10155 ipif_refrele(ipif); 10156 ill = NULL; 10157 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10158 goto multicast_discard; 10159 } 10160 ill_refhold_locked(ill); 10161 mutex_exit(&ill->ill_lock); 10162 ipif_refrele(ipif); 10163 /* 10164 * Save binding until IPV6_MULTICAST_IF 10165 * changes it 10166 */ 10167 mutex_enter(&connp->conn_lock); 10168 connp->conn_multicast_ill = ill; 10169 connp->conn_orig_multicast_ifindex = 10170 ill->ill_phyint->phyint_ifindex; 10171 mutex_exit(&connp->conn_lock); 10172 } 10173 } 10174 if (conn_lock_held) 10175 mutex_exit(&connp->conn_lock); 10176 10177 if (saved_ill != NULL) 10178 ill_refrele(saved_ill); 10179 10180 ASSERT(ill != NULL); 10181 /* 10182 * For multicast loopback interfaces replace the multicast address 10183 * with a unicast address for the ire lookup. 
10184 */ 10185 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 10186 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10187 10188 mibptr = ill->ill_ip6_mib; 10189 if (do_outrequests) { 10190 BUMP_MIB(mibptr, ipv6OutRequests); 10191 do_outrequests = B_FALSE; 10192 } 10193 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10194 10195 /* 10196 * As we may lose the conn by the time we reach ip_wput_ire_v6 10197 * we copy conn_multicast_loop and conn_dontroute on to an 10198 * ipsec_out. In case if this datagram goes out secure, 10199 * we need the ill_index also. Copy that also into the 10200 * ipsec_out. 10201 */ 10202 if (mctl_present) { 10203 io = (ipsec_out_t *)first_mp->b_rptr; 10204 ASSERT(first_mp->b_datap->db_type == M_CTL); 10205 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10206 } else { 10207 ASSERT(mp == first_mp); 10208 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 10209 BUMP_MIB(mibptr, ipv6OutDiscards); 10210 freemsg(mp); 10211 if (ill != NULL) 10212 ill_refrele(ill); 10213 if (need_decref) 10214 CONN_DEC_REF(connp); 10215 return; 10216 } 10217 io = (ipsec_out_t *)first_mp->b_rptr; 10218 /* This is not a secure packet */ 10219 io->ipsec_out_secure = B_FALSE; 10220 io->ipsec_out_use_global_policy = B_TRUE; 10221 io->ipsec_out_zoneid = 10222 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10223 first_mp->b_cont = mp; 10224 mctl_present = B_TRUE; 10225 } 10226 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10227 io->ipsec_out_unspec_src = unspec_src; 10228 if (connp != NULL) 10229 io->ipsec_out_dontroute = connp->conn_dontroute; 10230 10231 send_from_ill: 10232 ASSERT(ill != NULL); 10233 ASSERT(mibptr == ill->ill_ip6_mib); 10234 if (do_outrequests) { 10235 BUMP_MIB(mibptr, ipv6OutRequests); 10236 do_outrequests = B_FALSE; 10237 } 10238 10239 if (io != NULL) 10240 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10241 10242 /* 10243 * When a specific ill is specified (using IPV6_PKTINFO, 10244 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10245 * on routing entries (ftable and ctable) that have a matching 10246 * ire->ire_ipif->ipif_ill. Thus this can only be used 10247 * for destinations that are on-link for the specific ill 10248 * and that can appear on multiple links. Thus it is useful 10249 * for multicast destinations, link-local destinations, and 10250 * at some point perhaps for site-local destinations (if the 10251 * node sits at a site boundary). 10252 * We create the cache entries in the regular ctable since 10253 * it can not "confuse" things for other destinations. 10254 * table. 10255 * 10256 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10257 * It is used only when ire_cache_lookup is used above. 10258 */ 10259 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10260 zoneid, MBLK_GETLABEL(mp), match_flags); 10261 if (ire != NULL) { 10262 /* 10263 * Check if the ire has the RTF_MULTIRT flag, inherited 10264 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10265 */ 10266 if (ire->ire_flags & RTF_MULTIRT) { 10267 /* 10268 * Force hop limit of multirouted packets if required. 10269 * The hop limit of such packets is bounded by the 10270 * ip_multirt_ttl ndd variable. 
10271 * NDP packets must have a hop limit of 255; don't 10272 * change the hop limit in that case. 10273 */ 10274 if ((ip_multirt_ttl > 0) && 10275 (ip6h->ip6_hops > ip_multirt_ttl) && 10276 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10277 if (ip_debug > 3) { 10278 ip2dbg(("ip_wput_v6: forcing multirt " 10279 "hop limit to %d (was %d) ", 10280 ip_multirt_ttl, ip6h->ip6_hops)); 10281 pr_addr_dbg("v6dst %s\n", AF_INET6, 10282 &ire->ire_addr_v6); 10283 } 10284 ip6h->ip6_hops = ip_multirt_ttl; 10285 } 10286 10287 /* 10288 * We look at this point if there are pending 10289 * unresolved routes. ire_multirt_need_resolve_v6() 10290 * checks in O(n) that all IRE_OFFSUBNET ire 10291 * entries for the packet's destination and 10292 * flagged RTF_MULTIRT are currently resolved. 10293 * If some remain unresolved, we make a copy 10294 * of the current message. It will be used 10295 * to initiate additional route resolutions. 10296 */ 10297 multirt_need_resolve = 10298 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10299 MBLK_GETLABEL(first_mp)); 10300 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10301 "multirt_need_resolve %d, first_mp %p\n", 10302 (void *)ire, multirt_need_resolve, 10303 (void *)first_mp)); 10304 if (multirt_need_resolve) { 10305 copy_mp = copymsg(first_mp); 10306 if (copy_mp != NULL) { 10307 MULTIRT_DEBUG_TAG(copy_mp); 10308 } 10309 } 10310 } 10311 10312 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10313 ill->ill_name, (void *)ire, 10314 ill->ill_phyint->phyint_ifindex)); 10315 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10316 connp, caller, 10317 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10318 ip6i_flags, zoneid); 10319 ire_refrele(ire); 10320 if (need_decref) { 10321 CONN_DEC_REF(connp); 10322 connp = NULL; 10323 } 10324 10325 /* 10326 * Try to resolve another multiroute if 10327 * ire_multirt_need_resolve_v6() deemed it necessary. 10328 * copy_mp will be consumed (sent or freed) by 10329 * ip_newroute_[ipif_]v6(). 
10330 */ 10331 if (copy_mp != NULL) { 10332 if (mctl_present) { 10333 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10334 } else { 10335 ip6h = (ip6_t *)copy_mp->b_rptr; 10336 } 10337 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10338 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10339 zoneid); 10340 if (ipif == NULL) { 10341 ip1dbg(("ip_wput_v6: No ipif for " 10342 "multicast\n")); 10343 MULTIRT_DEBUG_UNTAG(copy_mp); 10344 freemsg(copy_mp); 10345 return; 10346 } 10347 ip_newroute_ipif_v6(q, copy_mp, ipif, 10348 ip6h->ip6_dst, unspec_src, zoneid); 10349 ipif_refrele(ipif); 10350 } else { 10351 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10352 &ip6h->ip6_src, ill, zoneid); 10353 } 10354 } 10355 ill_refrele(ill); 10356 return; 10357 } 10358 if (need_decref) { 10359 CONN_DEC_REF(connp); 10360 connp = NULL; 10361 } 10362 10363 /* Update rptr if there was an ip6i_t header. */ 10364 if (ip6i != NULL) 10365 mp->b_rptr -= sizeof (ip6i_t); 10366 if (unspec_src || attach_if) { 10367 if (ip6i == NULL) { 10368 /* 10369 * Add ip6i_t header to carry unspec_src 10370 * or attach_if until the packet comes back in 10371 * ip_wput_v6. 10372 */ 10373 if (mctl_present) { 10374 first_mp->b_cont = 10375 ip_add_info_v6(mp, NULL, v6dstp); 10376 mp = first_mp->b_cont; 10377 if (mp == NULL) 10378 freeb(first_mp); 10379 } else { 10380 first_mp = mp = ip_add_info_v6(mp, NULL, 10381 v6dstp); 10382 } 10383 if (mp == NULL) { 10384 BUMP_MIB(mibptr, ipv6OutDiscards); 10385 ill_refrele(ill); 10386 return; 10387 } 10388 ip6i = (ip6i_t *)mp->b_rptr; 10389 if ((mp->b_wptr - (uchar_t *)ip6i) == 10390 sizeof (ip6i_t)) { 10391 /* 10392 * ndp_resolver called from ip_newroute_v6 10393 * expects a pulled up message. 
10394 */ 10395 if (!pullupmsg(mp, -1)) { 10396 ip1dbg(("ip_wput_v6: pullupmsg" 10397 " failed\n")); 10398 BUMP_MIB(mibptr, ipv6OutDiscards); 10399 freemsg(first_mp); 10400 return; 10401 } 10402 ip6i = (ip6i_t *)mp->b_rptr; 10403 } 10404 ip6h = (ip6_t *)&ip6i[1]; 10405 v6dstp = &ip6h->ip6_dst; 10406 } 10407 if (unspec_src) 10408 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10409 if (attach_if) { 10410 /* 10411 * Bind to nofailover/BOUND_PIF overrides ifindex. 10412 */ 10413 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10414 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10415 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10416 if (drop_if_delayed) { 10417 /* This is a multipathing probe packet */ 10418 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10419 } 10420 } 10421 if (mctl_present) { 10422 ASSERT(io != NULL); 10423 io->ipsec_out_unspec_src = unspec_src; 10424 } 10425 } 10426 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10427 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10428 unspec_src, zoneid); 10429 } else { 10430 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10431 zoneid); 10432 } 10433 ill_refrele(ill); 10434 return; 10435 10436 notv6: 10437 /* 10438 * XXX implement a IPv4 and IPv6 packet counter per conn and 10439 * switch when ratio exceeds e.g. 10:1 10440 */ 10441 if (q->q_next == NULL) { 10442 connp = Q_TO_CONN(q); 10443 10444 if (IPCL_IS_TCP(connp)) { 10445 /* change conn_send for the tcp_v4_connections */ 10446 connp->conn_send = ip_output; 10447 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10448 /* The 'q' is the default SCTP queue */ 10449 connp = (conn_t *)arg; 10450 } else { 10451 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 10452 } 10453 } 10454 BUMP_MIB(mibptr, ipv6OutIPv4); 10455 (void) ip_output(arg, first_mp, arg2, caller); 10456 if (ill != NULL) 10457 ill_refrele(ill); 10458 } 10459 10460 /* 10461 * If this is a conn_t queue, then we pass in the conn. This includes the 10462 * zoneid. 
10463 * Otherwise, this is a message for an ill_t queue, 10464 * in which case we use the global zoneid since those are all part of 10465 * the global zone. 10466 */ 10467 static void 10468 ip_wput_v6(queue_t *q, mblk_t *mp) 10469 { 10470 if (CONN_Q(q)) 10471 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10472 else 10473 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10474 } 10475 10476 static void 10477 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10478 { 10479 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10480 io->ipsec_out_attach_if = B_TRUE; 10481 io->ipsec_out_ill_index = attach_index; 10482 } 10483 10484 /* 10485 * NULL send-to queue - packet is to be delivered locally. 10486 */ 10487 void 10488 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10489 ire_t *ire, int fanout_flags) 10490 { 10491 uint32_t ports; 10492 mblk_t *mp = first_mp, *first_mp1; 10493 boolean_t mctl_present; 10494 uint8_t nexthdr; 10495 uint16_t hdr_length; 10496 ipsec_out_t *io; 10497 mib2_ipv6IfStatsEntry_t *mibptr; 10498 ilm_t *ilm; 10499 uint_t nexthdr_offset; 10500 10501 if (DB_TYPE(mp) == M_CTL) { 10502 io = (ipsec_out_t *)mp->b_rptr; 10503 if (!io->ipsec_out_secure) { 10504 mp = mp->b_cont; 10505 freeb(first_mp); 10506 first_mp = mp; 10507 mctl_present = B_FALSE; 10508 } else { 10509 mctl_present = B_TRUE; 10510 mp = first_mp->b_cont; 10511 ipsec_out_to_in(first_mp); 10512 } 10513 } else { 10514 mctl_present = B_FALSE; 10515 } 10516 10517 nexthdr = ip6h->ip6_nxt; 10518 mibptr = ill->ill_ip6_mib; 10519 10520 /* Fastpath */ 10521 switch (nexthdr) { 10522 case IPPROTO_TCP: 10523 case IPPROTO_UDP: 10524 case IPPROTO_ICMPV6: 10525 case IPPROTO_SCTP: 10526 hdr_length = IPV6_HDR_LEN; 10527 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10528 (uchar_t *)ip6h); 10529 break; 10530 default: { 10531 uint8_t *nexthdrp; 10532 10533 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10534 &hdr_length, &nexthdrp)) { 10535 /* Malformed packet */ 10536 BUMP_MIB(mibptr, 
ipv6OutDiscards); 10537 freemsg(first_mp); 10538 return; 10539 } 10540 nexthdr = *nexthdrp; 10541 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10542 break; 10543 } 10544 } 10545 10546 10547 UPDATE_OB_PKT_COUNT(ire); 10548 ire->ire_last_used_time = lbolt; 10549 10550 /* 10551 * Remove reacability confirmation bit from version field 10552 * before looping back the packet. 10553 */ 10554 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10555 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10556 } 10557 10558 switch (nexthdr) { 10559 case IPPROTO_TCP: 10560 if (DB_TYPE(mp) == M_DATA) { 10561 /* 10562 * M_DATA mblk, so init mblk (chain) for 10563 * no struio(). 10564 */ 10565 mblk_t *mp1 = mp; 10566 10567 do { 10568 mp1->b_datap->db_struioflag = 0; 10569 } while ((mp1 = mp1->b_cont) != NULL); 10570 } 10571 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10572 TCP_PORTS_OFFSET); 10573 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10574 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10575 IP_FF_IP6INFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10576 hdr_length, mctl_present, ire->ire_zoneid); 10577 return; 10578 10579 case IPPROTO_UDP: 10580 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10581 UDP_PORTS_OFFSET); 10582 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10583 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO| 10584 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10585 return; 10586 10587 case IPPROTO_SCTP: 10588 { 10589 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10590 10591 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10592 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10593 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IP6INFO, 10594 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10595 ire->ire_zoneid); 10596 return; 10597 } 10598 case IPPROTO_ICMPV6: { 10599 icmp6_t *icmp6; 10600 10601 /* check for full IPv6+ICMPv6 header */ 10602 if ((mp->b_wptr - mp->b_rptr) < 10603 (hdr_length + ICMP6_MINLEN)) { 10604 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10605 ip1dbg(("ip_wput_v6: ICMP hdr 
pullupmsg" 10606 " failed\n")); 10607 BUMP_MIB(mibptr, ipv6OutDiscards); 10608 freemsg(first_mp); 10609 return; 10610 } 10611 ip6h = (ip6_t *)mp->b_rptr; 10612 } 10613 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10614 10615 /* Update output mib stats */ 10616 icmp_update_out_mib_v6(ill, icmp6); 10617 10618 /* Check variable for testing applications */ 10619 if (ipv6_drop_inbound_icmpv6) { 10620 freemsg(first_mp); 10621 return; 10622 } 10623 /* 10624 * Assume that there is always at least one conn for 10625 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10626 * where there is no conn. 10627 */ 10628 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10629 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10630 /* 10631 * In the multicast case, applications may have 10632 * joined the group from different zones, so we 10633 * need to deliver the packet to each of them. 10634 * Loop through the multicast memberships 10635 * structures (ilm) on the receive ill and send 10636 * a copy of the packet up each matching one. 10637 * However, we don't do this for multicasts sent 10638 * on the loopback interface (PHYI_LOOPBACK flag 10639 * set) as they must stay in the sender's zone. 
10640 */ 10641 ILM_WALKER_HOLD(ill); 10642 for (ilm = ill->ill_ilm; ilm != NULL; 10643 ilm = ilm->ilm_next) { 10644 if (ilm->ilm_flags & ILM_DELETED) 10645 continue; 10646 if (!IN6_ARE_ADDR_EQUAL( 10647 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10648 continue; 10649 if ((fanout_flags & 10650 IP_FF_NO_MCAST_LOOP) && 10651 ilm->ilm_zoneid == ire->ire_zoneid) 10652 continue; 10653 if (!ipif_lookup_zoneid(ill, 10654 ilm->ilm_zoneid, IPIF_UP, NULL)) 10655 continue; 10656 10657 first_mp1 = ip_copymsg(first_mp); 10658 if (first_mp1 == NULL) 10659 continue; 10660 icmp_inbound_v6(q, first_mp1, ill, 10661 hdr_length, mctl_present, 10662 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10663 NULL); 10664 } 10665 ILM_WALKER_RELE(ill); 10666 } else { 10667 first_mp1 = ip_copymsg(first_mp); 10668 if (first_mp1 != NULL) 10669 icmp_inbound_v6(q, first_mp1, ill, 10670 hdr_length, mctl_present, 10671 IP6_NO_IPPOLICY, ire->ire_zoneid, 10672 NULL); 10673 } 10674 } 10675 /* FALLTHRU */ 10676 default: { 10677 /* 10678 * Handle protocols with which IPv6 is less intimate. 10679 */ 10680 fanout_flags |= IP_FF_RAWIP|IP_FF_IP6INFO; 10681 10682 /* 10683 * Enable sending ICMP for "Unknown" nexthdr 10684 * case. i.e. where we did not FALLTHRU from 10685 * IPPROTO_ICMPV6 processing case above. 10686 */ 10687 if (nexthdr != IPPROTO_ICMPV6) 10688 fanout_flags |= IP_FF_SEND_ICMP; 10689 /* 10690 * Note: There can be more than one stream bound 10691 * to a particular protocol. When this is the case, 10692 * each one gets a copy of any incoming packets. 10693 */ 10694 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10695 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10696 mctl_present, ire->ire_zoneid); 10697 return; 10698 } 10699 } 10700 } 10701 10702 /* 10703 * Send packet using IRE. 10704 * Checksumming is controlled by cksum_request: 10705 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 
10706 * 1 => Skip TCP/UDP/SCTP checksum 10707 * Otherwise => checksum_request contains insert offset for checksum 10708 * 10709 * Assumes that the following set of headers appear in the first 10710 * mblk: 10711 * ip6_t 10712 * Any extension headers 10713 * TCP/UDP/SCTP header (if present) 10714 * The routine can handle an ICMPv6 header that is not in the first mblk. 10715 * 10716 * NOTE : This function does not ire_refrele the ire passed in as the 10717 * argument unlike ip_wput_ire where the REFRELE is done. 10718 * Refer to ip_wput_ire for more on this. 10719 */ 10720 static void 10721 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10722 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10723 zoneid_t zoneid) 10724 { 10725 ip6_t *ip6h; 10726 uint8_t nexthdr; 10727 uint16_t hdr_length; 10728 uint_t reachable = 0x0; 10729 ill_t *ill; 10730 mib2_ipv6IfStatsEntry_t *mibptr; 10731 mblk_t *first_mp; 10732 boolean_t mctl_present; 10733 ipsec_out_t *io; 10734 boolean_t conn_dontroute; /* conn value for multicast */ 10735 boolean_t conn_multicast_loop; /* conn value for multicast */ 10736 boolean_t multicast_forward; /* Should we forward ? */ 10737 int max_frag; 10738 10739 ill = ire_to_ill(ire); 10740 first_mp = mp; 10741 multicast_forward = B_FALSE; 10742 10743 if (mp->b_datap->db_type != M_CTL) { 10744 ip6h = (ip6_t *)first_mp->b_rptr; 10745 } else { 10746 io = (ipsec_out_t *)first_mp->b_rptr; 10747 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10748 /* 10749 * Grab the zone id now because the M_CTL can be discarded by 10750 * ip_wput_ire_parse_ipsec_out() below. 10751 */ 10752 ASSERT(zoneid == io->ipsec_out_zoneid); 10753 ASSERT(zoneid != ALL_ZONES); 10754 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10755 /* 10756 * For the multicast case, ipsec_out carries conn_dontroute and 10757 * conn_multicast_loop as conn may not be available here. 
We 10758 * need this for multicast loopback and forwarding which is done 10759 * later in the code. 10760 */ 10761 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10762 conn_dontroute = io->ipsec_out_dontroute; 10763 conn_multicast_loop = io->ipsec_out_multicast_loop; 10764 /* 10765 * If conn_dontroute is not set or conn_multicast_loop 10766 * is set, we need to do forwarding/loopback. For 10767 * datagrams from ip_wput_multicast, conn_dontroute is 10768 * set to B_TRUE and conn_multicast_loop is set to 10769 * B_FALSE so that we neither do forwarding nor 10770 * loopback. 10771 */ 10772 if (!conn_dontroute || conn_multicast_loop) 10773 multicast_forward = B_TRUE; 10774 } 10775 } 10776 10777 /* 10778 * If the sender didn't supply the hop limit and there is a default 10779 * unicast hop limit associated with the output interface, we use 10780 * that if the packet is unicast. Interface specific unicast hop 10781 * limits as set via the SIOCSLIFLNKINFO ioctl. 10782 */ 10783 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10784 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10785 ip6h->ip6_hops = ill->ill_max_hops; 10786 } 10787 10788 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10789 ire->ire_zoneid != ALL_ZONES) { 10790 /* 10791 * When a zone sends a packet to another zone, we try to deliver 10792 * the packet under the same conditions as if the destination 10793 * was a real node on the network. To do so, we look for a 10794 * matching route in the forwarding table. 10795 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10796 * ip_newroute_v6() does. 10797 * Note that IRE_LOCAL are special, since they are used 10798 * when the zoneid doesn't match in some cases. This means that 10799 * we need to handle ipha_src differently since ire_src_addr 10800 * belongs to the receiving zone instead of the sending zone. 
10801 * When ip_restrict_interzone_loopback is set, then 10802 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10803 * for loopback between zones when the logical "Ethernet" would 10804 * have looped them back. 10805 */ 10806 ire_t *src_ire; 10807 10808 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10809 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10810 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10811 if (src_ire != NULL && 10812 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10813 (!ip_restrict_interzone_loopback || 10814 ire_local_same_ill_group(ire, src_ire))) { 10815 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10816 !unspec_src) { 10817 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10818 } 10819 ire_refrele(src_ire); 10820 } else { 10821 BUMP_MIB(ill->ill_ip6_mib, ipv6OutNoRoutes); 10822 if (src_ire != NULL) { 10823 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10824 ire_refrele(src_ire); 10825 freemsg(first_mp); 10826 return; 10827 } 10828 ire_refrele(src_ire); 10829 } 10830 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10831 /* Failed */ 10832 freemsg(first_mp); 10833 return; 10834 } 10835 icmp_unreachable_v6(q, first_mp, 10836 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10837 zoneid); 10838 return; 10839 } 10840 } 10841 10842 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10843 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10844 connp, unspec_src, zoneid); 10845 if (mp == NULL) { 10846 return; 10847 } 10848 } 10849 10850 first_mp = mp; 10851 if (mp->b_datap->db_type == M_CTL) { 10852 io = (ipsec_out_t *)mp->b_rptr; 10853 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10854 mp = mp->b_cont; 10855 mctl_present = B_TRUE; 10856 } else { 10857 mctl_present = B_FALSE; 10858 } 10859 10860 ip6h = (ip6_t *)mp->b_rptr; 10861 nexthdr = ip6h->ip6_nxt; 10862 mibptr = ill->ill_ip6_mib; 10863 10864 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10865 ipif_t *ipif; 10866 10867 /* 10868 * Select 
the source address using ipif_select_source_v6. 10869 */ 10870 if (attach_index != 0) { 10871 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10872 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10873 } else { 10874 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10875 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10876 } 10877 if (ipif == NULL) { 10878 if (ip_debug > 2) { 10879 /* ip1dbg */ 10880 pr_addr_dbg("ip_wput_ire_v6: no src for " 10881 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10882 printf("ip_wput_ire_v6: interface name %s\n", 10883 ill->ill_name); 10884 } 10885 freemsg(first_mp); 10886 return; 10887 } 10888 ip6h->ip6_src = ipif->ipif_v6src_addr; 10889 ipif_refrele(ipif); 10890 } 10891 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10892 if ((connp != NULL && connp->conn_multicast_loop) || 10893 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10894 ilm_t *ilm; 10895 10896 ILM_WALKER_HOLD(ill); 10897 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10898 ILM_WALKER_RELE(ill); 10899 if (ilm != NULL) { 10900 mblk_t *nmp; 10901 int fanout_flags = 0; 10902 10903 if (connp != NULL && 10904 !connp->conn_multicast_loop) { 10905 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10906 } 10907 ip1dbg(("ip_wput_ire_v6: " 10908 "Loopback multicast\n")); 10909 nmp = ip_copymsg(first_mp); 10910 if (nmp != NULL) { 10911 ip6_t *nip6h; 10912 10913 if (mctl_present) { 10914 nip6h = (ip6_t *) 10915 nmp->b_cont->b_rptr; 10916 } else { 10917 nip6h = (ip6_t *)nmp->b_rptr; 10918 } 10919 /* 10920 * Deliver locally and to every local 10921 * zone, except the sending zone when 10922 * IPV6_MULTICAST_LOOP is disabled. 
10923 */ 10924 ip_wput_local_v6(RD(q), ill, nip6h, nmp, 10925 ire, fanout_flags); 10926 } else { 10927 BUMP_MIB(mibptr, ipv6OutDiscards); 10928 ip1dbg(("ip_wput_ire_v6: " 10929 "copymsg failed\n")); 10930 } 10931 } 10932 } 10933 if (ip6h->ip6_hops == 0 || 10934 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10935 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10936 /* 10937 * Local multicast or just loopback on loopback 10938 * interface. 10939 */ 10940 BUMP_MIB(mibptr, ipv6OutMcastPkts); 10941 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10942 freemsg(first_mp); 10943 return; 10944 } 10945 } 10946 10947 if (ire->ire_stq != NULL) { 10948 uint32_t sum; 10949 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10950 ill_phyint->phyint_ifindex; 10951 queue_t *dev_q = ire->ire_stq->q_next; 10952 10953 /* 10954 * non-NULL send-to queue - packet is to be sent 10955 * out an interface. 10956 */ 10957 10958 /* Driver is flow-controlling? */ 10959 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10960 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 10961 /* 10962 * Queue packet if we have an conn to give back 10963 * pressure. We can't queue packets intended for 10964 * hardware acceleration since we've tossed that 10965 * state already. If the packet is being fed back 10966 * from ire_send_v6, we don't know the position in 10967 * the queue to enqueue the packet and we discard 10968 * the packet. 10969 */ 10970 if (ip_output_queue && connp != NULL && 10971 !mctl_present && caller != IRE_SEND) { 10972 if (caller == IP_WSRV) { 10973 connp->conn_did_putbq = 1; 10974 (void) putbq(connp->conn_wq, mp); 10975 conn_drain_insert(connp); 10976 /* 10977 * caller == IP_WSRV implies we are 10978 * the service thread, and the 10979 * queue is already noenabled. 10980 * The check for canput and 10981 * the putbq is not atomic. 10982 * So we need to check again. 
10983 */ 10984 if (canput(dev_q)) 10985 connp->conn_did_putbq = 0; 10986 } else { 10987 (void) putq(connp->conn_wq, mp); 10988 } 10989 return; 10990 } 10991 BUMP_MIB(mibptr, ipv6OutDiscards); 10992 freemsg(first_mp); 10993 return; 10994 } 10995 10996 /* 10997 * Look for reachability confirmations from the transport. 10998 */ 10999 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11000 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11001 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11002 if (mctl_present) 11003 io->ipsec_out_reachable = B_TRUE; 11004 } 11005 /* Fastpath */ 11006 switch (nexthdr) { 11007 case IPPROTO_TCP: 11008 case IPPROTO_UDP: 11009 case IPPROTO_ICMPV6: 11010 case IPPROTO_SCTP: 11011 hdr_length = IPV6_HDR_LEN; 11012 break; 11013 default: { 11014 uint8_t *nexthdrp; 11015 11016 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11017 &hdr_length, &nexthdrp)) { 11018 /* Malformed packet */ 11019 BUMP_MIB(mibptr, ipv6OutDiscards); 11020 freemsg(first_mp); 11021 return; 11022 } 11023 nexthdr = *nexthdrp; 11024 break; 11025 } 11026 } 11027 11028 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11029 uint16_t *up; 11030 uint16_t *insp; 11031 11032 /* 11033 * The packet header is processed once for all, even 11034 * in the multirouting case. We disable hardware 11035 * checksum if the packet is multirouted, as it will be 11036 * replicated via several interfaces, and not all of 11037 * them may have this capability. 11038 */ 11039 if (cksum_request == 1 && 11040 !(ire->ire_flags & RTF_MULTIRT)) { 11041 /* Skip the transport checksum */ 11042 goto cksum_done; 11043 } 11044 /* 11045 * Do user-configured raw checksum. 
11046 * Compute checksum and insert at offset "cksum_request" 11047 */ 11048 11049 /* check for enough headers for checksum */ 11050 cksum_request += hdr_length; /* offset from rptr */ 11051 if ((mp->b_wptr - mp->b_rptr) < 11052 (cksum_request + sizeof (int16_t))) { 11053 if (!pullupmsg(mp, 11054 cksum_request + sizeof (int16_t))) { 11055 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11056 " failed\n")); 11057 BUMP_MIB(mibptr, ipv6OutDiscards); 11058 freemsg(first_mp); 11059 return; 11060 } 11061 ip6h = (ip6_t *)mp->b_rptr; 11062 } 11063 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11064 ASSERT(((uintptr_t)insp & 0x1) == 0); 11065 up = (uint16_t *)&ip6h->ip6_src; 11066 /* 11067 * icmp has placed length and routing 11068 * header adjustment in *insp. 11069 */ 11070 sum = htons(nexthdr) + 11071 up[0] + up[1] + up[2] + up[3] + 11072 up[4] + up[5] + up[6] + up[7] + 11073 up[8] + up[9] + up[10] + up[11] + 11074 up[12] + up[13] + up[14] + up[15]; 11075 sum = (sum & 0xffff) + (sum >> 16); 11076 *insp = IP_CSUM(mp, hdr_length, sum); 11077 if (*insp == 0) 11078 *insp = 0xFFFF; 11079 } else if (nexthdr == IPPROTO_TCP) { 11080 uint16_t *up; 11081 11082 /* 11083 * Check for full IPv6 header + enough TCP header 11084 * to get at the checksum field. 11085 */ 11086 if ((mp->b_wptr - mp->b_rptr) < 11087 (hdr_length + TCP_CHECKSUM_OFFSET + 11088 TCP_CHECKSUM_SIZE)) { 11089 if (!pullupmsg(mp, hdr_length + 11090 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11091 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11092 " failed\n")); 11093 BUMP_MIB(mibptr, ipv6OutDiscards); 11094 freemsg(first_mp); 11095 return; 11096 } 11097 ip6h = (ip6_t *)mp->b_rptr; 11098 } 11099 11100 up = (uint16_t *)&ip6h->ip6_src; 11101 /* 11102 * Note: The TCP module has stored the length value 11103 * into the tcp checksum field, so we don't 11104 * need to explicitly sum it in here. 
11105 */ 11106 sum = up[0] + up[1] + up[2] + up[3] + 11107 up[4] + up[5] + up[6] + up[7] + 11108 up[8] + up[9] + up[10] + up[11] + 11109 up[12] + up[13] + up[14] + up[15]; 11110 11111 /* Fold the initial sum */ 11112 sum = (sum & 0xffff) + (sum >> 16); 11113 11114 up = (uint16_t *)(((uchar_t *)ip6h) + 11115 hdr_length + TCP_CHECKSUM_OFFSET); 11116 11117 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11118 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11119 ire->ire_max_frag, mctl_present, sum); 11120 11121 /* Software checksum? */ 11122 if (DB_CKSUMFLAGS(mp) == 0) { 11123 IP6_STAT(ip6_out_sw_cksum); 11124 IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, 11125 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11126 hdr_length); 11127 } 11128 } else if (nexthdr == IPPROTO_UDP) { 11129 uint16_t *up; 11130 11131 /* 11132 * check for full IPv6 header + enough UDP header 11133 * to get at the UDP checksum field 11134 */ 11135 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11136 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11137 if (!pullupmsg(mp, hdr_length + 11138 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11139 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11140 " failed\n")); 11141 BUMP_MIB(mibptr, ipv6OutDiscards); 11142 freemsg(first_mp); 11143 return; 11144 } 11145 ip6h = (ip6_t *)mp->b_rptr; 11146 } 11147 up = (uint16_t *)&ip6h->ip6_src; 11148 /* 11149 * Note: The UDP module has stored the length value 11150 * into the udp checksum field, so we don't 11151 * need to explicitly sum it in here. 
11152 */ 11153 sum = up[0] + up[1] + up[2] + up[3] + 11154 up[4] + up[5] + up[6] + up[7] + 11155 up[8] + up[9] + up[10] + up[11] + 11156 up[12] + up[13] + up[14] + up[15]; 11157 11158 /* Fold the initial sum */ 11159 sum = (sum & 0xffff) + (sum >> 16); 11160 11161 up = (uint16_t *)(((uchar_t *)ip6h) + 11162 hdr_length + UDP_CHECKSUM_OFFSET); 11163 11164 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11165 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11166 ire->ire_max_frag, mctl_present, sum); 11167 11168 /* Software checksum? */ 11169 if (DB_CKSUMFLAGS(mp) == 0) { 11170 IP6_STAT(ip6_out_sw_cksum); 11171 IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, 11172 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11173 hdr_length); 11174 } 11175 } else if (nexthdr == IPPROTO_ICMPV6) { 11176 uint16_t *up; 11177 icmp6_t *icmp6; 11178 11179 /* check for full IPv6+ICMPv6 header */ 11180 if ((mp->b_wptr - mp->b_rptr) < 11181 (hdr_length + ICMP6_MINLEN)) { 11182 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11183 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11184 " failed\n")); 11185 BUMP_MIB(mibptr, ipv6OutDiscards); 11186 freemsg(first_mp); 11187 return; 11188 } 11189 ip6h = (ip6_t *)mp->b_rptr; 11190 } 11191 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11192 up = (uint16_t *)&ip6h->ip6_src; 11193 /* 11194 * icmp has placed length and routing 11195 * header adjustment in icmp6_cksum. 
11196 */ 11197 sum = htons(IPPROTO_ICMPV6) + 11198 up[0] + up[1] + up[2] + up[3] + 11199 up[4] + up[5] + up[6] + up[7] + 11200 up[8] + up[9] + up[10] + up[11] + 11201 up[12] + up[13] + up[14] + up[15]; 11202 sum = (sum & 0xffff) + (sum >> 16); 11203 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11204 if (icmp6->icmp6_cksum == 0) 11205 icmp6->icmp6_cksum = 0xFFFF; 11206 11207 /* Update output mib stats */ 11208 icmp_update_out_mib_v6(ill, icmp6); 11209 } else if (nexthdr == IPPROTO_SCTP) { 11210 sctp_hdr_t *sctph; 11211 11212 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11213 if (!pullupmsg(mp, hdr_length + 11214 sizeof (*sctph))) { 11215 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11216 " failed\n")); 11217 BUMP_MIB(ill->ill_ip6_mib, 11218 ipv6OutDiscards); 11219 freemsg(mp); 11220 return; 11221 } 11222 ip6h = (ip6_t *)mp->b_rptr; 11223 } 11224 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11225 sctph->sh_chksum = 0; 11226 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11227 } 11228 11229 cksum_done: 11230 /* 11231 * We force the insertion of a fragment header using the 11232 * IPH_FRAG_HDR flag in two cases: 11233 * - after reception of an ICMPv6 "packet too big" message 11234 * with a MTU < 1280 (cf. RFC 2460 section 5) 11235 * - for multirouted IPv6 packets, so that the receiver can 11236 * discard duplicates according to their fragment identifier 11237 * 11238 * Two flags modifed from the API can modify this behavior. 11239 * The first is IPV6_USE_MIN_MTU. With this API the user 11240 * can specify how to manage PMTUD for unicast and multicast. 11241 * 11242 * IPV6_DONTFRAG disallows fragmentation. 
11243 */ 11244 max_frag = ire->ire_max_frag; 11245 switch (IP6I_USE_MIN_MTU_API(flags)) { 11246 case IPV6_USE_MIN_MTU_DEFAULT: 11247 case IPV6_USE_MIN_MTU_UNICAST: 11248 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11249 max_frag = IPV6_MIN_MTU; 11250 } 11251 break; 11252 11253 case IPV6_USE_MIN_MTU_NEVER: 11254 max_frag = IPV6_MIN_MTU; 11255 break; 11256 } 11257 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11258 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11259 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11260 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11261 max_frag, B_FALSE, B_TRUE, zoneid); 11262 return; 11263 } 11264 11265 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11266 (mp->b_cont ? msgdsize(mp) : 11267 mp->b_wptr - (uchar_t *)ip6h)) { 11268 ip0dbg(("Packet length mismatch: %d, %ld\n", 11269 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11270 msgdsize(mp))); 11271 freemsg(first_mp); 11272 return; 11273 } 11274 /* Do IPSEC processing first */ 11275 if (mctl_present) { 11276 if (attach_index != 0) 11277 ipsec_out_attach_if(io, attach_index); 11278 ipsec_out_process(q, first_mp, ire, ill_index); 11279 return; 11280 } 11281 ASSERT(mp->b_prev == NULL); 11282 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11283 ntohs(ip6h->ip6_plen) + 11284 IPV6_HDR_LEN, max_frag)); 11285 ASSERT(mp == first_mp); 11286 /* Initiate IPPF processing */ 11287 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 11288 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11289 if (mp == NULL) { 11290 return; 11291 } 11292 } 11293 ip_wput_frag_v6(mp, ire, reachable, connp, 11294 caller, max_frag); 11295 return; 11296 } 11297 /* Do IPSEC processing first */ 11298 if (mctl_present) { 11299 int extra_len = ipsec_out_extra_length(first_mp); 11300 11301 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11302 max_frag && ip_ulp_cando_pkt2big(nexthdr)) { 11303 /* 11304 * IPsec headers will push the packet over the 11305 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11306 * message for this packet if the upper-layer 11307 * that issued this packet will be able to 11308 * react to the icmp_pkt2big_v6() that we'll 11309 * generate. 11310 */ 11311 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11312 max_frag, B_FALSE, B_TRUE, zoneid); 11313 return; 11314 } 11315 if (attach_index != 0) 11316 ipsec_out_attach_if(io, attach_index); 11317 ipsec_out_process(q, first_mp, ire, ill_index); 11318 return; 11319 } 11320 /* 11321 * XXX multicast: add ip_mforward_v6() here. 11322 * Check conn_dontroute 11323 */ 11324 #ifdef lint 11325 /* 11326 * XXX The only purpose of this statement is to avoid lint 11327 * errors. See the above "XXX multicast". When that gets 11328 * fixed, remove this whole #ifdef lint section. 11329 */ 11330 ip3dbg(("multicast forward is %s.\n", 11331 (multicast_forward ? "TRUE" : "FALSE"))); 11332 #endif 11333 11334 UPDATE_OB_PKT_COUNT(ire); 11335 ire->ire_last_used_time = lbolt; 11336 ASSERT(mp == first_mp); 11337 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11338 } else { 11339 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11340 } 11341 } 11342 11343 /* 11344 * Outbound IPv6 fragmentation routine using MDT. 11345 */ 11346 static void 11347 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11348 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11349 { 11350 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11351 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11352 mblk_t *hdr_mp, *md_mp = NULL; 11353 int i1; 11354 multidata_t *mmd; 11355 unsigned char *hdr_ptr, *pld_ptr; 11356 ip_pdescinfo_t pdi; 11357 uint32_t ident; 11358 size_t len; 11359 uint16_t offset; 11360 queue_t *stq = ire->ire_stq; 11361 ill_t *ill = (ill_t *)stq->q_ptr; 11362 11363 ASSERT(DB_TYPE(mp) == M_DATA); 11364 ASSERT(MBLKL(mp) > unfragmentable_len); 11365 11366 /* 11367 * Move read ptr past unfragmentable portion, we don't want this part 11368 * of the data in our fragments. 
11369 */ 11370 mp->b_rptr += unfragmentable_len; 11371 11372 /* Calculate how many packets we will send out */ 11373 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11374 pkts = (i1 + max_chunk - 1) / max_chunk; 11375 ASSERT(pkts > 1); 11376 11377 /* Allocate a message block which will hold all the IP Headers. */ 11378 wroff = ip_wroff_extra; 11379 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11380 11381 i1 = pkts * hdr_chunk_len; 11382 /* 11383 * Create the header buffer, Multidata and destination address 11384 * and SAP attribute that should be associated with it. 11385 */ 11386 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11387 ((hdr_mp->b_wptr += i1), 11388 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11389 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11390 freemsg(mp); 11391 if (md_mp == NULL) { 11392 freemsg(hdr_mp); 11393 } else { 11394 free_mmd: IP6_STAT(ip6_frag_mdt_discarded); 11395 freemsg(md_mp); 11396 } 11397 IP6_STAT(ip6_frag_mdt_allocfail); 11398 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11399 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutDiscards, pkts); 11400 return; 11401 } 11402 IP6_STAT(ip6_frag_mdt_allocd); 11403 11404 /* 11405 * Add a payload buffer to the Multidata; this operation must not 11406 * fail, or otherwise our logic in this routine is broken. There 11407 * is no memory allocation done by the routine, so any returned 11408 * failure simply tells us that we've done something wrong. 11409 * 11410 * A failure tells us that either we're adding the same payload 11411 * buffer more than once, or we're trying to add more buffers than 11412 * allowed. None of the above cases should happen, and we panic 11413 * because either there's horrible heap corruption, and/or 11414 * programming mistake. 
11415 */ 11416 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11417 goto pbuf_panic; 11418 } 11419 11420 hdr_ptr = hdr_mp->b_rptr; 11421 pld_ptr = mp->b_rptr; 11422 11423 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11424 11425 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11426 11427 /* 11428 * len is the total length of the fragmentable data in this 11429 * datagram. For each fragment sent, we will decrement len 11430 * by the amount of fragmentable data sent in that fragment 11431 * until len reaches zero. 11432 */ 11433 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11434 11435 offset = 0; 11436 prev_nexthdr_offset += wroff; 11437 11438 while (len != 0) { 11439 size_t mlen; 11440 ip6_t *fip6h; 11441 ip6_frag_t *fraghdr; 11442 int error; 11443 11444 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11445 mlen = MIN(len, max_chunk); 11446 len -= mlen; 11447 11448 fip6h = (ip6_t *)(hdr_ptr + wroff); 11449 ASSERT(OK_32PTR(fip6h)); 11450 bcopy(ip6h, fip6h, unfragmentable_len); 11451 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11452 11453 fip6h->ip6_plen = htons((uint16_t)(mlen + 11454 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11455 11456 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11457 unfragmentable_len); 11458 fraghdr->ip6f_nxt = nexthdr; 11459 fraghdr->ip6f_reserved = 0; 11460 fraghdr->ip6f_offlg = htons(offset) | 11461 ((len != 0) ? IP6F_MORE_FRAG : 0); 11462 fraghdr->ip6f_ident = ident; 11463 11464 /* 11465 * Record offset and size of header and data of the next packet 11466 * in the multidata message. 
11467 */ 11468 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11469 unfragmentable_len + sizeof (ip6_frag_t), 0); 11470 PDESC_PLD_INIT(&pdi); 11471 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11472 ASSERT(i1 > 0); 11473 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11474 if (i1 == mlen) { 11475 pld_ptr += mlen; 11476 } else { 11477 i1 = mlen - i1; 11478 mp = mp->b_cont; 11479 ASSERT(mp != NULL); 11480 ASSERT(MBLKL(mp) >= i1); 11481 /* 11482 * Attach the next payload message block to the 11483 * multidata message. 11484 */ 11485 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11486 goto pbuf_panic; 11487 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11488 pld_ptr = mp->b_rptr + i1; 11489 } 11490 11491 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11492 KM_NOSLEEP)) == NULL) { 11493 /* 11494 * Any failure other than ENOMEM indicates that we 11495 * have passed in invalid pdesc info or parameters 11496 * to mmd_addpdesc, which must not happen. 11497 * 11498 * EINVAL is a result of failure on boundary checks 11499 * against the pdesc info contents. It should not 11500 * happen, and we panic because either there's 11501 * horrible heap corruption, and/or programming 11502 * mistake. 11503 */ 11504 if (error != ENOMEM) { 11505 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11506 "pdesc logic error detected for " 11507 "mmd %p pinfo %p (%d)\n", 11508 (void *)mmd, (void *)&pdi, error); 11509 /* NOTREACHED */ 11510 } 11511 IP6_STAT(ip6_frag_mdt_addpdescfail); 11512 /* Free unattached payload message blocks as well */ 11513 md_mp->b_cont = mp->b_cont; 11514 goto free_mmd; 11515 } 11516 11517 /* Advance fragment offset. */ 11518 offset += mlen; 11519 11520 /* Advance to location for next header in the buffer. */ 11521 hdr_ptr += hdr_chunk_len; 11522 11523 /* Did we reach the next payload message block? */ 11524 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11525 mp = mp->b_cont; 11526 /* 11527 * Attach the next message block with payload 11528 * data to the multidata message. 
11529 */ 11530 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11531 goto pbuf_panic; 11532 pld_ptr = mp->b_rptr; 11533 } 11534 } 11535 11536 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11537 ASSERT(mp->b_wptr == pld_ptr); 11538 11539 /* Update IP statistics */ 11540 UPDATE_MIB(ill->ill_ip6_mib, ipv6OutFragCreates, pkts); 11541 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11542 IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); 11543 11544 ire->ire_ob_pkt_count += pkts; 11545 if (ire->ire_ipif != NULL) 11546 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11547 11548 ire->ire_last_used_time = lbolt; 11549 /* Send it down */ 11550 putnext(stq, md_mp); 11551 return; 11552 11553 pbuf_panic: 11554 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11555 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11556 pbuf_idx); 11557 /* NOTREACHED */ 11558 } 11559 11560 /* 11561 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11562 * We have not optimized this in terms of number of mblks 11563 * allocated. For instance, for each fragment sent we always allocate a 11564 * mblk to hold the IPv6 header and fragment header. 11565 * 11566 * Assumes that all the extension headers are contained in the first mblk. 11567 * 11568 * The fragment header is inserted after an hop-by-hop options header 11569 * and after [an optional destinations header followed by] a routing header. 11570 * 11571 * NOTE : This function does not ire_refrele the ire passed in as 11572 * the argument. 
11573 */ 11574 void 11575 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11576 int caller, int max_frag) 11577 { 11578 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11579 ip6_t *fip6h; 11580 mblk_t *hmp; 11581 mblk_t *hmp0; 11582 mblk_t *dmp; 11583 ip6_frag_t *fraghdr; 11584 size_t unfragmentable_len; 11585 size_t len; 11586 size_t mlen; 11587 size_t max_chunk; 11588 uint32_t ident; 11589 uint16_t off_flags; 11590 uint16_t offset = 0; 11591 ill_t *ill; 11592 uint8_t nexthdr; 11593 uint_t prev_nexthdr_offset; 11594 uint8_t *ptr; 11595 11596 ASSERT(ire->ire_type == IRE_CACHE); 11597 ill = (ill_t *)ire->ire_stq->q_ptr; 11598 11599 /* 11600 * Determine the length of the unfragmentable portion of this 11601 * datagram. This consists of the IPv6 header, a potential 11602 * hop-by-hop options header, a potential pre-routing-header 11603 * destination options header, and a potential routing header. 11604 */ 11605 nexthdr = ip6h->ip6_nxt; 11606 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11607 ptr = (uint8_t *)&ip6h[1]; 11608 11609 if (nexthdr == IPPROTO_HOPOPTS) { 11610 ip6_hbh_t *hbh_hdr; 11611 uint_t hdr_len; 11612 11613 hbh_hdr = (ip6_hbh_t *)ptr; 11614 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11615 nexthdr = hbh_hdr->ip6h_nxt; 11616 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11617 - (uint8_t *)ip6h; 11618 ptr += hdr_len; 11619 } 11620 if (nexthdr == IPPROTO_DSTOPTS) { 11621 ip6_dest_t *dest_hdr; 11622 uint_t hdr_len; 11623 11624 dest_hdr = (ip6_dest_t *)ptr; 11625 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11626 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11627 nexthdr = dest_hdr->ip6d_nxt; 11628 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11629 - (uint8_t *)ip6h; 11630 ptr += hdr_len; 11631 } 11632 } 11633 if (nexthdr == IPPROTO_ROUTING) { 11634 ip6_rthdr_t *rthdr; 11635 uint_t hdr_len; 11636 11637 rthdr = (ip6_rthdr_t *)ptr; 11638 nexthdr = rthdr->ip6r_nxt; 11639 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 
11640 - (uint8_t *)ip6h; 11641 hdr_len = 8 * (rthdr->ip6r_len + 1); 11642 ptr += hdr_len; 11643 } 11644 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11645 11646 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11647 sizeof (ip6_frag_t)) & ~7; 11648 11649 /* Check if we can use MDT to send out the frags. */ 11650 ASSERT(!IRE_IS_LOCAL(ire)); 11651 if (ip_multidata_outbound && reachable == 0 && 11652 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11653 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11654 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11655 nexthdr, prev_nexthdr_offset); 11656 return; 11657 } 11658 11659 /* 11660 * Allocate an mblk with enough room for the link-layer 11661 * header, the unfragmentable part of the datagram, and the 11662 * fragment header. This (or a copy) will be used as the 11663 * first mblk for each fragment we send. 11664 */ 11665 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 11666 BPRI_HI); 11667 if (hmp == NULL) { 11668 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11669 freemsg(mp); 11670 return; 11671 } 11672 hmp->b_rptr += ip_wroff_extra; 11673 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11674 11675 fip6h = (ip6_t *)hmp->b_rptr; 11676 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11677 11678 bcopy(ip6h, fip6h, unfragmentable_len); 11679 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11680 11681 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11682 11683 fraghdr->ip6f_nxt = nexthdr; 11684 fraghdr->ip6f_reserved = 0; 11685 fraghdr->ip6f_offlg = 0; 11686 fraghdr->ip6f_ident = htonl(ident); 11687 11688 /* 11689 * len is the total length of the fragmentable data in this 11690 * datagram. For each fragment sent, we will decrement len 11691 * by the amount of fragmentable data sent in that fragment 11692 * until len reaches zero. 
11693 */ 11694 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11695 11696 /* 11697 * Move read ptr past unfragmentable portion, we don't want this part 11698 * of the data in our fragments. 11699 */ 11700 mp->b_rptr += unfragmentable_len; 11701 11702 while (len != 0) { 11703 mlen = MIN(len, max_chunk); 11704 len -= mlen; 11705 if (len != 0) { 11706 /* Not last */ 11707 hmp0 = copyb(hmp); 11708 if (hmp0 == NULL) { 11709 freeb(hmp); 11710 freemsg(mp); 11711 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11712 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11713 return; 11714 } 11715 off_flags = IP6F_MORE_FRAG; 11716 } else { 11717 /* Last fragment */ 11718 hmp0 = hmp; 11719 hmp = NULL; 11720 off_flags = 0; 11721 } 11722 fip6h = (ip6_t *)(hmp0->b_rptr); 11723 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11724 11725 fip6h->ip6_plen = htons((uint16_t)(mlen + 11726 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11727 /* 11728 * Note: Optimization alert. 11729 * In IPv6 (and IPv4) protocol header, Fragment Offset 11730 * ("offset") is 13 bits wide and in 8-octet units. 11731 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11732 * it occupies the most significant 13 bits. 11733 * (least significant 13 bits in IPv4). 11734 * We do not do any shifts here. Not shifting is same effect 11735 * as taking offset value in octet units, dividing by 8 and 11736 * then shifting 3 bits left to line it up in place in proper 11737 * place protocol header. 
11738 */ 11739 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11740 11741 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11742 /* mp has already been freed by ip_carve_mp() */ 11743 if (hmp != NULL) 11744 freeb(hmp); 11745 freeb(hmp0); 11746 ip1dbg(("ip_carve_mp: failed\n")); 11747 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragFails); 11748 return; 11749 } 11750 hmp0->b_cont = dmp; 11751 /* Get the priority marking, if any */ 11752 hmp0->b_band = dmp->b_band; 11753 UPDATE_OB_PKT_COUNT(ire); 11754 ire->ire_last_used_time = lbolt; 11755 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11756 caller, NULL); 11757 reachable = 0; /* No need to redo state machine in loop */ 11758 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragCreates); 11759 offset += mlen; 11760 } 11761 BUMP_MIB(ill->ill_ip6_mib, ipv6OutFragOKs); 11762 } 11763 11764 /* 11765 * Determine if the ill and multicast aspects of that packets 11766 * "matches" the conn. 11767 */ 11768 boolean_t 11769 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11770 zoneid_t zoneid) 11771 { 11772 ill_t *in_ill; 11773 boolean_t wantpacket = B_TRUE; 11774 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11775 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11776 11777 /* 11778 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11779 * unicast and multicast reception to conn_incoming_ill. 11780 * conn_wantpacket_v6 is called both for unicast and 11781 * multicast. 11782 * 11783 * 1) The unicast copy of the packet can come anywhere in 11784 * the ill group if it is part of the group. Thus, we 11785 * need to check to see whether the ill group matches 11786 * if in_ill is part of a group. 11787 * 11788 * 2) ip_rput does not suppress duplicate multicast packets. 
11789 * If there are two interfaces in a ill group and we have 11790 * 2 applications (conns) joined a multicast group G on 11791 * both the interfaces, ilm_lookup_ill filter in ip_rput 11792 * will give us two packets because we join G on both the 11793 * interfaces rather than nominating just one interface 11794 * for receiving multicast like broadcast above. So, 11795 * we have to call ilg_lookup_ill to filter out duplicate 11796 * copies, if ill is part of a group, to supress duplicates. 11797 */ 11798 in_ill = connp->conn_incoming_ill; 11799 if (in_ill != NULL) { 11800 mutex_enter(&connp->conn_lock); 11801 in_ill = connp->conn_incoming_ill; 11802 mutex_enter(&ill->ill_lock); 11803 /* 11804 * No IPMP, and the packet did not arrive on conn_incoming_ill 11805 * OR, IPMP in use and the packet arrived on an IPMP group 11806 * different from the conn_incoming_ill's IPMP group. 11807 * Reject the packet. 11808 */ 11809 if ((in_ill->ill_group == NULL && in_ill != ill) || 11810 (in_ill->ill_group != NULL && 11811 in_ill->ill_group != ill->ill_group)) { 11812 wantpacket = B_FALSE; 11813 } 11814 mutex_exit(&ill->ill_lock); 11815 mutex_exit(&connp->conn_lock); 11816 if (!wantpacket) 11817 return (B_FALSE); 11818 } 11819 11820 if (connp->conn_multi_router) 11821 return (B_TRUE); 11822 11823 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11824 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11825 /* 11826 * Unicast case: we match the conn only if it's in the specified 11827 * zone. 11828 */ 11829 return (IPCL_ZONE_MATCH(connp, zoneid)); 11830 } 11831 11832 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11833 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11834 /* 11835 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11836 * disabled, therefore we don't dispatch the multicast packet to 11837 * the sending zone. 
11838 */ 11839 return (B_FALSE); 11840 } 11841 11842 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 11843 connp->conn_zoneid != zoneid && zoneid != ALL_ZONES) { 11844 /* 11845 * Multicast packet on the loopback interface: we only match 11846 * conns who joined the group in the specified zone. 11847 */ 11848 return (B_FALSE); 11849 } 11850 11851 mutex_enter(&connp->conn_lock); 11852 wantpacket = 11853 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11854 mutex_exit(&connp->conn_lock); 11855 11856 return (wantpacket); 11857 } 11858 11859 11860 /* 11861 * Transmit a packet and update any NUD state based on the flags 11862 * XXX need to "recover" any ip6i_t when doing putq! 11863 * 11864 * NOTE : This function does not ire_refrele the ire passed in as the 11865 * argument. 11866 */ 11867 void 11868 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11869 int caller, ipsec_out_t *io) 11870 { 11871 mblk_t *mp1; 11872 nce_t *nce = ire->ire_nce; 11873 ill_t *ill; 11874 uint64_t delta; 11875 ip6_t *ip6h; 11876 queue_t *stq = ire->ire_stq; 11877 ire_t *ire1 = NULL; 11878 ire_t *save_ire = ire; 11879 boolean_t multirt_send = B_FALSE; 11880 mblk_t *next_mp = NULL; 11881 11882 ip6h = (ip6_t *)mp->b_rptr; 11883 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11884 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11885 ASSERT(nce != NULL); 11886 ASSERT(mp->b_datap->db_type == M_DATA); 11887 ASSERT(stq != NULL); 11888 11889 ill = ire_to_ill(ire); 11890 if (!ill) { 11891 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11892 freemsg(mp); 11893 return; 11894 } 11895 11896 /* 11897 * If a packet is to be sent out an interface that is a 6to4 11898 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11899 * destination, must be checked to have a 6to4 prefix 11900 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11901 * address configured on the sending interface. 
Otherwise, 11902 * the packet was delivered to this interface in error and the 11903 * packet must be dropped. 11904 */ 11905 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11906 ipif_t *ipif = ill->ill_ipif; 11907 11908 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11909 &ip6h->ip6_dst)) { 11910 if (ip_debug > 2) { 11911 /* ip1dbg */ 11912 pr_addr_dbg("ip_xmit_v6: attempting to " 11913 "send 6to4 addressed IPv6 " 11914 "destination (%s) out the wrong " 11915 "interface.\n", AF_INET6, 11916 &ip6h->ip6_dst); 11917 } 11918 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 11919 freemsg(mp); 11920 return; 11921 } 11922 } 11923 11924 /* Flow-control check has been done in ip_wput_ire_v6 */ 11925 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11926 caller == IP_WSRV || canput(stq->q_next)) { 11927 uint32_t ill_index; 11928 11929 /* 11930 * In most cases, the emission loop below is entered only 11931 * once. Only in the case where the ire holds the 11932 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11933 * flagged ires in the bucket, and send the packet 11934 * through all crossed RTF_MULTIRT routes. 11935 */ 11936 if (ire->ire_flags & RTF_MULTIRT) { 11937 /* 11938 * Multirouting case. The bucket where ire is stored 11939 * probably holds other RTF_MULTIRT flagged ires 11940 * to the destination. In this call to ip_xmit_v6, 11941 * we attempt to send the packet through all 11942 * those ires. Thus, we first ensure that ire is the 11943 * first RTF_MULTIRT ire in the bucket, 11944 * before walking the ire list. 11945 */ 11946 ire_t *first_ire; 11947 irb_t *irb = ire->ire_bucket; 11948 ASSERT(irb != NULL); 11949 multirt_send = B_TRUE; 11950 11951 /* Make sure we do not omit any multiroute ire. 
*/ 11952 IRB_REFHOLD(irb); 11953 for (first_ire = irb->irb_ire; 11954 first_ire != NULL; 11955 first_ire = first_ire->ire_next) { 11956 if ((first_ire->ire_flags & RTF_MULTIRT) && 11957 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11958 &ire->ire_addr_v6)) && 11959 !(first_ire->ire_marks & 11960 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 11961 break; 11962 } 11963 11964 if ((first_ire != NULL) && (first_ire != ire)) { 11965 IRE_REFHOLD(first_ire); 11966 /* ire will be released by the caller */ 11967 ire = first_ire; 11968 nce = ire->ire_nce; 11969 stq = ire->ire_stq; 11970 ill = ire_to_ill(ire); 11971 } 11972 IRB_REFRELE(irb); 11973 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11974 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11975 ILL_MDT_USABLE(ill)) { 11976 /* 11977 * This tcp connection was marked as MDT-capable, but 11978 * it has been turned off due changes in the interface. 11979 * Now that the interface support is back, turn it on 11980 * by notifying tcp. We don't directly modify tcp_mdt, 11981 * since we leave all the details to the tcp code that 11982 * knows better. 11983 */ 11984 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11985 11986 if (mdimp == NULL) { 11987 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11988 "connp %p (ENOMEM)\n", (void *)connp)); 11989 } else { 11990 CONN_INC_REF(connp); 11991 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 11992 connp, SQTAG_TCP_INPUT_MCTL); 11993 } 11994 } 11995 11996 do { 11997 boolean_t qos_done = B_FALSE; 11998 11999 if (multirt_send) { 12000 irb_t *irb; 12001 /* 12002 * We are in a multiple send case, need to get 12003 * the next ire and make a duplicate of the 12004 * packet. ire1 holds here the next ire to 12005 * process in the bucket. If multirouting is 12006 * expected, any non-RTF_MULTIRT ire that has 12007 * the right destination address is ignored. 
12008 */ 12009 irb = ire->ire_bucket; 12010 ASSERT(irb != NULL); 12011 12012 IRB_REFHOLD(irb); 12013 for (ire1 = ire->ire_next; 12014 ire1 != NULL; 12015 ire1 = ire1->ire_next) { 12016 if (!(ire1->ire_flags & RTF_MULTIRT)) 12017 continue; 12018 if (!IN6_ARE_ADDR_EQUAL( 12019 &ire1->ire_addr_v6, 12020 &ire->ire_addr_v6)) 12021 continue; 12022 if (ire1->ire_marks & 12023 (IRE_MARK_CONDEMNED| 12024 IRE_MARK_HIDDEN)) 12025 continue; 12026 12027 /* Got one */ 12028 if (ire1 != save_ire) { 12029 IRE_REFHOLD(ire1); 12030 } 12031 break; 12032 } 12033 IRB_REFRELE(irb); 12034 12035 if (ire1 != NULL) { 12036 next_mp = copyb(mp); 12037 if ((next_mp == NULL) || 12038 ((mp->b_cont != NULL) && 12039 ((next_mp->b_cont = 12040 dupmsg(mp->b_cont)) == 12041 NULL))) { 12042 freemsg(next_mp); 12043 next_mp = NULL; 12044 ire_refrele(ire1); 12045 ire1 = NULL; 12046 } 12047 } 12048 12049 /* Last multiroute ire; don't loop anymore. */ 12050 if (ire1 == NULL) { 12051 multirt_send = B_FALSE; 12052 } 12053 } 12054 12055 ill_index = 12056 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12057 12058 /* 12059 * Check for fastpath, we need to hold nce_lock to 12060 * prevent fastpath update from chaining nce_fp_mp. 12061 */ 12062 12063 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12064 mutex_enter(&nce->nce_lock); 12065 if ((mp1 = nce->nce_fp_mp) != NULL) { 12066 uint32_t hlen; 12067 uchar_t *rptr; 12068 12069 /* Initiate IPPF processing */ 12070 if (IP6_OUT_IPP(flags)) { 12071 /* 12072 * We have to release the nce lock since 12073 * IPPF components use 12074 * ill_lookup_on_ifindex(), 12075 * which takes the ill_g_lock and the 12076 * ill_lock locks. 
12077 */ 12078 mutex_exit(&nce->nce_lock); 12079 ip_process(IPP_LOCAL_OUT, &mp, 12080 ill_index); 12081 if (mp == NULL) { 12082 BUMP_MIB( 12083 ill->ill_ip6_mib, 12084 ipv6OutDiscards); 12085 if (next_mp != NULL) 12086 freemsg(next_mp); 12087 if (ire != save_ire) { 12088 ire_refrele(ire); 12089 } 12090 return; 12091 } 12092 mutex_enter(&nce->nce_lock); 12093 if ((mp1 = nce->nce_fp_mp) == NULL) { 12094 /* 12095 * Probably disappeared during 12096 * IPQoS processing. 12097 */ 12098 qos_done = B_TRUE; 12099 goto prepend_unitdata; 12100 } 12101 } 12102 hlen = MBLKL(mp1); 12103 rptr = mp->b_rptr - hlen; 12104 /* 12105 * make sure there is room for the fastpath 12106 * datalink header 12107 */ 12108 if (rptr < mp->b_datap->db_base) { 12109 mp1 = copyb(mp1); 12110 if (mp1 == NULL) { 12111 mutex_exit(&nce->nce_lock); 12112 BUMP_MIB(ill->ill_ip6_mib, 12113 ipv6OutDiscards); 12114 freemsg(mp); 12115 if (next_mp != NULL) 12116 freemsg(next_mp); 12117 if (ire != save_ire) { 12118 ire_refrele(ire); 12119 } 12120 return; 12121 } 12122 mp1->b_cont = mp; 12123 12124 /* Get the priority marking, if any */ 12125 mp1->b_band = mp->b_band; 12126 mp = mp1; 12127 } else { 12128 mp->b_rptr = rptr; 12129 /* 12130 * fastpath - pre-pend datalink 12131 * header 12132 */ 12133 bcopy(mp1->b_rptr, rptr, hlen); 12134 } 12135 12136 mutex_exit(&nce->nce_lock); 12137 12138 } else { 12139 prepend_unitdata: 12140 mutex_exit(&nce->nce_lock); 12141 mp1 = nce->nce_res_mp; 12142 if (mp1 == NULL) { 12143 ip1dbg(("ip_xmit_v6: No resolution " 12144 "block ire = %p\n", (void *)ire)); 12145 freemsg(mp); 12146 if (next_mp != NULL) 12147 freemsg(next_mp); 12148 if (ire != save_ire) { 12149 ire_refrele(ire); 12150 } 12151 return; 12152 } 12153 /* 12154 * Prepend the DL_UNITDATA_REQ. 
12155 */ 12156 mp1 = copyb(mp1); 12157 if (mp1 == NULL) { 12158 BUMP_MIB(ill->ill_ip6_mib, 12159 ipv6OutDiscards); 12160 freemsg(mp); 12161 if (next_mp != NULL) 12162 freemsg(next_mp); 12163 if (ire != save_ire) { 12164 ire_refrele(ire); 12165 } 12166 return; 12167 } 12168 mp1->b_cont = mp; 12169 mp = mp1; 12170 /* 12171 * Initiate IPPF processing, if it is 12172 * already done, bypass. 12173 */ 12174 if (!qos_done && IP6_OUT_IPP(flags)) { 12175 ip_process(IPP_LOCAL_OUT, &mp, 12176 ill_index); 12177 if (mp == NULL) { 12178 BUMP_MIB(ill->ill_ip6_mib, 12179 ipv6OutDiscards); 12180 if (next_mp != NULL) 12181 freemsg(next_mp); 12182 if (ire != save_ire) { 12183 ire_refrele(ire); 12184 } 12185 return; 12186 } 12187 } 12188 } 12189 12190 /* 12191 * Update ire counters; for save_ire, this has been 12192 * done by the caller. 12193 */ 12194 if (ire != save_ire) { 12195 UPDATE_OB_PKT_COUNT(ire); 12196 ire->ire_last_used_time = lbolt; 12197 } 12198 12199 /* 12200 * Send it down. XXX Do we want to flow control AH/ESP 12201 * packets that carry TCP payloads? We don't flow 12202 * control TCP packets, but we should also not 12203 * flow-control TCP packets that have been protected. 12204 * We don't have an easy way to find out if an AH/ESP 12205 * packet was originally TCP or not currently. 12206 */ 12207 if (io == NULL) { 12208 putnext(stq, mp); 12209 } else { 12210 /* 12211 * Safety Pup says: make sure this is 12212 * going to the right interface! 12213 */ 12214 if (io->ipsec_out_capab_ill_index != 12215 ill_index) { 12216 /* IPsec kstats: bump lose counter */ 12217 freemsg(mp1); 12218 } else { 12219 ipsec_hw_putnext(stq, mp); 12220 } 12221 } 12222 12223 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12224 if (ire != save_ire) { 12225 ire_refrele(ire); 12226 } 12227 if (multirt_send) { 12228 ASSERT(ire1 != NULL); 12229 /* 12230 * Proceed with the next RTF_MULTIRT 12231 * ire, also set up the send-to queue 12232 * accordingly. 
12233 */ 12234 ire = ire1; 12235 ire1 = NULL; 12236 stq = ire->ire_stq; 12237 nce = ire->ire_nce; 12238 ill = ire_to_ill(ire); 12239 mp = next_mp; 12240 next_mp = NULL; 12241 continue; 12242 } 12243 ASSERT(next_mp == NULL); 12244 ASSERT(ire1 == NULL); 12245 return; 12246 } 12247 12248 ASSERT(nce->nce_state != ND_INCOMPLETE); 12249 12250 /* 12251 * Check for upper layer advice 12252 */ 12253 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12254 /* 12255 * It should be o.k. to check the state without 12256 * a lock here, at most we lose an advice. 12257 */ 12258 nce->nce_last = TICK_TO_MSEC(lbolt64); 12259 if (nce->nce_state != ND_REACHABLE) { 12260 12261 mutex_enter(&nce->nce_lock); 12262 nce->nce_state = ND_REACHABLE; 12263 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12264 mutex_exit(&nce->nce_lock); 12265 (void) untimeout(nce->nce_timeout_id); 12266 if (ip_debug > 2) { 12267 /* ip1dbg */ 12268 pr_addr_dbg("ip_xmit_v6: state" 12269 " for %s changed to" 12270 " REACHABLE\n", AF_INET6, 12271 &ire->ire_addr_v6); 12272 } 12273 } 12274 if (ire != save_ire) { 12275 ire_refrele(ire); 12276 } 12277 if (multirt_send) { 12278 ASSERT(ire1 != NULL); 12279 /* 12280 * Proceed with the next RTF_MULTIRT 12281 * ire, also set up the send-to queue 12282 * accordingly. 12283 */ 12284 ire = ire1; 12285 ire1 = NULL; 12286 stq = ire->ire_stq; 12287 nce = ire->ire_nce; 12288 ill = ire_to_ill(ire); 12289 mp = next_mp; 12290 next_mp = NULL; 12291 continue; 12292 } 12293 ASSERT(next_mp == NULL); 12294 ASSERT(ire1 == NULL); 12295 return; 12296 } 12297 12298 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12299 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12300 " ill_reachable_time = %d \n", delta, 12301 ill->ill_reachable_time)); 12302 if (delta > (uint64_t)ill->ill_reachable_time) { 12303 nce = ire->ire_nce; 12304 mutex_enter(&nce->nce_lock); 12305 switch (nce->nce_state) { 12306 case ND_REACHABLE: 12307 case ND_STALE: 12308 /* 12309 * ND_REACHABLE is identical to 12310 * ND_STALE in this specific case. 
If 12311 * reachable time has expired for this 12312 * neighbor (delta is greater than 12313 * reachable time), conceptually, the 12314 * neighbor cache is no longer in 12315 * REACHABLE state, but already in 12316 * STALE state. So the correct 12317 * transition here is to ND_DELAY. 12318 */ 12319 nce->nce_state = ND_DELAY; 12320 mutex_exit(&nce->nce_lock); 12321 NDP_RESTART_TIMER(nce, 12322 delay_first_probe_time); 12323 if (ip_debug > 3) { 12324 /* ip2dbg */ 12325 pr_addr_dbg("ip_xmit_v6: state" 12326 " for %s changed to" 12327 " DELAY\n", AF_INET6, 12328 &ire->ire_addr_v6); 12329 } 12330 break; 12331 case ND_DELAY: 12332 case ND_PROBE: 12333 mutex_exit(&nce->nce_lock); 12334 /* Timers have already started */ 12335 break; 12336 case ND_UNREACHABLE: 12337 /* 12338 * ndp timer has detected that this nce 12339 * is unreachable and initiated deleting 12340 * this nce and all its associated IREs. 12341 * This is a race where we found the 12342 * ire before it was deleted and have 12343 * just sent out a packet using this 12344 * unreachable nce. 12345 */ 12346 mutex_exit(&nce->nce_lock); 12347 break; 12348 default: 12349 ASSERT(0); 12350 } 12351 } 12352 12353 if (multirt_send) { 12354 ASSERT(ire1 != NULL); 12355 /* 12356 * Proceed with the next RTF_MULTIRT ire, 12357 * Also set up the send-to queue accordingly. 12358 */ 12359 if (ire != save_ire) { 12360 ire_refrele(ire); 12361 } 12362 ire = ire1; 12363 ire1 = NULL; 12364 stq = ire->ire_stq; 12365 nce = ire->ire_nce; 12366 ill = ire_to_ill(ire); 12367 mp = next_mp; 12368 next_mp = NULL; 12369 } 12370 } while (multirt_send); 12371 /* 12372 * In the multirouting case, release the last ire used for 12373 * emission. save_ire will be released by the caller. 12374 */ 12375 if (ire != save_ire) { 12376 ire_refrele(ire); 12377 } 12378 } else { 12379 /* 12380 * Queue packet if we have an conn to give back pressure. 
12381 * We can't queue packets intended for hardware acceleration 12382 * since we've tossed that state already. If the packet is 12383 * being fed back from ire_send_v6, we don't know the 12384 * position in the queue to enqueue the packet and we discard 12385 * the packet. 12386 */ 12387 if (ip_output_queue && (connp != NULL) && (io == NULL) && 12388 (caller != IRE_SEND)) { 12389 if (caller == IP_WSRV) { 12390 connp->conn_did_putbq = 1; 12391 (void) putbq(connp->conn_wq, mp); 12392 conn_drain_insert(connp); 12393 /* 12394 * caller == IP_WSRV implies we are 12395 * the service thread, and the 12396 * queue is already noenabled. 12397 * The check for canput and 12398 * the putbq is not atomic. 12399 * So we need to check again. 12400 */ 12401 if (canput(stq->q_next)) 12402 connp->conn_did_putbq = 0; 12403 } else { 12404 (void) putq(connp->conn_wq, mp); 12405 } 12406 return; 12407 } 12408 BUMP_MIB(ill->ill_ip6_mib, ipv6OutDiscards); 12409 freemsg(mp); 12410 return; 12411 } 12412 } 12413 12414 /* 12415 * pr_addr_dbg function provides the needed buffer space to call 12416 * inet_ntop() function's 3rd argument. This function should be 12417 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12418 * stack buffer space in it's own stack frame. This function uses 12419 * a buffer from it's own stack and prints the information. 12420 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12421 * 12422 * Note: This function can call inet_ntop() once. 12423 */ 12424 void 12425 pr_addr_dbg(char *fmt1, int af, const void *addr) 12426 { 12427 char buf[INET6_ADDRSTRLEN]; 12428 12429 if (fmt1 == NULL) { 12430 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12431 return; 12432 } 12433 12434 /* 12435 * This does not compare debug level and just prints 12436 * out. Thus it is the responsibility of the caller 12437 * to check the appropriate debug-level before calling 12438 * this function. 
12439 */ 12440 if (ip_debug > 0) { 12441 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12442 } 12443 12444 12445 } 12446 12447 12448 /* 12449 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12450 * if needed and extension headers) that will be needed based on the 12451 * ip6_pkt_t structure passed by the caller. 12452 * 12453 * The returned length does not include the length of the upper level 12454 * protocol (ULP) header. 12455 */ 12456 int 12457 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12458 { 12459 int len; 12460 12461 len = IPV6_HDR_LEN; 12462 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12463 len += sizeof (ip6i_t); 12464 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12465 ASSERT(ipp->ipp_hopoptslen != 0); 12466 len += ipp->ipp_hopoptslen; 12467 } 12468 if (ipp->ipp_fields & IPPF_RTHDR) { 12469 ASSERT(ipp->ipp_rthdrlen != 0); 12470 len += ipp->ipp_rthdrlen; 12471 } 12472 /* 12473 * En-route destination options 12474 * Only do them if there's a routing header as well 12475 */ 12476 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12477 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12478 ASSERT(ipp->ipp_rtdstoptslen != 0); 12479 len += ipp->ipp_rtdstoptslen; 12480 } 12481 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12482 ASSERT(ipp->ipp_dstoptslen != 0); 12483 len += ipp->ipp_dstoptslen; 12484 } 12485 return (len); 12486 } 12487 12488 /* 12489 * All-purpose routine to build a header chain of an IPv6 header 12490 * followed by any required extension headers and a proto header, 12491 * preceeded (where necessary) by an ip6i_t private header. 12492 * 12493 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12494 * will be filled in appropriately. 12495 * Thus the caller must fill in the rest of the IPv6 header, such as 12496 * traffic class/flowid, source address (if not set here), hoplimit (if not 12497 * set here) and destination address. 12498 * 12499 * The extension headers and ip6i_t header will all be fully filled in. 
12500 */ 12501 void 12502 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12503 ip6_pkt_t *ipp, uint8_t protocol) 12504 { 12505 uint8_t *nxthdr_ptr; 12506 uint8_t *cp; 12507 ip6i_t *ip6i; 12508 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12509 12510 /* 12511 * If sending private ip6i_t header down (checksum info, nexthop, 12512 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12513 * then fill it in. (The checksum info will be filled in by icmp). 12514 */ 12515 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12516 ip6i = (ip6i_t *)ip6h; 12517 ip6h = (ip6_t *)&ip6i[1]; 12518 12519 ip6i->ip6i_flags = 0; 12520 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12521 if (ipp->ipp_fields & IPPF_IFINDEX || 12522 ipp->ipp_fields & IPPF_SCOPE_ID) { 12523 ASSERT(ipp->ipp_ifindex != 0); 12524 ip6i->ip6i_flags |= IP6I_IFINDEX; 12525 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12526 } 12527 if (ipp->ipp_fields & IPPF_ADDR) { 12528 /* 12529 * Enable per-packet source address verification if 12530 * IPV6_PKTINFO specified the source address. 12531 * ip6_src is set in the transport's _wput function. 12532 */ 12533 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12534 &ipp->ipp_addr)); 12535 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12536 } 12537 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12538 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12539 /* 12540 * We need to set this flag so that IP doesn't 12541 * rewrite the IPv6 header's hoplimit with the 12542 * current default value. 
12543 */ 12544 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12545 } 12546 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12547 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12548 &ipp->ipp_nexthop)); 12549 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12550 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12551 } 12552 /* 12553 * tell IP this is an ip6i_t private header 12554 */ 12555 ip6i->ip6i_nxt = IPPROTO_RAW; 12556 } 12557 /* Initialize IPv6 header */ 12558 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12559 if (ipp->ipp_fields & IPPF_TCLASS) { 12560 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12561 (ipp->ipp_tclass << 20); 12562 } 12563 if (ipp->ipp_fields & IPPF_ADDR) 12564 ip6h->ip6_src = ipp->ipp_addr; 12565 12566 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12567 cp = (uint8_t *)&ip6h[1]; 12568 /* 12569 * Here's where we have to start stringing together 12570 * any extension headers in the right order: 12571 * Hop-by-hop, destination, routing, and final destination opts. 12572 */ 12573 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12574 /* Hop-by-hop options */ 12575 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12576 12577 *nxthdr_ptr = IPPROTO_HOPOPTS; 12578 nxthdr_ptr = &hbh->ip6h_nxt; 12579 12580 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12581 cp += ipp->ipp_hopoptslen; 12582 } 12583 /* 12584 * En-route destination options 12585 * Only do them if there's a routing header as well 12586 */ 12587 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12588 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12589 ip6_dest_t *dst = (ip6_dest_t *)cp; 12590 12591 *nxthdr_ptr = IPPROTO_DSTOPTS; 12592 nxthdr_ptr = &dst->ip6d_nxt; 12593 12594 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12595 cp += ipp->ipp_rtdstoptslen; 12596 } 12597 /* 12598 * Routing header next 12599 */ 12600 if (ipp->ipp_fields & IPPF_RTHDR) { 12601 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12602 12603 *nxthdr_ptr = IPPROTO_ROUTING; 12604 nxthdr_ptr = &rt->ip6r_nxt; 12605 12606 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12607 cp += ipp->ipp_rthdrlen; 12608 } 
12609 /* 12610 * Do ultimate destination options 12611 */ 12612 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12613 ip6_dest_t *dest = (ip6_dest_t *)cp; 12614 12615 *nxthdr_ptr = IPPROTO_DSTOPTS; 12616 nxthdr_ptr = &dest->ip6d_nxt; 12617 12618 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12619 cp += ipp->ipp_dstoptslen; 12620 } 12621 /* 12622 * Now set the last header pointer to the proto passed in 12623 */ 12624 *nxthdr_ptr = protocol; 12625 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12626 } 12627 12628 /* 12629 * Return a pointer to the routing header extension header 12630 * in the IPv6 header(s) chain passed in. 12631 * If none found, return NULL 12632 * Assumes that all extension headers are in same mblk as the v6 header 12633 */ 12634 ip6_rthdr_t * 12635 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12636 { 12637 ip6_dest_t *desthdr; 12638 ip6_frag_t *fraghdr; 12639 uint_t hdrlen; 12640 uint8_t nexthdr; 12641 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12642 12643 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12644 return ((ip6_rthdr_t *)ptr); 12645 12646 /* 12647 * The routing header will precede all extension headers 12648 * other than the hop-by-hop and destination options 12649 * extension headers, so if we see anything other than those, 12650 * we're done and didn't find it. 12651 * We could see a destination options header alone but no 12652 * routing header, in which case we'll return NULL as soon as 12653 * we see anything after that. 12654 * Hop-by-hop and destination option headers are identical, 12655 * so we can use either one we want as a template. 12656 */ 12657 nexthdr = ip6h->ip6_nxt; 12658 while (ptr < endptr) { 12659 /* Is there enough left for len + nexthdr? 
*/ 12660 if (ptr + MIN_EHDR_LEN > endptr) 12661 return (NULL); 12662 12663 switch (nexthdr) { 12664 case IPPROTO_HOPOPTS: 12665 case IPPROTO_DSTOPTS: 12666 /* Assumes the headers are identical for hbh and dst */ 12667 desthdr = (ip6_dest_t *)ptr; 12668 hdrlen = 8 * (desthdr->ip6d_len + 1); 12669 nexthdr = desthdr->ip6d_nxt; 12670 break; 12671 12672 case IPPROTO_ROUTING: 12673 return ((ip6_rthdr_t *)ptr); 12674 12675 case IPPROTO_FRAGMENT: 12676 fraghdr = (ip6_frag_t *)ptr; 12677 hdrlen = sizeof (ip6_frag_t); 12678 nexthdr = fraghdr->ip6f_nxt; 12679 break; 12680 12681 default: 12682 return (NULL); 12683 } 12684 ptr += hdrlen; 12685 } 12686 return (NULL); 12687 } 12688 12689 /* 12690 * Called for source-routed packets originating on this node. 12691 * Manipulates the original routing header by moving every entry up 12692 * one slot, placing the first entry in the v6 header's v6_dst field, 12693 * and placing the ultimate destination in the routing header's last 12694 * slot. 12695 * 12696 * Returns the checksum diference between the ultimate destination 12697 * (last hop in the routing header when the packet is sent) and 12698 * the first hop (ip6_dst when the packet is sent) 12699 */ 12700 uint32_t 12701 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 12702 { 12703 uint_t numaddr; 12704 uint_t i; 12705 in6_addr_t *addrptr; 12706 in6_addr_t tmp; 12707 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12708 uint32_t cksm; 12709 uint32_t addrsum = 0; 12710 uint16_t *ptr; 12711 12712 /* 12713 * Perform any processing needed for source routing. 12714 * We know that all extension headers will be in the same mblk 12715 * as the IPv6 header. 12716 */ 12717 12718 /* 12719 * If no segments left in header, or the header length field is zero, 12720 * don't move hop addresses around; 12721 * Checksum difference is zero. 
12722 */ 12723 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12724 return (0); 12725 12726 ptr = (uint16_t *)&ip6h->ip6_dst; 12727 cksm = 0; 12728 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12729 cksm += ptr[i]; 12730 } 12731 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12732 12733 /* 12734 * Here's where the fun begins - we have to 12735 * move all addresses up one spot, take the 12736 * first hop and make it our first ip6_dst, 12737 * and place the ultimate destination in the 12738 * newly-opened last slot. 12739 */ 12740 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12741 numaddr = rthdr->ip6r0_len / 2; 12742 tmp = *addrptr; 12743 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12744 *addrptr = addrptr[1]; 12745 } 12746 *addrptr = ip6h->ip6_dst; 12747 ip6h->ip6_dst = tmp; 12748 12749 /* 12750 * From the checksummed ultimate destination subtract the checksummed 12751 * current ip6_dst (the first hop address). Return that number. 12752 * (In the v4 case, the second part of this is done in each routine 12753 * that calls ip_massage_options(). We do it all in this one place 12754 * for v6). 12755 */ 12756 ptr = (uint16_t *)&ip6h->ip6_dst; 12757 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12758 addrsum += ptr[i]; 12759 } 12760 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12761 if ((int)cksm < 0) 12762 cksm--; 12763 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12764 12765 return (cksm); 12766 } 12767 12768 /* 12769 * See if the upper-level protocol indicated by 'proto' will be able 12770 * to do something with an ICMP_FRAGMENTATION_NEEDED (IPv4) or 12771 * ICMP6_PACKET_TOO_BIG (IPv6). 12772 */ 12773 static boolean_t 12774 ip_ulp_cando_pkt2big(int proto) 12775 { 12776 /* 12777 * For now, only TCP can handle this. 12778 * Tunnels may be able to also, but since tun isn't working over 12779 * IPv6 yet, don't worry about it for now. 
12780 */ 12781 return (proto == IPPROTO_TCP); 12782 } 12783 12784 12785 /* 12786 * Propagate a multicast group membership operation (join/leave) (*fn) on 12787 * all interfaces crossed by the related multirt routes. 12788 * The call is considered successful if the operation succeeds 12789 * on at least one interface. 12790 * The function is called if the destination address in the packet to send 12791 * is multirouted. 12792 */ 12793 int 12794 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12795 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12796 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12797 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12798 { 12799 ire_t *ire_gw; 12800 irb_t *irb; 12801 int index, error = 0; 12802 opt_restart_t *or; 12803 12804 irb = ire->ire_bucket; 12805 ASSERT(irb != NULL); 12806 12807 ASSERT(DB_TYPE(first_mp) == M_CTL); 12808 or = (opt_restart_t *)first_mp->b_rptr; 12809 12810 IRB_REFHOLD(irb); 12811 for (; ire != NULL; ire = ire->ire_next) { 12812 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12813 continue; 12814 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12815 continue; 12816 12817 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12818 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12819 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 12820 /* No resolver exists for the gateway; skip this ire. */ 12821 if (ire_gw == NULL) 12822 continue; 12823 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12824 /* 12825 * A resolver exists: we can get the interface on which we have 12826 * to apply the operation. 
12827 */ 12828 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12829 first_mp); 12830 if (error == 0) 12831 or->or_private = CGTP_MCAST_SUCCESS; 12832 12833 if (ip_debug > 0) { 12834 ulong_t off; 12835 char *ksym; 12836 12837 ksym = kobj_getsymname((uintptr_t)fn, &off); 12838 ip2dbg(("ip_multirt_apply_membership_v6: " 12839 "called %s, multirt group 0x%08x via itf 0x%08x, " 12840 "error %d [success %u]\n", 12841 ksym ? ksym : "?", 12842 ntohl(V4_PART_OF_V6((*v6grp))), 12843 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12844 error, or->or_private)); 12845 } 12846 12847 ire_refrele(ire_gw); 12848 if (error == EINPROGRESS) { 12849 IRB_REFRELE(irb); 12850 return (error); 12851 } 12852 } 12853 IRB_REFRELE(irb); 12854 /* 12855 * Consider the call as successful if we succeeded on at least 12856 * one interface. Otherwise, return the last encountered error. 12857 */ 12858 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12859 } 12860 12861 void 12862 ip6_kstat_init(void) 12863 { 12864 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 12865 "net", KSTAT_TYPE_NAMED, 12866 sizeof (ip6_statistics) / sizeof (kstat_named_t), 12867 KSTAT_FLAG_VIRTUAL)) != NULL) { 12868 ip6_kstat->ks_data = &ip6_statistics; 12869 kstat_install(ip6_kstat); 12870 } 12871 } 12872 12873 /* 12874 * The following two functions set and get the value for the 12875 * IPV6_SRC_PREFERENCES socket option. 12876 */ 12877 int 12878 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12879 { 12880 /* 12881 * We only support preferences that are covered by 12882 * IPV6_PREFER_SRC_MASK. 12883 */ 12884 if (prefs & ~IPV6_PREFER_SRC_MASK) 12885 return (EINVAL); 12886 12887 /* 12888 * Look for conflicting preferences or default preferences. If 12889 * both bits of a related pair are clear, the application wants the 12890 * system's default value for that pair. Both bits in a pair can't 12891 * be set. 
12892 */ 12893 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12894 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12895 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12896 IPV6_PREFER_SRC_MIPMASK) { 12897 return (EINVAL); 12898 } 12899 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12900 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12901 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12902 IPV6_PREFER_SRC_TMPMASK) { 12903 return (EINVAL); 12904 } 12905 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12906 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12907 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12908 IPV6_PREFER_SRC_CGAMASK) { 12909 return (EINVAL); 12910 } 12911 12912 connp->conn_src_preferences = prefs; 12913 return (0); 12914 } 12915 12916 size_t 12917 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12918 { 12919 *val = connp->conn_src_preferences; 12920 return (sizeof (connp->conn_src_preferences)); 12921 } 12922 12923 int 12924 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 12925 { 12926 ill_t *ill; 12927 ire_t *ire; 12928 int error; 12929 12930 /* 12931 * Verify the source address and ifindex. Privileged users can use 12932 * any source address. For ancillary data the source address is 12933 * checked in ip_wput_v6. 12934 */ 12935 if (pkti->ipi6_ifindex != 0) { 12936 ASSERT(connp != NULL); 12937 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 12938 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 12939 if (ill == NULL) { 12940 /* 12941 * We just want to know if the interface exists, we 12942 * don't really care about the ill pointer itself. 
12943 */ 12944 if (error != EINPROGRESS) 12945 return (error); 12946 error = 0; /* Ensure we don't use it below */ 12947 } else { 12948 ill_refrele(ill); 12949 } 12950 } 12951 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12952 secpolicy_net_rawaccess(cr) != 0) { 12953 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12954 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12955 connp->conn_zoneid, NULL, MATCH_IRE_TYPE); 12956 if (ire != NULL) 12957 ire_refrele(ire); 12958 else 12959 return (ENXIO); 12960 } 12961 return (0); 12962 } 12963 12964 /* 12965 * Get the size of the IP options (including the IP headers size) 12966 * without including the AH header's size. If till_ah is B_FALSE, 12967 * and if AH header is present, dest options beyond AH header will 12968 * also be included in the returned size. 12969 */ 12970 int 12971 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12972 { 12973 ip6_t *ip6h; 12974 uint8_t nexthdr; 12975 uint8_t *whereptr; 12976 ip6_hbh_t *hbhhdr; 12977 ip6_dest_t *dsthdr; 12978 ip6_rthdr_t *rthdr; 12979 int ehdrlen; 12980 int size; 12981 ah_t *ah; 12982 12983 ip6h = (ip6_t *)mp->b_rptr; 12984 size = IPV6_HDR_LEN; 12985 nexthdr = ip6h->ip6_nxt; 12986 whereptr = (uint8_t *)&ip6h[1]; 12987 for (;;) { 12988 /* Assume IP has already stripped it */ 12989 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12990 switch (nexthdr) { 12991 case IPPROTO_HOPOPTS: 12992 hbhhdr = (ip6_hbh_t *)whereptr; 12993 nexthdr = hbhhdr->ip6h_nxt; 12994 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12995 break; 12996 case IPPROTO_DSTOPTS: 12997 dsthdr = (ip6_dest_t *)whereptr; 12998 nexthdr = dsthdr->ip6d_nxt; 12999 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13000 break; 13001 case IPPROTO_ROUTING: 13002 rthdr = (ip6_rthdr_t *)whereptr; 13003 nexthdr = rthdr->ip6r_nxt; 13004 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13005 break; 13006 default : 13007 if (till_ah) { 13008 ASSERT(nexthdr == IPPROTO_AH); 13009 return (size); 13010 } 13011 /* 13012 * If we don't have a AH 
header to traverse, 13013 * return now. This happens normally for 13014 * outbound datagrams where we have not inserted 13015 * the AH header. 13016 */ 13017 if (nexthdr != IPPROTO_AH) { 13018 return (size); 13019 } 13020 13021 /* 13022 * We don't include the AH header's size 13023 * to be symmetrical with other cases where 13024 * we either don't have a AH header (outbound) 13025 * or peek into the AH header yet (inbound and 13026 * not pulled up yet). 13027 */ 13028 ah = (ah_t *)whereptr; 13029 nexthdr = ah->ah_nexthdr; 13030 ehdrlen = (ah->ah_length << 2) + 8; 13031 13032 if (nexthdr == IPPROTO_DSTOPTS) { 13033 if (whereptr + ehdrlen >= mp->b_wptr) { 13034 /* 13035 * The destination options header 13036 * is not part of the first mblk. 13037 */ 13038 whereptr = mp->b_cont->b_rptr; 13039 } else { 13040 whereptr += ehdrlen; 13041 } 13042 13043 dsthdr = (ip6_dest_t *)whereptr; 13044 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13045 size += ehdrlen; 13046 } 13047 return (size); 13048 } 13049 whereptr += ehdrlen; 13050 size += ehdrlen; 13051 } 13052 } 13053