1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/sdt.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 #include <sys/neti.h> 49 #include <sys/hook.h> 50 51 #include <sys/kmem.h> 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/atomic.h> 58 #include <sys/iphada.h> 59 #include <sys/policy.h> 60 #include <net/if.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/arp.h> 75 76 #include <inet/ip.h> 77 #include <inet/ip_impl.h> 78 #include <inet/ip6.h> 79 #include <inet/ip6_asp.h> 80 #include <inet/tcp.h> 81 #include <inet/tcp_impl.h> 82 #include <inet/udp_impl.h> 83 #include <inet/ipp_common.h> 84 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_rts.h> 89 #include <inet/optcom.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include <rpc/pmap_prot.h> 107 
108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern squeue_func_t ip_input_proc; 112 113 /* 114 * IP statistics. IP6_STAT(x) increments, and IP6_STAT_UPDATE(x, n) adds n to, the ui64 value of the counter named x in ip6_statistics. 115 */ 116 #define IP6_STAT(x) (ip6_statistics.x.value.ui64++) 117 #define IP6_STAT_UPDATE(x, n) (ip6_statistics.x.value.ui64 += (n)) 118 119 typedef struct ip6_stat { 120 kstat_named_t ip6_udp_fast_path; 121 kstat_named_t ip6_udp_slow_path; 122 kstat_named_t ip6_udp_fannorm; 123 kstat_named_t ip6_udp_fanmb; 124 kstat_named_t ip6_out_sw_cksum; 125 kstat_named_t ip6_in_sw_cksum; 126 kstat_named_t ip6_tcp_in_full_hw_cksum_err; 127 kstat_named_t ip6_tcp_in_part_hw_cksum_err; 128 kstat_named_t ip6_tcp_in_sw_cksum_err; 129 kstat_named_t ip6_tcp_out_sw_cksum_bytes; 130 kstat_named_t ip6_udp_in_full_hw_cksum_err; 131 kstat_named_t ip6_udp_in_part_hw_cksum_err; 132 kstat_named_t ip6_udp_in_sw_cksum_err; 133 kstat_named_t ip6_udp_out_sw_cksum_bytes; 134 kstat_named_t ip6_frag_mdt_pkt_out; 135 kstat_named_t ip6_frag_mdt_discarded; 136 kstat_named_t ip6_frag_mdt_allocfail; 137 kstat_named_t ip6_frag_mdt_addpdescfail; 138 kstat_named_t ip6_frag_mdt_allocd; 139 } ip6_stat_t; 140 /* Initializers below are positional: they must stay in the same order as the members of ip6_stat_t above. */ 141 static ip6_stat_t ip6_statistics = { 142 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 143 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 144 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 145 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 146 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 147 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 148 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 149 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 150 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 151 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 152 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 153 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 154 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 155 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 156 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 157 { "ip6_frag_mdt_discarded", 
KSTAT_DATA_UINT64 }, 158 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 159 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 160 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 161 }; 162 163 static kstat_t *ip6_kstat; 164 165 /* 166 * Naming conventions: 167 * These rules should be judiciously applied 168 * if there is a need to identify something as IPv6 versus IPv4 169 * IPv6 functions will end with _v6 in the ip module. 170 * IPv6 functions will end with _ipv6 in the transport modules. 171 * IPv6 macros: 172 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 173 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 174 * And then there are ..V4_PART_OF_V6. 175 * The intent is that macros in the ip module end with _V6. 176 * IPv6 global variables will start with ipv6_ 177 * IPv6 structures will start with ipv6 178 * IPv6 defined constants should start with IPV6_ 179 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 180 */ 181 182 /* 183 * IPv6 mibs when the interface (ill) is not known. 184 * When the ill is known the per-interface mib in the ill is used. 185 */ 186 mib2_ipIfStatsEntry_t ip6_mib; 187 mib2_ipv6IfIcmpEntry_t icmp6_mib; 188 189 /* 190 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 191 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 192 * from IANA. This mechanism will remain in effect until an official 193 * number is obtained. 
194 */ 195 uchar_t ip6opt_ls; 196 197 uint_t ipv6_ire_default_count; /* Number of IPv6 IRE_DEFAULT entries */ 198 uint_t ipv6_ire_default_index; /* Walking IPv6 index used to mod in */ 199 /* The in6_addr_t constants below are initialized as four 32-bit words; the _BIG_ENDIAN/else pairs spell the same address bytes for each byte order. */ 200 const in6_addr_t ipv6_all_ones = 201 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 202 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 203 204 #ifdef _BIG_ENDIAN 205 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 206 #else /* _BIG_ENDIAN */ 207 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 208 #endif /* _BIG_ENDIAN */ 209 210 #ifdef _BIG_ENDIAN 211 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 212 #else /* _BIG_ENDIAN */ 213 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 214 #endif /* _BIG_ENDIAN */ 215 216 #ifdef _BIG_ENDIAN 217 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 218 #else /* _BIG_ENDIAN */ 219 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 220 #endif /* _BIG_ENDIAN */ 221 222 #ifdef _BIG_ENDIAN 223 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 224 #else /* _BIG_ENDIAN */ 225 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 226 #endif /* _BIG_ENDIAN */ 227 228 #ifdef _BIG_ENDIAN 229 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 230 #else /* _BIG_ENDIAN */ 231 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 232 #endif /* _BIG_ENDIAN */ 233 234 #ifdef _BIG_ENDIAN 235 const in6_addr_t ipv6_solicited_node_mcast = 236 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 237 #else /* _BIG_ENDIAN */ 238 const in6_addr_t ipv6_solicited_node_mcast = 239 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 240 #endif /* _BIG_ENDIAN */ 241 242 /* 243 * Used by icmp_send_redirect_v6 for picking random src. 
244 */ 245 uint_t icmp_redirect_v6_src_index; 246 247 /* Leave room for ip_newroute to tack on the src and target addresses: true when mp is non-NULL and its first block holds at least 2 * IPV6_ADDR_LEN bytes. */ 248 #define OK_RESOLVER_MP_V6(mp) \ 249 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 250 /* Forward declarations for functions defined later in this file. */ 251 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 252 boolean_t, zoneid_t); 253 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 254 const in6_addr_t *, boolean_t, zoneid_t); 255 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 256 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 257 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 258 boolean_t, boolean_t, boolean_t, boolean_t); 259 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 260 iulp_t *); 261 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 262 uint16_t, boolean_t, boolean_t, boolean_t); 263 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 264 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 265 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 266 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 267 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 268 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 269 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 270 uint8_t *, uint_t, uint8_t); 271 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 272 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 273 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *); 274 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 275 conn_t *, int, int, int, zoneid_t); 276 277 void ip_rput_v6(queue_t *, mblk_t *); 278 static void ip_wput_v6(queue_t *, mblk_t *); 279 280 /* 281 * A template for an IPv6 AR_ENTRY_QUERY. The offsets below place the target address, then the sender address, then the name, directly after the areq_t. 282 */ 283 static areq_t ipv6_areq_template = { 284 AR_ENTRY_QUERY, /* cmd */ 285 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 286 sizeof (areq_t), /* name 
len (filled by ill_arp_alloc) */ 287 IP6_DL_SAP, /* protocol, from arps perspective */ 288 sizeof (areq_t), /* target addr offset */ 289 IPV6_ADDR_LEN, /* target addr_length */ 290 0, /* flags */ 291 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 292 IPV6_ADDR_LEN, /* sender addr length */ 293 6, /* xmit_count */ 294 1000, /* (re)xmit_interval in milliseconds */ 295 4 /* max # of requests to buffer */ 296 /* anything else filled in by the code */ 297 }; 298 /* qinit whose first entry is ip_rput_v6 (presumably the read side, going by the name -- confirm against struct qinit). */ 299 struct qinit rinit_ipv6 = { 300 (pfi_t)ip_rput_v6, 301 NULL, 302 ip_open, 303 ip_close, 304 NULL, 305 &ip_mod_info 306 }; 307 /* qinit whose first two entries are ip_wput_v6 and ip_wsrv (presumably the write side, going by the name -- confirm against struct qinit). */ 308 struct qinit winit_ipv6 = { 309 (pfi_t)ip_wput_v6, 310 (pfi_t)ip_wsrv, 311 ip_open, 312 ip_close, 313 NULL, 314 &ip_mod_info 315 }; 316 317 /* 318 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 319 * The message has already been checksummed and if needed, 320 * a copy has been made to be sent to any interested ICMP client (conn) 321 * Note that this is different than icmp_inbound() which does the fanout 322 * to conn's as well as local processing of the ICMP packets. 323 * 324 * All error messages are passed to the matching transport stream. 325 * 326 * Zones notes: 327 * The packet is only processed in the context of the specified zone: typically 328 * only this zone will reply to an echo request. This means that the caller must 329 * call icmp_inbound_v6() for each relevant zone. 
330 */ 331 static void 332 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 333 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 334 { 335 icmp6_t *icmp6; 336 ip6_t *ip6h; 337 boolean_t interested; 338 ip6i_t *ip6i; 339 in6_addr_t origsrc; 340 ire_t *ire; 341 mblk_t *first_mp; 342 ipsec_in_t *ii; 343 344 ASSERT(ill != NULL); 345 first_mp = mp; 346 if (mctl_present) { 347 mp = first_mp->b_cont; 348 ASSERT(mp != NULL); 349 350 ii = (ipsec_in_t *)first_mp->b_rptr; 351 ASSERT(ii->ipsec_in_type == IPSEC_IN); 352 } 353 354 ip6h = (ip6_t *)mp->b_rptr; 355 356 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 357 358 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 359 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 360 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 361 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 362 freemsg(first_mp); 363 return; 364 } 365 ip6h = (ip6_t *)mp->b_rptr; 366 } 367 if (icmp_accept_clear_messages == 0) { 368 first_mp = ipsec_check_global_policy(first_mp, NULL, 369 NULL, ip6h, mctl_present); 370 if (first_mp == NULL) 371 return; 372 } 373 374 /* 375 * On a labeled system, we have to check whether the zone itself is 376 * permitted to receive raw traffic. 377 */ 378 if (is_system_labeled()) { 379 if (zoneid == ALL_ZONES) 380 zoneid = tsol_packet_to_zoneid(mp); 381 if (!tsol_can_accept_raw(mp, B_FALSE)) { 382 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 383 zoneid)); 384 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 385 freemsg(first_mp); 386 return; 387 } 388 } 389 390 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 391 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 392 icmp6->icmp6_code)); 393 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 394 395 /* Initiate IPPF processing here */ 396 if (IP6_IN_IPP(flags)) { 397 398 /* 399 * If the ifindex changes due to SIOCSLIFINDEX 400 * packet may return to IP on the wrong ill. 
401 */ 402 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 403 if (mp == NULL) { 404 if (mctl_present) { 405 freeb(first_mp); 406 } 407 return; 408 } 409 } 410 411 switch (icmp6->icmp6_type) { 412 case ICMP6_DST_UNREACH: 413 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 414 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 415 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 416 break; 417 418 case ICMP6_TIME_EXCEEDED: 419 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 420 break; 421 422 case ICMP6_PARAM_PROB: 423 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 424 break; 425 426 case ICMP6_PACKET_TOO_BIG: 427 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 428 zoneid); 429 return; 430 case ICMP6_ECHO_REQUEST: 431 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 432 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 433 !ipv6_resp_echo_mcast) 434 break; 435 436 /* 437 * We must have exclusive use of the mblk to convert it to 438 * a response. 439 * If not, we copy it. 440 */ 441 if (mp->b_datap->db_ref > 1) { 442 mblk_t *mp1; 443 444 mp1 = copymsg(mp); 445 freemsg(mp); 446 if (mp1 == NULL) { 447 BUMP_MIB(ill->ill_icmp6_mib, 448 ipv6IfIcmpInErrors); 449 if (mctl_present) 450 freeb(first_mp); 451 return; 452 } 453 mp = mp1; 454 ip6h = (ip6_t *)mp->b_rptr; 455 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 456 if (mctl_present) 457 first_mp->b_cont = mp; 458 else 459 first_mp = mp; 460 } 461 462 /* 463 * Turn the echo into an echo reply. 464 * Remove any extension headers (do not reverse a source route) 465 * and clear the flow id (keep traffic class for now). 
466 */ 467 if (hdr_length != IPV6_HDR_LEN) { 468 int i; 469 470 for (i = 0; i < IPV6_HDR_LEN; i++) 471 mp->b_rptr[hdr_length - i - 1] = 472 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 473 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 474 ip6h = (ip6_t *)mp->b_rptr; 475 ip6h->ip6_nxt = IPPROTO_ICMPV6; 476 hdr_length = IPV6_HDR_LEN; 477 } 478 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 479 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 480 481 ip6h->ip6_plen = 482 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 483 origsrc = ip6h->ip6_src; 484 /* 485 * Reverse the source and destination addresses. 486 * If the return address is a multicast, zero out the source 487 * (ip_wput_v6 will set an address). 488 */ 489 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 490 ip6h->ip6_src = ipv6_all_zeros; 491 ip6h->ip6_dst = origsrc; 492 } else { 493 ip6h->ip6_src = ip6h->ip6_dst; 494 ip6h->ip6_dst = origsrc; 495 } 496 497 /* set the hop limit */ 498 ip6h->ip6_hops = ipv6_def_hops; 499 500 /* 501 * Prepare for checksum by putting icmp length in the icmp 502 * checksum field. The checksum is calculated in ip_wput_v6. 503 */ 504 icmp6->icmp6_cksum = ip6h->ip6_plen; 505 /* 506 * ICMP echo replies should go out on the same interface 507 * the request came on as probes used by in.mpathd for 508 * detecting NIC failures are ECHO packets. We turn-off load 509 * spreading by allocating a ip6i and setting ip6i_attach_if 510 * to B_TRUE which is handled both by ip_wput_v6 and 511 * ip_newroute_v6. If we don't turnoff load spreading, 512 * the packets might get dropped if there are no 513 * non-FAILED/INACTIVE interfaces for it to go out on and 514 * in.mpathd would wrongly detect a failure or mis-detect 515 * a NIC failure as a link failure. As load spreading can 516 * happen only if ill_group is not NULL, we do only for 517 * that case and this does not affect the normal case. 518 * 519 * We force this only on echo packets that came from on-link 520 * hosts. 
We restrict this to link-local addresses which 521 * is used by in.mpathd for probing. In the IPv6 case, 522 * default routes typically have an ire_ipif pointer and 523 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 524 * might work. As a default route out of this interface 525 * may not be present, enforcing this packet to go out in 526 * this case may not work. 527 */ 528 if (ill->ill_group != NULL && 529 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 530 /* 531 * If we are sending replies to ourselves, don't 532 * set ATTACH_IF as we may not be able to find 533 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 534 * causes ip_wput_v6 to look for an IRE_LOCAL on 535 * "ill" which it may not find and will try to 536 * create an IRE_CACHE for our local address. Once 537 * we do this, we will try to forward all packets 538 * meant to our LOCAL address. 539 */ 540 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 541 NULL); 542 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 543 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 544 if (mp == NULL) { 545 BUMP_MIB(ill->ill_icmp6_mib, 546 ipv6IfIcmpInErrors); 547 if (ire != NULL) 548 ire_refrele(ire); 549 if (mctl_present) 550 freeb(first_mp); 551 return; 552 } else if (mctl_present) { 553 first_mp->b_cont = mp; 554 } else { 555 first_mp = mp; 556 } 557 ip6i = (ip6i_t *)mp->b_rptr; 558 ip6i->ip6i_flags = IP6I_ATTACH_IF; 559 ip6i->ip6i_ifindex = 560 ill->ill_phyint->phyint_ifindex; 561 } 562 if (ire != NULL) 563 ire_refrele(ire); 564 } 565 566 if (!mctl_present) { 567 /* 568 * This packet should go out the same way as it 569 * came in i.e in clear. To make sure that global 570 * policy will not be applied to this in ip_wput, 571 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
572 */ 573 ASSERT(first_mp == mp); 574 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 575 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 576 freemsg(mp); 577 return; 578 } 579 ii = (ipsec_in_t *)first_mp->b_rptr; 580 581 /* This is not a secure packet */ 582 ii->ipsec_in_secure = B_FALSE; 583 first_mp->b_cont = mp; 584 } 585 ii->ipsec_in_zoneid = zoneid; 586 ASSERT(zoneid != ALL_ZONES); 587 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 588 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 589 return; 590 } 591 put(WR(q), first_mp); 592 return; 593 594 case ICMP6_ECHO_REPLY: 595 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 596 break; 597 598 case ND_ROUTER_SOLICIT: 599 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 600 break; 601 602 case ND_ROUTER_ADVERT: 603 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 604 break; 605 606 case ND_NEIGHBOR_SOLICIT: 607 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 608 if (mctl_present) 609 freeb(first_mp); 610 /* XXX may wish to pass first_mp up to ndp_input someday. */ 611 ndp_input(ill, mp, dl_mp); 612 return; 613 614 case ND_NEIGHBOR_ADVERT: 615 BUMP_MIB(ill->ill_icmp6_mib, 616 ipv6IfIcmpInNeighborAdvertisements); 617 if (mctl_present) 618 freeb(first_mp); 619 /* XXX may wish to pass first_mp up to ndp_input someday. */ 620 ndp_input(ill, mp, dl_mp); 621 return; 622 623 case ND_REDIRECT: { 624 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 625 626 if (ipv6_ignore_redirect) 627 break; 628 629 /* 630 * As there is no upper client to deliver, we don't 631 * need the first_mp any more. 632 */ 633 if (mctl_present) 634 freeb(first_mp); 635 if (!pullupmsg(mp, -1)) { 636 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 637 break; 638 } 639 icmp_redirect_v6(q, mp, ill); 640 return; 641 } 642 643 /* 644 * The next three icmp messages will be handled by MLD. 645 * Pass all valid MLD packets up to any process(es) 646 * listening on a raw ICMP socket. 
MLD messages are 647 * freed by mld_input function. 648 */ 649 case MLD_LISTENER_QUERY: 650 case MLD_LISTENER_REPORT: 651 case MLD_LISTENER_REDUCTION: 652 if (mctl_present) 653 freeb(first_mp); 654 mld_input(q, mp, ill); 655 return; 656 default: 657 break; 658 } 659 if (interested) { 660 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 661 mctl_present, zoneid); 662 } else { 663 freemsg(first_mp); 664 } 665 } 666 667 /* 668 * Process received IPv6 ICMP Packet too big. 669 * After updating any IRE it does the fanout to any matching transport streams. 670 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 671 */ 672 /* ARGSUSED */ 673 static void 674 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 675 boolean_t mctl_present, zoneid_t zoneid) 676 { 677 ip6_t *ip6h; 678 ip6_t *inner_ip6h; 679 icmp6_t *icmp6; 680 uint16_t hdr_length; 681 uint32_t mtu; 682 ire_t *ire, *first_ire; 683 mblk_t *first_mp; 684 685 first_mp = mp; 686 if (mctl_present) 687 mp = first_mp->b_cont; 688 /* 689 * We must have exclusive use of the mblk to update the MTU 690 * in the packet. 691 * If not, we copy it. 692 * 693 * If there's an M_CTL present, we know that allocated first_mp 694 * earlier in this function, so we know first_mp has refcnt of one. 
695 */ 696 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 697 if (mp->b_datap->db_ref > 1) { 698 mblk_t *mp1; 699 700 mp1 = copymsg(mp); 701 freemsg(mp); 702 if (mp1 == NULL) { 703 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 704 if (mctl_present) 705 freeb(first_mp); 706 return; 707 } 708 mp = mp1; 709 if (mctl_present) 710 first_mp->b_cont = mp; 711 else 712 first_mp = mp; 713 } 714 ip6h = (ip6_t *)mp->b_rptr; 715 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 716 hdr_length = ip_hdr_length_v6(mp, ip6h); 717 else 718 hdr_length = IPV6_HDR_LEN; 719 720 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 721 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 722 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 723 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 724 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 725 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 726 freemsg(first_mp); 727 return; 728 } 729 ip6h = (ip6_t *)mp->b_rptr; 730 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 731 inner_ip6h = (ip6_t *)&icmp6[1]; 732 } 733 734 /* 735 * For link local destinations matching simply on IRE type is not 736 * sufficient. Same link local addresses for different ILL's is 737 * possible. 
738 */ 739 740 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 741 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 742 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 743 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 744 745 if (first_ire == NULL) { 746 if (ip_debug > 2) { 747 /* ip1dbg */ 748 pr_addr_dbg("icmp_inbound_too_big_v6:" 749 "no ire for dst %s\n", AF_INET6, 750 &inner_ip6h->ip6_dst); 751 } 752 freemsg(first_mp); 753 return; 754 } 755 756 mtu = ntohl(icmp6->icmp6_mtu); 757 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 758 for (ire = first_ire; ire != NULL && 759 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 760 ire = ire->ire_next) { 761 mutex_enter(&ire->ire_lock); 762 if (mtu < IPV6_MIN_MTU) { 763 ip1dbg(("Received mtu less than IPv6 " 764 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 765 mtu = IPV6_MIN_MTU; 766 /* 767 * If an mtu less than IPv6 min mtu is received, 768 * we must include a fragment header in 769 * subsequent packets. 770 */ 771 ire->ire_frag_flag |= IPH_FRAG_HDR; 772 } 773 ip1dbg(("Received mtu from router: %d\n", mtu)); 774 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 775 /* Record the new max frag size for the ULP. */ 776 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 777 /* 778 * If we need a fragment header in every packet 779 * (above case or multirouting), make sure the 780 * ULP takes it into account when computing the 781 * payload size. 
782 */ 783 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 784 sizeof (ip6_frag_t)); 785 } else { 786 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 787 } 788 mutex_exit(&ire->ire_lock); 789 } 790 rw_exit(&first_ire->ire_bucket->irb_lock); 791 ire_refrele(first_ire); 792 } else { 793 irb_t *irb = NULL; 794 /* 795 * for non-link local destinations we match only on the IRE type 796 */ 797 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 798 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE); 799 if (ire == NULL) { 800 if (ip_debug > 2) { 801 /* ip1dbg */ 802 pr_addr_dbg("icmp_inbound_too_big_v6:" 803 "no ire for dst %s\n", 804 AF_INET6, &inner_ip6h->ip6_dst); 805 } 806 freemsg(first_mp); 807 return; 808 } 809 irb = ire->ire_bucket; 810 ire_refrele(ire); 811 rw_enter(&irb->irb_lock, RW_READER); 812 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 813 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 814 &inner_ip6h->ip6_dst)) { 815 mtu = ntohl(icmp6->icmp6_mtu); 816 mutex_enter(&ire->ire_lock); 817 if (mtu < IPV6_MIN_MTU) { 818 ip1dbg(("Received mtu less than IPv6" 819 "min mtu %d: %d\n", 820 IPV6_MIN_MTU, mtu)); 821 mtu = IPV6_MIN_MTU; 822 /* 823 * If an mtu less than IPv6 min mtu is 824 * received, we must include a fragment 825 * header in subsequent packets. 826 */ 827 ire->ire_frag_flag |= IPH_FRAG_HDR; 828 } 829 830 ip1dbg(("Received mtu from router: %d\n", mtu)); 831 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 832 /* Record the new max frag size for the ULP. */ 833 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 834 /* 835 * If we need a fragment header in 836 * every packet (above case or 837 * multirouting), make sure the ULP 838 * takes it into account when computing 839 * the payload size. 
840 */ 841 icmp6->icmp6_mtu = 842 htonl(ire->ire_max_frag - 843 sizeof (ip6_frag_t)); 844 } else { 845 icmp6->icmp6_mtu = 846 htonl(ire->ire_max_frag); 847 } 848 mutex_exit(&ire->ire_lock); 849 } 850 } 851 rw_exit(&irb->irb_lock); 852 } 853 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 854 mctl_present, zoneid); 855 } 856 857 static void 858 pkt_too_big(conn_t *connp, void *arg) 859 { 860 mblk_t *mp; 861 862 if (!connp->conn_ipv6_recvpathmtu) 863 return; 864 865 /* create message and drop it on this connections read queue */ 866 if ((mp = dupb((mblk_t *)arg)) == NULL) { 867 return; 868 } 869 mp->b_datap->db_type = M_CTL; 870 871 putnext(connp->conn_rq, mp); 872 } 873 874 /* 875 * Fanout received ICMPv6 error packets to the transports. 876 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 877 */ 878 void 879 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 880 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 881 { 882 uint16_t *up; /* Pointer to ports in ULP header */ 883 uint32_t ports; /* reversed ports for fanout */ 884 ip6_t rip6h; /* With reversed addresses */ 885 uint16_t hdr_length; 886 uint8_t *nexthdrp; 887 uint8_t nexthdr; 888 mblk_t *first_mp; 889 ipsec_in_t *ii; 890 tcpha_t *tcpha; 891 conn_t *connp; 892 893 first_mp = mp; 894 if (mctl_present) { 895 mp = first_mp->b_cont; 896 ASSERT(mp != NULL); 897 898 ii = (ipsec_in_t *)first_mp->b_rptr; 899 ASSERT(ii->ipsec_in_type == IPSEC_IN); 900 } else { 901 ii = NULL; 902 } 903 904 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 905 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 906 907 /* 908 * Need to pullup everything in order to use 909 * ip_hdr_length_nexthdr_v6() 910 */ 911 if (mp->b_cont != NULL) { 912 if (!pullupmsg(mp, -1)) { 913 ip1dbg(("icmp_inbound_error_fanout_v6: " 914 "pullupmsg failed\n")); 915 goto drop_pkt; 916 } 917 ip6h = (ip6_t *)mp->b_rptr; 918 icmp6 = (icmp6_t 
*)(&mp->b_rptr[hdr_length]); 919 } 920 921 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 922 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 923 goto drop_pkt; 924 925 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 926 goto drop_pkt; 927 nexthdr = *nexthdrp; 928 929 /* Set message type, must be done after pullups */ 930 mp->b_datap->db_type = M_CTL; 931 932 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 933 /* 934 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 935 * sockets. 936 * 937 * Note I don't like walking every connection to deliver 938 * this information to a set of listeners. A separate 939 * list could be kept to keep the cost of this down. 940 */ 941 ipcl_walk(pkt_too_big, (void *)mp); 942 } 943 944 /* Try to pass the ICMP message to clients who need it */ 945 switch (nexthdr) { 946 case IPPROTO_UDP: { 947 /* 948 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 949 * UDP header to get the port information. 950 */ 951 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 952 mp->b_wptr) { 953 break; 954 } 955 /* 956 * Attempt to find a client stream based on port. 957 * Note that we do a reverse lookup since the header is 958 * in the form we sent it out. 959 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 960 * and we only set the src and dst addresses and nexthdr. 961 */ 962 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 963 rip6h.ip6_src = ip6h->ip6_dst; 964 rip6h.ip6_dst = ip6h->ip6_src; 965 rip6h.ip6_nxt = nexthdr; 966 ((uint16_t *)&ports)[0] = up[1]; 967 ((uint16_t *)&ports)[1] = up[0]; 968 969 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 970 IP6_NO_IPPOLICY, mctl_present, zoneid); 971 return; 972 } 973 case IPPROTO_TCP: { 974 /* 975 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 976 * the TCP header to get the port information. 
977 */ 978 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 979 mp->b_wptr) { 980 break; 981 } 982 983 /* 984 * Attempt to find a client stream based on port. 985 * Note that we do a reverse lookup since the header is 986 * in the form we sent it out. 987 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 988 * we only set the src and dst addresses and nexthdr. 989 */ 990 991 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 992 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 993 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex); 994 if (connp == NULL) { 995 goto drop_pkt; 996 } 997 998 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 999 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 1000 return; 1001 1002 } 1003 case IPPROTO_SCTP: 1004 /* 1005 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 1006 * the SCTP header to get the port information. 1007 */ 1008 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 1009 mp->b_wptr) { 1010 break; 1011 } 1012 1013 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 1014 ((uint16_t *)&ports)[0] = up[1]; 1015 ((uint16_t *)&ports)[1] = up[0]; 1016 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 1017 IP6_NO_IPPOLICY, 0, zoneid); 1018 return; 1019 case IPPROTO_ESP: 1020 case IPPROTO_AH: { 1021 int ipsec_rc; 1022 1023 /* 1024 * We need a IPSEC_IN in the front to fanout to AH/ESP. 1025 * We will re-use the IPSEC_IN if it is already present as 1026 * AH/ESP will not affect any fields in the IPSEC_IN for 1027 * ICMP errors. If there is no IPSEC_IN, allocate a new 1028 * one and attach it in the front. 1029 */ 1030 if (ii != NULL) { 1031 /* 1032 * ip_fanout_proto_again converts the ICMP errors 1033 * that come back from AH/ESP to M_DATA so that 1034 * if it is non-AH/ESP and we do a pullupmsg in 1035 * this function, it would work. Convert it back 1036 * to M_CTL before we send up as this is a ICMP 1037 * error. This could have been generated locally or 1038 * by some router. 
Validate the inner IPSEC 1039 * headers. 1040 * 1041 * NOTE : ill_index is used by ip_fanout_proto_again 1042 * to locate the ill. 1043 */ 1044 ASSERT(ill != NULL); 1045 ii->ipsec_in_ill_index = 1046 ill->ill_phyint->phyint_ifindex; 1047 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1048 first_mp->b_cont->b_datap->db_type = M_CTL; 1049 } else { 1050 /* 1051 * IPSEC_IN is not present. We attach a ipsec_in 1052 * message and send up to IPSEC for validating 1053 * and removing the IPSEC headers. Clear 1054 * ipsec_in_secure so that when we return 1055 * from IPSEC, we don't mistakenly think that this 1056 * is a secure packet came from the network. 1057 * 1058 * NOTE : ill_index is used by ip_fanout_proto_again 1059 * to locate the ill. 1060 */ 1061 ASSERT(first_mp == mp); 1062 first_mp = ipsec_in_alloc(B_FALSE); 1063 ASSERT(ill != NULL); 1064 if (first_mp == NULL) { 1065 freemsg(mp); 1066 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1067 return; 1068 } 1069 ii = (ipsec_in_t *)first_mp->b_rptr; 1070 1071 /* This is not a secure packet */ 1072 ii->ipsec_in_secure = B_FALSE; 1073 first_mp->b_cont = mp; 1074 mp->b_datap->db_type = M_CTL; 1075 ii->ipsec_in_ill_index = 1076 ill->ill_phyint->phyint_ifindex; 1077 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1078 } 1079 1080 if (!ipsec_loaded()) { 1081 ip_proto_not_sup(q, first_mp, 0, zoneid); 1082 return; 1083 } 1084 1085 if (nexthdr == IPPROTO_ESP) 1086 ipsec_rc = ipsecesp_icmp_error(first_mp); 1087 else 1088 ipsec_rc = ipsecah_icmp_error(first_mp); 1089 if (ipsec_rc == IPSEC_STATUS_FAILED) 1090 return; 1091 1092 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1093 return; 1094 } 1095 case IPPROTO_ENCAP: 1096 case IPPROTO_IPV6: 1097 if ((uint8_t *)ip6h + hdr_length + 1098 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1099 sizeof (ip6_t)) > mp->b_wptr) 1100 goto drop_pkt; 1101 1102 if (nexthdr == IPPROTO_ENCAP || 1103 !IN6_ARE_ADDR_EQUAL( 1104 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1105 &ip6h->ip6_src) || 1106 !IN6_ARE_ADDR_EQUAL( 1107 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1108 &ip6h->ip6_dst)) { 1109 /* 1110 * For tunnels that have used IPsec protection, 1111 * we need to adjust the MTU to take into account 1112 * the IPsec overhead. 1113 */ 1114 if (ii != NULL) 1115 icmp6->icmp6_mtu = htonl( 1116 ntohl(icmp6->icmp6_mtu) - 1117 ipsec_in_extra_length(first_mp)); 1118 } else { 1119 /* 1120 * Self-encapsulated case. As in the ipv4 case, 1121 * we need to strip the 2nd IP header. Since mp 1122 * is already pulled-up, we can simply bcopy 1123 * the 3rd header + data over the 2nd header. 1124 */ 1125 uint16_t unused_len; 1126 ip6_t *inner_ip6h = (ip6_t *) 1127 ((uchar_t *)ip6h + hdr_length); 1128 1129 /* 1130 * Make sure we don't do recursion more than once. 1131 */ 1132 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1133 &unused_len, &nexthdrp) || 1134 *nexthdrp == IPPROTO_IPV6) { 1135 goto drop_pkt; 1136 } 1137 1138 /* 1139 * We are about to modify the packet. Make a copy if 1140 * someone else has a reference to it. 1141 */ 1142 if (DB_REF(mp) > 1) { 1143 mblk_t *mp1; 1144 uint16_t icmp6_offset; 1145 1146 mp1 = copymsg(mp); 1147 if (mp1 == NULL) { 1148 goto drop_pkt; 1149 } 1150 icmp6_offset = (uint16_t) 1151 ((uchar_t *)icmp6 - mp->b_rptr); 1152 freemsg(mp); 1153 mp = mp1; 1154 1155 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1156 ip6h = (ip6_t *)&icmp6[1]; 1157 inner_ip6h = (ip6_t *) 1158 ((uchar_t *)ip6h + hdr_length); 1159 1160 if (mctl_present) 1161 first_mp->b_cont = mp; 1162 else 1163 first_mp = mp; 1164 } 1165 1166 /* 1167 * Need to set db_type back to M_DATA before 1168 * refeeding mp into this function. 
1169 */ 1170 DB_TYPE(mp) = M_DATA; 1171 1172 /* 1173 * Copy the 3rd header + remaining data on top 1174 * of the 2nd header. 1175 */ 1176 bcopy(inner_ip6h, ip6h, 1177 mp->b_wptr - (uchar_t *)inner_ip6h); 1178 1179 /* 1180 * Subtract length of the 2nd header. 1181 */ 1182 mp->b_wptr -= hdr_length; 1183 1184 /* 1185 * Now recurse, and see what I _really_ should be 1186 * doing here. 1187 */ 1188 icmp_inbound_error_fanout_v6(q, first_mp, 1189 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1190 zoneid); 1191 return; 1192 } 1193 /* FALLTHRU */ 1194 default: 1195 /* 1196 * The rip6h header is only used for the lookup and we 1197 * only set the src and dst addresses and nexthdr. 1198 */ 1199 rip6h.ip6_src = ip6h->ip6_dst; 1200 rip6h.ip6_dst = ip6h->ip6_src; 1201 rip6h.ip6_nxt = nexthdr; 1202 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1203 IP6_NO_IPPOLICY, mctl_present, zoneid); 1204 return; 1205 } 1206 /* NOTREACHED */ 1207 drop_pkt: 1208 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1209 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1210 freemsg(first_mp); 1211 } 1212 1213 /* 1214 * Process received IPv6 ICMP Redirect messages. 
1215 */ 1216 /* ARGSUSED */ 1217 static void 1218 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1219 { 1220 ip6_t *ip6h; 1221 uint16_t hdr_length; 1222 nd_redirect_t *rd; 1223 ire_t *ire; 1224 ire_t *prev_ire; 1225 ire_t *redir_ire; 1226 in6_addr_t *src, *dst, *gateway; 1227 nd_opt_hdr_t *opt; 1228 nce_t *nce; 1229 int nce_flags = 0; 1230 int err = 0; 1231 boolean_t redirect_to_router = B_FALSE; 1232 int len; 1233 int optlen; 1234 iulp_t ulp_info = { 0 }; 1235 ill_t *prev_ire_ill; 1236 ipif_t *ipif; 1237 1238 ip6h = (ip6_t *)mp->b_rptr; 1239 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1240 hdr_length = ip_hdr_length_v6(mp, ip6h); 1241 else 1242 hdr_length = IPV6_HDR_LEN; 1243 1244 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1245 len = mp->b_wptr - mp->b_rptr - hdr_length; 1246 src = &ip6h->ip6_src; 1247 dst = &rd->nd_rd_dst; 1248 gateway = &rd->nd_rd_target; 1249 1250 /* Verify if it is a valid redirect */ 1251 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1252 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1253 (rd->nd_rd_code != 0) || 1254 (len < sizeof (nd_redirect_t)) || 1255 (IN6_IS_ADDR_V4MAPPED(dst)) || 1256 (IN6_IS_ADDR_MULTICAST(dst))) { 1257 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1258 freemsg(mp); 1259 return; 1260 } 1261 1262 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1263 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1264 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1265 freemsg(mp); 1266 return; 1267 } 1268 1269 if (len > sizeof (nd_redirect_t)) { 1270 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1271 len - sizeof (nd_redirect_t))) { 1272 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1273 freemsg(mp); 1274 return; 1275 } 1276 } 1277 1278 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1279 redirect_to_router = B_TRUE; 1280 nce_flags |= NCE_F_ISROUTER; 1281 } 1282 1283 /* ipif will be refreleased afterwards */ 1284 ipif = ipif_get_next_ipif(NULL, ill); 1285 if (ipif == NULL) { 1286 freemsg(mp); 1287 return; 1288 } 1289 1290 /* 1291 * Verify that the 
IP source address of the redirect is 1292 * the same as the current first-hop router for the specified 1293 * ICMP destination address. 1294 * Also, Make sure we had a route for the dest in question and 1295 * that route was pointing to the old gateway (the source of the 1296 * redirect packet.) 1297 */ 1298 1299 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1300 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1301 MATCH_IRE_DEFAULT); 1302 1303 /* 1304 * Check that 1305 * the redirect was not from ourselves 1306 * old gateway is still directly reachable 1307 */ 1308 if (prev_ire == NULL || 1309 prev_ire->ire_type == IRE_LOCAL) { 1310 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1311 ipif_refrele(ipif); 1312 goto fail_redirect; 1313 } 1314 prev_ire_ill = ire_to_ill(prev_ire); 1315 ASSERT(prev_ire_ill != NULL); 1316 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1317 nce_flags |= NCE_F_NONUD; 1318 1319 /* 1320 * Should we use the old ULP info to create the new gateway? From 1321 * a user's perspective, we should inherit the info so that it 1322 * is a "smooth" transition. If we do not do that, then new 1323 * connections going thru the new gateway will have no route metrics, 1324 * which is counter-intuitive to user. From a network point of 1325 * view, this may or may not make sense even though the new gateway 1326 * is still directly connected to us so the route metrics should not 1327 * change much. 1328 * 1329 * But if the old ire_uinfo is not initialized, we do another 1330 * recursive lookup on the dest using the new gateway. There may 1331 * be a route to that. If so, use it to initialize the redirect 1332 * route. 1333 */ 1334 if (prev_ire->ire_uinfo.iulp_set) { 1335 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1336 } else if (redirect_to_router) { 1337 /* 1338 * Only do the following if the redirection is really to 1339 * a router. 
1340 */ 1341 ire_t *tmp_ire; 1342 ire_t *sire; 1343 1344 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1345 ALL_ZONES, 0, NULL, 1346 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT)); 1347 if (sire != NULL) { 1348 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1349 ASSERT(tmp_ire != NULL); 1350 ire_refrele(tmp_ire); 1351 ire_refrele(sire); 1352 } else if (tmp_ire != NULL) { 1353 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1354 sizeof (iulp_t)); 1355 ire_refrele(tmp_ire); 1356 } 1357 } 1358 1359 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1360 opt = (nd_opt_hdr_t *)&rd[1]; 1361 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1362 if (opt != NULL) { 1363 err = ndp_lookup_then_add(ill, 1364 (uchar_t *)&opt[1], /* Link layer address */ 1365 gateway, 1366 &ipv6_all_ones, /* prefix mask */ 1367 &ipv6_all_zeros, /* Mapping mask */ 1368 0, 1369 nce_flags, 1370 ND_STALE, 1371 &nce, 1372 NULL, 1373 NULL); 1374 switch (err) { 1375 case 0: 1376 NCE_REFRELE(nce); 1377 break; 1378 case EEXIST: 1379 /* 1380 * Check to see if link layer address has changed and 1381 * process the nce_state accordingly. 1382 */ 1383 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1384 NCE_REFRELE(nce); 1385 break; 1386 default: 1387 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1388 err)); 1389 ipif_refrele(ipif); 1390 goto fail_redirect; 1391 } 1392 } 1393 if (redirect_to_router) { 1394 /* icmp_redirect_ok_v6() must have already verified this */ 1395 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1396 1397 /* 1398 * Create a Route Association. This will allow us to remember 1399 * a router told us to use the particular gateway. 
1400 */ 1401 ire = ire_create_v6( 1402 dst, 1403 &ipv6_all_ones, /* mask */ 1404 &prev_ire->ire_src_addr_v6, /* source addr */ 1405 gateway, /* gateway addr */ 1406 &prev_ire->ire_max_frag, /* max frag */ 1407 NULL, /* Fast Path header */ 1408 NULL, /* no rfq */ 1409 NULL, /* no stq */ 1410 IRE_HOST, 1411 NULL, 1412 prev_ire->ire_ipif, 1413 NULL, 1414 0, 1415 0, 1416 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1417 &ulp_info, 1418 NULL, 1419 NULL); 1420 } else { 1421 queue_t *stq; 1422 1423 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1424 ? ipif->ipif_rq : ipif->ipif_wq; 1425 1426 /* 1427 * Just create an on link entry, i.e. interface route. 1428 */ 1429 ire = ire_create_v6( 1430 dst, /* gateway == dst */ 1431 &ipv6_all_ones, /* mask */ 1432 &prev_ire->ire_src_addr_v6, /* source addr */ 1433 &ipv6_all_zeros, /* gateway addr */ 1434 &prev_ire->ire_max_frag, /* max frag */ 1435 NULL, /* Fast Path header */ 1436 NULL, /* ire rfq */ 1437 stq, /* ire stq */ 1438 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1439 NULL, 1440 prev_ire->ire_ipif, 1441 &ipv6_all_ones, 1442 0, 1443 0, 1444 (RTF_DYNAMIC | RTF_HOST), 1445 &ulp_info, 1446 NULL, 1447 NULL); 1448 } 1449 1450 /* Release reference from earlier ipif_get_next_ipif() */ 1451 ipif_refrele(ipif); 1452 1453 if (ire == NULL) 1454 goto fail_redirect; 1455 1456 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1457 1458 /* tell routing sockets that we received a redirect */ 1459 ip_rts_change_v6(RTM_REDIRECT, 1460 &rd->nd_rd_dst, 1461 &rd->nd_rd_target, 1462 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1463 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1464 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR)); 1465 1466 /* 1467 * Delete any existing IRE_HOST type ires for this destination. 1468 * This together with the added IRE has the effect of 1469 * modifying an existing redirect. 
1470 */ 1471 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1472 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1473 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP)); 1474 1475 ire_refrele(ire); /* Held in ire_add_v6 */ 1476 1477 if (redir_ire != NULL) { 1478 if (redir_ire->ire_flags & RTF_DYNAMIC) 1479 ire_delete(redir_ire); 1480 ire_refrele(redir_ire); 1481 } 1482 } 1483 1484 if (prev_ire->ire_type == IRE_CACHE) 1485 ire_delete(prev_ire); 1486 ire_refrele(prev_ire); 1487 prev_ire = NULL; 1488 1489 fail_redirect: 1490 if (prev_ire != NULL) 1491 ire_refrele(prev_ire); 1492 freemsg(mp); 1493 } 1494 1495 static ill_t * 1496 ip_queue_to_ill_v6(queue_t *q) 1497 { 1498 ill_t *ill; 1499 1500 ASSERT(WR(q) == q); 1501 1502 if (q->q_next != NULL) { 1503 ill = (ill_t *)q->q_ptr; 1504 if (ILL_CAN_LOOKUP(ill)) 1505 ill_refhold(ill); 1506 else 1507 ill = NULL; 1508 } else { 1509 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1510 NULL, NULL, NULL, NULL, NULL); 1511 } 1512 if (ill == NULL) 1513 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1514 return (ill); 1515 } 1516 1517 /* 1518 * Assigns an appropriate source address to the packet. 1519 * If origdst is one of our IP addresses that use it as the source. 1520 * If the queue is an ill queue then select a source from that ill. 1521 * Otherwise pick a source based on a route lookup back to the origsrc. 1522 * 1523 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1524 */ 1525 static in6_addr_t * 1526 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1527 in6_addr_t *src, zoneid_t zoneid) 1528 { 1529 ill_t *ill; 1530 ire_t *ire; 1531 ipif_t *ipif; 1532 1533 ASSERT(!(wq->q_flag & QREADR)); 1534 if (wq->q_next != NULL) { 1535 ill = (ill_t *)wq->q_ptr; 1536 } else { 1537 ill = NULL; 1538 } 1539 1540 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1541 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY)); 1542 if (ire != NULL) { 1543 /* Destined to one of our addresses */ 1544 *src = *origdst; 1545 ire_refrele(ire); 1546 return (src); 1547 } 1548 if (ire != NULL) { 1549 ire_refrele(ire); 1550 ire = NULL; 1551 } 1552 if (ill == NULL) { 1553 /* What is the route back to the original source? */ 1554 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1555 NULL, NULL, zoneid, NULL, 1556 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1557 if (ire == NULL) { 1558 BUMP_MIB(&ip6_mib, ipIfStatsOutNoRoutes); 1559 return (NULL); 1560 } 1561 /* 1562 * Does not matter whether we use ire_stq or ire_ipif here. 1563 * Just pick an ill for ICMP replies. 1564 */ 1565 ASSERT(ire->ire_ipif != NULL); 1566 ill = ire->ire_ipif->ipif_ill; 1567 ire_refrele(ire); 1568 } 1569 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1570 IPV6_PREFER_SRC_DEFAULT, zoneid); 1571 if (ipif != NULL) { 1572 *src = ipif->ipif_v6src_addr; 1573 ipif_refrele(ipif); 1574 return (src); 1575 } 1576 /* 1577 * Unusual case - can't find a usable source address to reach the 1578 * original source. Use what in the route to the source. 
1579 */ 1580 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1581 NULL, NULL, zoneid, NULL, 1582 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE)); 1583 if (ire == NULL) { 1584 BUMP_MIB(&ip6_mib, ipIfStatsOutNoRoutes); 1585 return (NULL); 1586 } 1587 ASSERT(ire != NULL); 1588 *src = ire->ire_src_addr_v6; 1589 ire_refrele(ire); 1590 return (src); 1591 } 1592 1593 /* 1594 * Build and ship an IPv6 ICMP message using the packet data in mp, 1595 * and the ICMP header pointed to by "stuff". (May be called as 1596 * writer.) 1597 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1598 * verify that an icmp error packet can be sent. 1599 * 1600 * If q is an ill write side queue (which is the case when packets 1601 * arrive from ip_rput) then ip_wput code will ensure that packets to 1602 * link-local destinations are sent out that ill. 1603 * 1604 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1605 * source address (see above function). 1606 */ 1607 static void 1608 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1609 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid) 1610 { 1611 ip6_t *ip6h; 1612 in6_addr_t v6dst; 1613 size_t len_needed; 1614 size_t msg_len; 1615 mblk_t *mp1; 1616 icmp6_t *icmp6; 1617 ill_t *ill; 1618 in6_addr_t v6src; 1619 mblk_t *ipsec_mp; 1620 ipsec_out_t *io; 1621 1622 ill = ip_queue_to_ill_v6(q); 1623 if (ill == NULL) { 1624 freemsg(mp); 1625 return; 1626 } 1627 1628 if (mctl_present) { 1629 /* 1630 * If it is : 1631 * 1632 * 1) a IPSEC_OUT, then this is caused by outbound 1633 * datagram originating on this host. IPSEC processing 1634 * may or may not have been done. Refer to comments above 1635 * icmp_inbound_error_fanout for details. 1636 * 1637 * 2) a IPSEC_IN if we are generating a icmp_message 1638 * for an incoming datagram destined for us i.e called 1639 * from ip_fanout_send_icmp. 
1640 */ 1641 ipsec_info_t *in; 1642 1643 ipsec_mp = mp; 1644 mp = ipsec_mp->b_cont; 1645 1646 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1647 ip6h = (ip6_t *)mp->b_rptr; 1648 1649 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1650 in->ipsec_info_type == IPSEC_IN); 1651 1652 if (in->ipsec_info_type == IPSEC_IN) { 1653 /* 1654 * Convert the IPSEC_IN to IPSEC_OUT. 1655 */ 1656 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1657 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1658 ill_refrele(ill); 1659 return; 1660 } 1661 } else { 1662 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1663 io = (ipsec_out_t *)in; 1664 /* 1665 * Clear out ipsec_out_proc_begin, so we do a fresh 1666 * ire lookup. 1667 */ 1668 io->ipsec_out_proc_begin = B_FALSE; 1669 } 1670 } else { 1671 /* 1672 * This is in clear. The icmp message we are building 1673 * here should go out in clear. 1674 */ 1675 ipsec_in_t *ii; 1676 ASSERT(mp->b_datap->db_type == M_DATA); 1677 if ((ipsec_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 1678 freemsg(mp); 1679 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1680 ill_refrele(ill); 1681 return; 1682 } 1683 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1684 1685 /* This is not a secure packet */ 1686 ii->ipsec_in_secure = B_FALSE; 1687 /* 1688 * For trusted extensions using a shared IP address we can 1689 * send using any zoneid. 1690 */ 1691 if (zoneid == ALL_ZONES) 1692 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1693 else 1694 ii->ipsec_in_zoneid = zoneid; 1695 ipsec_mp->b_cont = mp; 1696 ip6h = (ip6_t *)mp->b_rptr; 1697 /* 1698 * Convert the IPSEC_IN to IPSEC_OUT. 
1699 */ 1700 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1701 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1702 ill_refrele(ill); 1703 return; 1704 } 1705 } 1706 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1707 1708 if (v6src_ptr != NULL) { 1709 v6src = *v6src_ptr; 1710 } else { 1711 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1712 &v6src, zoneid) == NULL) { 1713 freemsg(ipsec_mp); 1714 ill_refrele(ill); 1715 return; 1716 } 1717 } 1718 v6dst = ip6h->ip6_src; 1719 len_needed = ipv6_icmp_return - IPV6_HDR_LEN - len; 1720 msg_len = msgdsize(mp); 1721 if (msg_len > len_needed) { 1722 if (!adjmsg(mp, len_needed - msg_len)) { 1723 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1724 freemsg(ipsec_mp); 1725 ill_refrele(ill); 1726 return; 1727 } 1728 msg_len = len_needed; 1729 } 1730 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1731 if (mp1 == NULL) { 1732 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1733 freemsg(ipsec_mp); 1734 ill_refrele(ill); 1735 return; 1736 } 1737 ill_refrele(ill); 1738 mp1->b_cont = mp; 1739 mp = mp1; 1740 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1741 io->ipsec_out_type == IPSEC_OUT); 1742 ipsec_mp->b_cont = mp; 1743 1744 /* 1745 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1746 * node generates be accepted in peace by all on-host destinations. 1747 * If we do NOT assume that all on-host destinations trust 1748 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1749 * (Look for ipsec_out_icmp_loopback). 
1750 */ 1751 io->ipsec_out_icmp_loopback = B_TRUE; 1752 1753 ip6h = (ip6_t *)mp->b_rptr; 1754 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1755 1756 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1757 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1758 ip6h->ip6_hops = ipv6_def_hops; 1759 ip6h->ip6_dst = v6dst; 1760 ip6h->ip6_src = v6src; 1761 msg_len += IPV6_HDR_LEN + len; 1762 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1763 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1764 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1765 } 1766 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1767 icmp6 = (icmp6_t *)&ip6h[1]; 1768 bcopy(stuff, (char *)icmp6, len); 1769 /* 1770 * Prepare for checksum by putting icmp length in the icmp 1771 * checksum field. The checksum is calculated in ip_wput_v6. 1772 */ 1773 icmp6->icmp6_cksum = ip6h->ip6_plen; 1774 if (icmp6->icmp6_type == ND_REDIRECT) { 1775 ip6h->ip6_hops = IPV6_MAX_HOPS; 1776 } 1777 /* Send to V6 writeside put routine */ 1778 put(q, ipsec_mp); 1779 } 1780 1781 /* 1782 * Update the output mib when ICMPv6 packets are sent. 
1783 */ 1784 static void 1785 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1786 { 1787 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1788 1789 switch (icmp6->icmp6_type) { 1790 case ICMP6_DST_UNREACH: 1791 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1792 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1793 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1794 break; 1795 1796 case ICMP6_TIME_EXCEEDED: 1797 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1798 break; 1799 1800 case ICMP6_PARAM_PROB: 1801 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1802 break; 1803 1804 case ICMP6_PACKET_TOO_BIG: 1805 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1806 break; 1807 1808 case ICMP6_ECHO_REQUEST: 1809 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1810 break; 1811 1812 case ICMP6_ECHO_REPLY: 1813 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1814 break; 1815 1816 case ND_ROUTER_SOLICIT: 1817 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1818 break; 1819 1820 case ND_ROUTER_ADVERT: 1821 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1822 break; 1823 1824 case ND_NEIGHBOR_SOLICIT: 1825 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1826 break; 1827 1828 case ND_NEIGHBOR_ADVERT: 1829 BUMP_MIB(ill->ill_icmp6_mib, 1830 ipv6IfIcmpOutNeighborAdvertisements); 1831 break; 1832 1833 case ND_REDIRECT: 1834 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1835 break; 1836 1837 case MLD_LISTENER_QUERY: 1838 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1839 break; 1840 1841 case MLD_LISTENER_REPORT: 1842 case MLD_V2_LISTENER_REPORT: 1843 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1844 break; 1845 1846 case MLD_LISTENER_REDUCTION: 1847 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1848 break; 1849 } 1850 } 1851 1852 /* 1853 * Check if it is ok to send an ICMPv6 error packet in 1854 * response to the IP packet in 
mp. 1855 * Free the message and return null if no 1856 * ICMP error packet should be sent. 1857 */ 1858 static mblk_t * 1859 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1860 boolean_t llbcast, boolean_t mcast_ok) 1861 { 1862 ip6_t *ip6h; 1863 1864 if (!mp) 1865 return (NULL); 1866 1867 ip6h = (ip6_t *)mp->b_rptr; 1868 1869 /* Check if source address uniquely identifies the host */ 1870 1871 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1872 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1873 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1874 freemsg(mp); 1875 return (NULL); 1876 } 1877 1878 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1879 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1880 icmp6_t *icmp6; 1881 1882 if (mp->b_wptr - mp->b_rptr < len_needed) { 1883 if (!pullupmsg(mp, len_needed)) { 1884 ill_t *ill; 1885 1886 ill = ip_queue_to_ill_v6(q); 1887 if (ill == NULL) { 1888 BUMP_MIB(&icmp6_mib, 1889 ipv6IfIcmpInErrors); 1890 } else { 1891 BUMP_MIB(ill->ill_icmp6_mib, 1892 ipv6IfIcmpInErrors); 1893 ill_refrele(ill); 1894 } 1895 freemsg(mp); 1896 return (NULL); 1897 } 1898 ip6h = (ip6_t *)mp->b_rptr; 1899 } 1900 icmp6 = (icmp6_t *)&ip6h[1]; 1901 /* Explicitly do not generate errors in response to redirects */ 1902 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1903 icmp6->icmp6_type == ND_REDIRECT) { 1904 freemsg(mp); 1905 return (NULL); 1906 } 1907 } 1908 /* 1909 * Check that the destination is not multicast and that the packet 1910 * was not sent on link layer broadcast or multicast. (Exception 1911 * is Packet too big message as per the draft - when mcast_ok is set.) 1912 */ 1913 if (!mcast_ok && 1914 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1915 freemsg(mp); 1916 return (NULL); 1917 } 1918 if (icmp_err_rate_limit()) { 1919 /* 1920 * Only send ICMP error packets every so often. 1921 * This should be done on a per port/source basis, 1922 * but for now this will suffice. 
1923 */ 1924 freemsg(mp); 1925 return (NULL); 1926 } 1927 return (mp); 1928 } 1929 1930 /* 1931 * Generate an ICMPv6 redirect message. 1932 * Include target link layer address option if it exits. 1933 * Always include redirect header. 1934 */ 1935 static void 1936 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1937 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1938 { 1939 nd_redirect_t *rd; 1940 nd_opt_rd_hdr_t *rdh; 1941 uchar_t *buf; 1942 nce_t *nce = NULL; 1943 nd_opt_hdr_t *opt; 1944 int len; 1945 int ll_opt_len = 0; 1946 int max_redir_hdr_data_len; 1947 int pkt_len; 1948 in6_addr_t *srcp; 1949 1950 /* 1951 * We are called from ip_rput where we could 1952 * not have attached an IPSEC_IN. 1953 */ 1954 ASSERT(mp->b_datap->db_type == M_DATA); 1955 1956 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE); 1957 if (mp == NULL) 1958 return; 1959 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1960 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1961 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1962 ill->ill_phys_addr_length + 7)/8 * 8; 1963 } 1964 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1965 ASSERT(len % 4 == 0); 1966 buf = kmem_alloc(len, KM_NOSLEEP); 1967 if (buf == NULL) { 1968 if (nce != NULL) 1969 NCE_REFRELE(nce); 1970 freemsg(mp); 1971 return; 1972 } 1973 1974 rd = (nd_redirect_t *)buf; 1975 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1976 rd->nd_rd_code = 0; 1977 rd->nd_rd_reserved = 0; 1978 rd->nd_rd_target = *targetp; 1979 rd->nd_rd_dst = *dest; 1980 1981 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1982 if (nce != NULL && ll_opt_len != 0) { 1983 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1984 opt->nd_opt_len = ll_opt_len/8; 1985 bcopy((char *)nce->nce_res_mp->b_rptr + 1986 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1987 ill->ill_phys_addr_length); 1988 } 1989 if (nce != NULL) 1990 NCE_REFRELE(nce); 1991 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1992 rdh->nd_opt_rh_type = 
(uint8_t)ND_OPT_REDIRECTED_HEADER; 1993 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1994 max_redir_hdr_data_len = (ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1995 pkt_len = msgdsize(mp); 1996 /* Make sure mp is 8 byte aligned */ 1997 if (pkt_len > max_redir_hdr_data_len) { 1998 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1999 sizeof (nd_opt_rd_hdr_t))/8; 2000 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 2001 } else { 2002 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 2003 (void) adjmsg(mp, -(pkt_len % 8)); 2004 } 2005 rdh->nd_opt_rh_reserved1 = 0; 2006 rdh->nd_opt_rh_reserved2 = 0; 2007 /* ipif_v6src_addr contains the link-local source address */ 2008 rw_enter(&ill_g_lock, RW_READER); 2009 if (ill->ill_group != NULL) { 2010 /* 2011 * The receiver of the redirect will verify whether it 2012 * had a route through us (srcp that we will use in 2013 * the redirect) or not. As we load spread even link-locals, 2014 * we don't know which source address the receiver of 2015 * redirect has in its route for communicating with us. 2016 * Thus we randomly choose a source here and finally we 2017 * should get to the right one and it will eventually 2018 * accept the redirect from us. We can't call 2019 * ip_lookup_scope_v6 because we don't have the right 2020 * link-local address here. Thus we randomly choose one. 2021 */ 2022 int cnt = ill->ill_group->illgrp_ill_count; 2023 2024 ill = ill->ill_group->illgrp_ill; 2025 cnt = ++icmp_redirect_v6_src_index % cnt; 2026 while (cnt--) 2027 ill = ill->ill_group_next; 2028 srcp = &ill->ill_ipif->ipif_v6src_addr; 2029 } else { 2030 srcp = &ill->ill_ipif->ipif_v6src_addr; 2031 } 2032 rw_exit(&ill_g_lock); 2033 /* Redirects sent by router, and router is global zone */ 2034 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID); 2035 kmem_free(buf, len); 2036 } 2037 2038 2039 /* Generate an ICMP time exceeded message. (May be called as writer.) 
*/ 2040 void 2041 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 2042 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2043 { 2044 icmp6_t icmp6; 2045 boolean_t mctl_present; 2046 mblk_t *first_mp; 2047 2048 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2049 2050 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2051 if (mp == NULL) { 2052 if (mctl_present) 2053 freeb(first_mp); 2054 return; 2055 } 2056 bzero(&icmp6, sizeof (icmp6_t)); 2057 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2058 icmp6.icmp6_code = code; 2059 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2060 zoneid); 2061 } 2062 2063 /* 2064 * Generate an ICMP unreachable message. 2065 */ 2066 void 2067 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2068 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2069 { 2070 icmp6_t icmp6; 2071 boolean_t mctl_present; 2072 mblk_t *first_mp; 2073 2074 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2075 2076 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2077 if (mp == NULL) { 2078 if (mctl_present) 2079 freeb(first_mp); 2080 return; 2081 } 2082 bzero(&icmp6, sizeof (icmp6_t)); 2083 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2084 icmp6.icmp6_code = code; 2085 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2086 zoneid); 2087 } 2088 2089 /* 2090 * Generate an ICMP pkt too big message. 
2091 */ 2092 static void 2093 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2094 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2095 { 2096 icmp6_t icmp6; 2097 mblk_t *first_mp; 2098 boolean_t mctl_present; 2099 2100 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2101 2102 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2103 if (mp == NULL) { 2104 if (mctl_present) 2105 freeb(first_mp); 2106 return; 2107 } 2108 bzero(&icmp6, sizeof (icmp6_t)); 2109 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2110 icmp6.icmp6_code = 0; 2111 icmp6.icmp6_mtu = htonl(mtu); 2112 2113 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2114 zoneid); 2115 } 2116 2117 /* 2118 * Generate an ICMP parameter problem message. (May be called as writer.) 2119 * 'offset' is the offset from the beginning of the packet in error. 2120 */ 2121 static void 2122 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2123 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid) 2124 { 2125 icmp6_t icmp6; 2126 boolean_t mctl_present; 2127 mblk_t *first_mp; 2128 2129 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2130 2131 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok); 2132 if (mp == NULL) { 2133 if (mctl_present) 2134 freeb(first_mp); 2135 return; 2136 } 2137 bzero((char *)&icmp6, sizeof (icmp6_t)); 2138 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2139 icmp6.icmp6_code = code; 2140 icmp6.icmp6_pptr = htonl(offset); 2141 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2142 zoneid); 2143 } 2144 2145 /* 2146 * This code will need to take into account the possibility of binding 2147 * to a link local address on a multi-homed host, in which case the 2148 * outgoing interface (from the conn) will need to be used when getting 2149 * an ire for the dst. 
Going through proper outgoing interface and 2150 * choosing the source address corresponding to the outgoing interface 2151 * is necessary when the destination address is a link-local address and 2152 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2153 * This can happen when active connection is setup; thus ipp pointer 2154 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2155 * pointer is passed as ipp pointer. 2156 */ 2157 mblk_t * 2158 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2159 { 2160 ssize_t len; 2161 int protocol; 2162 struct T_bind_req *tbr; 2163 sin6_t *sin6; 2164 ipa6_conn_t *ac6; 2165 in6_addr_t *v6srcp; 2166 in6_addr_t *v6dstp; 2167 uint16_t lport; 2168 uint16_t fport; 2169 uchar_t *ucp; 2170 mblk_t *mp1; 2171 boolean_t ire_requested; 2172 boolean_t ipsec_policy_set; 2173 int error = 0; 2174 boolean_t local_bind; 2175 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2176 ipa6_conn_x_t *acx6; 2177 boolean_t verify_dst; 2178 2179 ASSERT(connp->conn_af_isv6); 2180 len = mp->b_wptr - mp->b_rptr; 2181 if (len < (sizeof (*tbr) + 1)) { 2182 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2183 "ip_bind_v6: bogus msg, len %ld", len); 2184 goto bad_addr; 2185 } 2186 /* Back up and extract the protocol identifier. */ 2187 mp->b_wptr--; 2188 tbr = (struct T_bind_req *)mp->b_rptr; 2189 /* Reset the message type in preparation for shipping it back. */ 2190 mp->b_datap->db_type = M_PCPROTO; 2191 2192 protocol = *mp->b_wptr & 0xFF; 2193 connp->conn_ulp = (uint8_t)protocol; 2194 2195 /* 2196 * Check for a zero length address. This is from a protocol that 2197 * wants to register to receive all packets of its type. 2198 */ 2199 if (tbr->ADDR_length == 0) { 2200 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2201 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2202 ipcl_proto_fanout_v6[protocol].connf_head != NULL) { 2203 /* 2204 * TCP, SCTP, AH, and ESP have single protocol fanouts. 
2205 * Do not allow others to bind to these. 2206 */ 2207 goto bad_addr; 2208 } 2209 2210 /* 2211 * 2212 * The udp module never sends down a zero-length address, 2213 * and allowing this on a labeled system will break MLP 2214 * functionality. 2215 */ 2216 if (is_system_labeled() && protocol == IPPROTO_UDP) 2217 goto bad_addr; 2218 2219 /* Allow ipsec plumbing */ 2220 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2221 protocol != IPPROTO_ESP) 2222 goto bad_addr; 2223 2224 connp->conn_srcv6 = ipv6_all_zeros; 2225 ipcl_proto_insert_v6(connp, protocol); 2226 2227 tbr->PRIM_type = T_BIND_ACK; 2228 return (mp); 2229 } 2230 2231 /* Extract the address pointer from the message. */ 2232 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2233 tbr->ADDR_length); 2234 if (ucp == NULL) { 2235 ip1dbg(("ip_bind_v6: no address\n")); 2236 goto bad_addr; 2237 } 2238 if (!OK_32PTR(ucp)) { 2239 ip1dbg(("ip_bind_v6: unaligned address\n")); 2240 goto bad_addr; 2241 } 2242 mp1 = mp->b_cont; /* trailing mp if any */ 2243 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2244 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2245 2246 switch (tbr->ADDR_length) { 2247 default: 2248 ip1dbg(("ip_bind_v6: bad address length %d\n", 2249 (int)tbr->ADDR_length)); 2250 goto bad_addr; 2251 2252 case IPV6_ADDR_LEN: 2253 /* Verification of local address only */ 2254 v6srcp = (in6_addr_t *)ucp; 2255 lport = 0; 2256 local_bind = B_TRUE; 2257 break; 2258 2259 case sizeof (sin6_t): 2260 sin6 = (sin6_t *)ucp; 2261 v6srcp = &sin6->sin6_addr; 2262 lport = sin6->sin6_port; 2263 local_bind = B_TRUE; 2264 break; 2265 2266 case sizeof (ipa6_conn_t): 2267 /* 2268 * Verify that both the source and destination addresses 2269 * are valid. 2270 * Note that we allow connect to broadcast and multicast 2271 * addresses when ire_requested is set. Thus the ULP 2272 * has to check for IRE_BROADCAST and multicast. 
2273 */ 2274 ac6 = (ipa6_conn_t *)ucp; 2275 v6srcp = &ac6->ac6_laddr; 2276 v6dstp = &ac6->ac6_faddr; 2277 fport = ac6->ac6_fport; 2278 /* For raw socket, the local port is not set. */ 2279 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2280 connp->conn_lport; 2281 local_bind = B_FALSE; 2282 /* Always verify destination reachability. */ 2283 verify_dst = B_TRUE; 2284 break; 2285 2286 case sizeof (ipa6_conn_x_t): 2287 /* 2288 * Verify that the source address is valid. 2289 * Note that we allow connect to broadcast and multicast 2290 * addresses when ire_requested is set. Thus the ULP 2291 * has to check for IRE_BROADCAST and multicast. 2292 */ 2293 acx6 = (ipa6_conn_x_t *)ucp; 2294 ac6 = &acx6->ac6x_conn; 2295 v6srcp = &ac6->ac6_laddr; 2296 v6dstp = &ac6->ac6_faddr; 2297 fport = ac6->ac6_fport; 2298 lport = ac6->ac6_lport; 2299 local_bind = B_FALSE; 2300 /* 2301 * Client that passed ipa6_conn_x_t to us specifies whether to 2302 * verify destination reachability. 2303 */ 2304 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2305 break; 2306 } 2307 if (local_bind) { 2308 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2309 /* Bind to IPv4 address */ 2310 ipaddr_t v4src; 2311 2312 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2313 2314 error = ip_bind_laddr(connp, mp, v4src, lport, 2315 ire_requested, ipsec_policy_set, 2316 tbr->ADDR_length != IPV6_ADDR_LEN); 2317 if (error != 0) 2318 goto bad_addr; 2319 connp->conn_pkt_isv6 = B_FALSE; 2320 } else { 2321 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2322 error = 0; 2323 goto bad_addr; 2324 } 2325 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2326 ire_requested, ipsec_policy_set, 2327 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2328 if (error != 0) 2329 goto bad_addr; 2330 connp->conn_pkt_isv6 = B_TRUE; 2331 } 2332 } else { 2333 /* 2334 * Bind to local and remote address. 
Local might be 2335 * unspecified in which case it will be extracted from 2336 * ire_src_addr_v6 2337 */ 2338 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2339 /* Connect to IPv4 address */ 2340 ipaddr_t v4src; 2341 ipaddr_t v4dst; 2342 2343 /* Is the source unspecified or mapped? */ 2344 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2345 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2346 ip1dbg(("ip_bind_v6: " 2347 "dst is mapped, but not the src\n")); 2348 goto bad_addr; 2349 } 2350 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2351 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2352 2353 /* 2354 * XXX Fix needed. Need to pass ipsec_policy_set 2355 * instead of B_FALSE. 2356 */ 2357 2358 /* Always verify destination reachability. */ 2359 error = ip_bind_connected(connp, mp, &v4src, lport, 2360 v4dst, fport, ire_requested, ipsec_policy_set, 2361 B_TRUE, B_TRUE); 2362 if (error != 0) 2363 goto bad_addr; 2364 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2365 connp->conn_pkt_isv6 = B_FALSE; 2366 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2367 ip1dbg(("ip_bind_v6: " 2368 "src is mapped, but not the dst\n")); 2369 goto bad_addr; 2370 } else { 2371 error = ip_bind_connected_v6(connp, mp, v6srcp, 2372 lport, v6dstp, ipp, fport, ire_requested, 2373 ipsec_policy_set, B_TRUE, verify_dst); 2374 if (error != 0) 2375 goto bad_addr; 2376 connp->conn_pkt_isv6 = B_TRUE; 2377 } 2378 } 2379 /* Update qinfo if v4/v6 changed */ 2380 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2381 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2382 if (connp->conn_pkt_isv6) 2383 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE); 2384 else 2385 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 2386 } 2387 2388 /* 2389 * Pass the IPSEC headers size in ire_ipsec_overhead. 2390 * We can't do this in ip_bind_insert_ire because the policy 2391 * may not have been inherited at that point in time and hence 2392 * conn_out_enforce_policy may not be set. 
2393 */ 2394 mp1 = mp->b_cont; 2395 if (ire_requested && connp->conn_out_enforce_policy && 2396 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2397 ire_t *ire = (ire_t *)mp1->b_rptr; 2398 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2399 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2400 } 2401 2402 /* Send it home. */ 2403 mp->b_datap->db_type = M_PCPROTO; 2404 tbr->PRIM_type = T_BIND_ACK; 2405 return (mp); 2406 2407 bad_addr: 2408 if (error == EINPROGRESS) 2409 return (NULL); 2410 if (error > 0) 2411 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2412 else 2413 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2414 return (mp); 2415 } 2416 2417 /* 2418 * Here address is verified to be a valid local address. 2419 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2420 * address is also considered a valid local address. 2421 * In the case of a multicast address, however, the 2422 * upper protocol is expected to reset the src address 2423 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2424 * no packets are emitted with multicast address as 2425 * source address. 2426 * The addresses valid for bind are: 2427 * (1) - in6addr_any 2428 * (2) - IP address of an UP interface 2429 * (3) - IP address of a DOWN interface 2430 * (4) - a multicast address. In this case 2431 * the conn will only receive packets destined to 2432 * the specified multicast address. Note: the 2433 * application still has to issue an 2434 * IPV6_JOIN_GROUP socket option. 2435 * 2436 * In all the above cases, the bound address must be valid in the current zone. 2437 * When the address is loopback or multicast, there might be many matching IREs 2438 * so bind has to look up based on the zone. 
2439 */ 2440 static int 2441 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2442 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2443 boolean_t fanout_insert) 2444 { 2445 int error = 0; 2446 ire_t *src_ire = NULL; 2447 ipif_t *ipif = NULL; 2448 mblk_t *policy_mp; 2449 zoneid_t zoneid; 2450 2451 if (ipsec_policy_set) 2452 policy_mp = mp->b_cont; 2453 2454 /* 2455 * If it was previously connected, conn_fully_bound would have 2456 * been set. 2457 */ 2458 connp->conn_fully_bound = B_FALSE; 2459 2460 zoneid = connp->conn_zoneid; 2461 2462 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2463 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2464 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2465 /* 2466 * If an address other than in6addr_any is requested, 2467 * we verify that it is a valid address for bind 2468 * Note: Following code is in if-else-if form for 2469 * readability compared to a condition check. 2470 */ 2471 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2472 if (IRE_IS_LOCAL(src_ire)) { 2473 /* 2474 * (2) Bind to address of local UP interface 2475 */ 2476 ipif = src_ire->ire_ipif; 2477 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2478 ipif_t *multi_ipif = NULL; 2479 ire_t *save_ire; 2480 /* 2481 * (4) bind to multicast address. 2482 * Fake out the IRE returned to upper 2483 * layer to be a broadcast IRE in 2484 * ip_bind_insert_ire_v6(). 2485 * Pass other information that matches 2486 * the ipif (e.g. the source address). 
2487 * conn_multicast_ill is only used for 2488 * IPv6 packets 2489 */ 2490 mutex_enter(&connp->conn_lock); 2491 if (connp->conn_multicast_ill != NULL) { 2492 (void) ipif_lookup_zoneid( 2493 connp->conn_multicast_ill, zoneid, 0, 2494 &multi_ipif); 2495 } else { 2496 /* 2497 * Look for default like 2498 * ip_wput_v6 2499 */ 2500 multi_ipif = ipif_lookup_group_v6( 2501 &ipv6_unspecified_group, zoneid); 2502 } 2503 mutex_exit(&connp->conn_lock); 2504 save_ire = src_ire; 2505 src_ire = NULL; 2506 if (multi_ipif == NULL || !ire_requested || 2507 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2508 src_ire = save_ire; 2509 error = EADDRNOTAVAIL; 2510 } else { 2511 ASSERT(src_ire != NULL); 2512 if (save_ire != NULL) 2513 ire_refrele(save_ire); 2514 } 2515 if (multi_ipif != NULL) 2516 ipif_refrele(multi_ipif); 2517 } else { 2518 *mp->b_wptr++ = (char)connp->conn_ulp; 2519 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2520 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error); 2521 if (ipif == NULL) { 2522 if (error == EINPROGRESS) { 2523 if (src_ire != NULL) 2524 ire_refrele(src_ire); 2525 return (error); 2526 } 2527 /* 2528 * Not a valid address for bind 2529 */ 2530 error = EADDRNOTAVAIL; 2531 } else { 2532 ipif_refrele(ipif); 2533 } 2534 /* 2535 * Just to keep it consistent with the processing in 2536 * ip_bind_v6(). 2537 */ 2538 mp->b_wptr--; 2539 } 2540 2541 if (error != 0) { 2542 /* Red Alert! Attempting to be a bogon! */ 2543 if (ip_debug > 2) { 2544 /* ip1dbg */ 2545 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2546 " address %s\n", AF_INET6, v6src); 2547 } 2548 goto bad_addr; 2549 } 2550 } 2551 2552 /* 2553 * Allow setting new policies. For example, disconnects come 2554 * down as ipa_t bind. As we would have set conn_policy_cached 2555 * to B_TRUE before, we should set it to B_FALSE, so that policy 2556 * can change after the disconnect. 
2557 */ 2558 connp->conn_policy_cached = B_FALSE; 2559 2560 /* If not fanout_insert this was just an address verification */ 2561 if (fanout_insert) { 2562 /* 2563 * The addresses have been verified. Time to insert in 2564 * the correct fanout list. 2565 */ 2566 connp->conn_srcv6 = *v6src; 2567 connp->conn_remv6 = ipv6_all_zeros; 2568 connp->conn_lport = lport; 2569 connp->conn_fport = 0; 2570 2571 /* 2572 * We need to make sure that the conn_recv is set to a non-null 2573 * value before we insert the conn_t into the classifier table. 2574 * This is to avoid a race with an incoming packet which does 2575 * an ipcl_classify(). 2576 */ 2577 if (*mp->b_wptr == IPPROTO_TCP) 2578 connp->conn_recv = tcp_conn_request; 2579 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2580 } 2581 if (error == 0) { 2582 if (ire_requested) { 2583 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL)) { 2584 error = -1; 2585 goto bad_addr; 2586 } 2587 } else if (ipsec_policy_set) { 2588 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2589 error = -1; 2590 goto bad_addr; 2591 } 2592 } 2593 } else if (connp->conn_ulp == IPPROTO_TCP) { 2594 connp->conn_recv = tcp_input; 2595 } 2596 bad_addr: 2597 if (error != 0) { 2598 if (connp->conn_anon_port) { 2599 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2600 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2601 B_FALSE); 2602 } 2603 connp->conn_mlp_type = mlptSingle; 2604 } 2605 2606 if (src_ire != NULL) 2607 ire_refrele(src_ire); 2608 2609 if (ipsec_policy_set) { 2610 ASSERT(policy_mp != NULL); 2611 freeb(policy_mp); 2612 /* 2613 * As of now assume that nothing else accompanies 2614 * IPSEC_POLICY_SET. 
2615 */ 2616 mp->b_cont = NULL; 2617 } 2618 return (error); 2619 } 2620 2621 /* ARGSUSED */ 2622 static void 2623 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2624 void *dummy_arg) 2625 { 2626 conn_t *connp = NULL; 2627 t_scalar_t prim; 2628 2629 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2630 2631 if (CONN_Q(q)) 2632 connp = Q_TO_CONN(q); 2633 ASSERT(connp != NULL); 2634 2635 prim = ((union T_primitives *)mp->b_rptr)->type; 2636 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2637 2638 if (IPCL_IS_TCP(connp)) { 2639 /* Pass sticky_ipp for scope_id and pktinfo */ 2640 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2641 } else { 2642 /* For UDP and ICMP */ 2643 mp = ip_bind_v6(q, mp, connp, NULL); 2644 } 2645 if (mp != NULL) { 2646 if (IPCL_IS_TCP(connp)) { 2647 CONN_INC_REF(connp); 2648 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2649 connp, SQTAG_TCP_RPUTOTHER); 2650 } else if (IPCL_IS_UDP(connp)) { 2651 udp_resume_bind(connp, mp); 2652 } else { 2653 qreply(q, mp); 2654 CONN_OPER_PENDING_DONE(connp); 2655 } 2656 } 2657 } 2658 2659 /* 2660 * Verify that both the source and destination addresses 2661 * are valid. If verify_dst, then destination address must also be reachable, 2662 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2663 * It takes ip6_pkt_t * as one of the arguments to determine correct 2664 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2665 * destination address. Note that parameter ipp is only useful for TCP connect 2666 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2667 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2668 * 2669 */ 2670 static int 2671 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2672 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2673 boolean_t ire_requested, boolean_t ipsec_policy_set, 2674 boolean_t fanout_insert, boolean_t verify_dst) 2675 { 2676 ire_t *src_ire; 2677 ire_t *dst_ire; 2678 int error = 0; 2679 int protocol; 2680 mblk_t *policy_mp; 2681 ire_t *sire = NULL; 2682 ire_t *md_dst_ire = NULL; 2683 ill_t *md_ill = NULL; 2684 ill_t *dst_ill = NULL; 2685 ipif_t *src_ipif = NULL; 2686 zoneid_t zoneid; 2687 boolean_t ill_held = B_FALSE; 2688 2689 src_ire = dst_ire = NULL; 2690 /* 2691 * NOTE: The protocol is beyond the wptr because that's how 2692 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2693 */ 2694 protocol = *mp->b_wptr & 0xFF; 2695 2696 /* 2697 * If we never got a disconnect before, clear it now. 2698 */ 2699 connp->conn_fully_bound = B_FALSE; 2700 2701 if (ipsec_policy_set) { 2702 policy_mp = mp->b_cont; 2703 } 2704 2705 zoneid = connp->conn_zoneid; 2706 2707 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2708 ipif_t *ipif; 2709 2710 /* 2711 * Use an "emulated" IRE_BROADCAST to tell the transport it 2712 * is a multicast. 2713 * Pass other information that matches 2714 * the ipif (e.g. the source address). 
2715 * 2716 * conn_multicast_ill is only used for IPv6 packets 2717 */ 2718 mutex_enter(&connp->conn_lock); 2719 if (connp->conn_multicast_ill != NULL) { 2720 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2721 zoneid, 0, &ipif); 2722 } else { 2723 /* Look for default like ip_wput_v6 */ 2724 ipif = ipif_lookup_group_v6(v6dst, zoneid); 2725 } 2726 mutex_exit(&connp->conn_lock); 2727 if (ipif == NULL || !ire_requested || 2728 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2729 if (ipif != NULL) 2730 ipif_refrele(ipif); 2731 if (ip_debug > 2) { 2732 /* ip1dbg */ 2733 pr_addr_dbg("ip_bind_connected_v6: bad " 2734 "connected multicast %s\n", AF_INET6, 2735 v6dst); 2736 } 2737 error = ENETUNREACH; 2738 goto bad_addr; 2739 } 2740 if (ipif != NULL) 2741 ipif_refrele(ipif); 2742 } else { 2743 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2744 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2745 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2746 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR); 2747 /* 2748 * We also prevent ire's with src address INADDR_ANY to 2749 * be used, which are created temporarily for 2750 * sending out packets from endpoints that have 2751 * conn_unspec_src set. 2752 */ 2753 if (dst_ire == NULL || 2754 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2755 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2756 /* 2757 * When verifying destination reachability, we always 2758 * complain. 2759 * 2760 * When not verifying destination reachability but we 2761 * found an IRE, i.e. the destination is reachable, 2762 * then the other tests still apply and we complain. 
2763 */ 2764 if (verify_dst || (dst_ire != NULL)) { 2765 if (ip_debug > 2) { 2766 /* ip1dbg */ 2767 pr_addr_dbg("ip_bind_connected_v6: bad" 2768 " connected dst %s\n", AF_INET6, 2769 v6dst); 2770 } 2771 if (dst_ire == NULL || 2772 !(dst_ire->ire_type & IRE_HOST)) { 2773 error = ENETUNREACH; 2774 } else { 2775 error = EHOSTUNREACH; 2776 } 2777 goto bad_addr; 2778 } 2779 } 2780 } 2781 2782 /* 2783 * We now know that routing will allow us to reach the destination. 2784 * Check whether Trusted Solaris policy allows communication with this 2785 * host, and pretend that the destination is unreachable if not. 2786 * 2787 * This is never a problem for TCP, since that transport is known to 2788 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2789 * handling. If the remote is unreachable, it will be detected at that 2790 * point, so there's no reason to check it here. 2791 * 2792 * Note that for sendto (and other datagram-oriented friends), this 2793 * check is done as part of the data path label computation instead. 2794 * The check here is just to make non-TCP connect() report the right 2795 * error. 2796 */ 2797 if (dst_ire != NULL && is_system_labeled() && 2798 !IPCL_IS_TCP(connp) && 2799 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2800 connp->conn_mac_exempt) != 0) { 2801 error = EHOSTUNREACH; 2802 if (ip_debug > 2) { 2803 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2804 AF_INET6, v6dst); 2805 } 2806 goto bad_addr; 2807 } 2808 2809 /* 2810 * If the app does a connect(), it means that it will most likely 2811 * send more than 1 packet to the destination. It makes sense 2812 * to clear the temporary flag. 
2813 */ 2814 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2815 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2816 irb_t *irb = dst_ire->ire_bucket; 2817 2818 rw_enter(&irb->irb_lock, RW_WRITER); 2819 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2820 irb->irb_tmp_ire_cnt--; 2821 rw_exit(&irb->irb_lock); 2822 } 2823 2824 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2825 2826 /* 2827 * See if we should notify ULP about MDT; we do this whether or not 2828 * ire_requested is TRUE, in order to handle active connects; MDT 2829 * eligibility tests for passive connects are handled separately 2830 * through tcp_adapt_ire(). We do this before the source address 2831 * selection, because dst_ire may change after a call to 2832 * ipif_select_source_v6(). This is a best-effort check, as the 2833 * packet for this connection may not actually go through 2834 * dst_ire->ire_stq, and the exact IRE can only be known after 2835 * calling ip_newroute_v6(). This is why we further check on the 2836 * IRE during Multidata packet transmission in tcp_multisend(). 
2837 */ 2838 if (ip_multidata_outbound && !ipsec_policy_set && dst_ire != NULL && 2839 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2840 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2841 ILL_MDT_CAPABLE(md_ill)) { 2842 md_dst_ire = dst_ire; 2843 IRE_REFHOLD(md_dst_ire); 2844 } 2845 2846 if (dst_ire != NULL && 2847 dst_ire->ire_type == IRE_LOCAL && 2848 dst_ire->ire_zoneid != zoneid && 2849 dst_ire->ire_zoneid != ALL_ZONES) { 2850 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2851 zoneid, 0, NULL, 2852 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2853 MATCH_IRE_RJ_BHOLE); 2854 if (src_ire == NULL) { 2855 error = EHOSTUNREACH; 2856 goto bad_addr; 2857 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2858 if (!(src_ire->ire_type & IRE_HOST)) 2859 error = ENETUNREACH; 2860 else 2861 error = EHOSTUNREACH; 2862 goto bad_addr; 2863 } 2864 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2865 src_ipif = src_ire->ire_ipif; 2866 ipif_refhold(src_ipif); 2867 *v6src = src_ipif->ipif_v6lcl_addr; 2868 } 2869 ire_refrele(src_ire); 2870 src_ire = NULL; 2871 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2872 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2873 *v6src = sire->ire_src_addr_v6; 2874 ire_refrele(dst_ire); 2875 dst_ire = sire; 2876 sire = NULL; 2877 } else if (dst_ire->ire_type == IRE_CACHE && 2878 (dst_ire->ire_flags & RTF_SETSRC)) { 2879 ASSERT(dst_ire->ire_zoneid == zoneid || 2880 dst_ire->ire_zoneid == ALL_ZONES); 2881 *v6src = dst_ire->ire_src_addr_v6; 2882 } else { 2883 /* 2884 * Pick a source address so that a proper inbound load 2885 * spreading would happen. Use dst_ill specified by the 2886 * app. when socket option or scopeid is set. 
2887 */ 2888 int err; 2889 2890 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2891 uint_t if_index; 2892 2893 /* 2894 * Scope id or IPV6_PKTINFO 2895 */ 2896 2897 if_index = ipp->ipp_ifindex; 2898 dst_ill = ill_lookup_on_ifindex( 2899 if_index, B_TRUE, NULL, NULL, NULL, NULL); 2900 if (dst_ill == NULL) { 2901 ip1dbg(("ip_bind_connected_v6:" 2902 " bad ifindex %d\n", if_index)); 2903 error = EADDRNOTAVAIL; 2904 goto bad_addr; 2905 } 2906 ill_held = B_TRUE; 2907 } else if (connp->conn_outgoing_ill != NULL) { 2908 /* 2909 * For IPV6_BOUND_IF socket option, 2910 * conn_outgoing_ill should be set 2911 * already in TCP or UDP/ICMP. 2912 */ 2913 dst_ill = conn_get_held_ill(connp, 2914 &connp->conn_outgoing_ill, &err); 2915 if (err == ILL_LOOKUP_FAILED) { 2916 ip1dbg(("ip_bind_connected_v6:" 2917 "no ill for bound_if\n")); 2918 error = EADDRNOTAVAIL; 2919 goto bad_addr; 2920 } 2921 ill_held = B_TRUE; 2922 } else if (dst_ire->ire_stq != NULL) { 2923 /* No need to hold ill here */ 2924 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2925 } else { 2926 /* No need to hold ill here */ 2927 dst_ill = dst_ire->ire_ipif->ipif_ill; 2928 } 2929 if (!ip6_asp_can_lookup()) { 2930 *mp->b_wptr++ = (char)protocol; 2931 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2932 ip_bind_connected_resume_v6); 2933 error = EINPROGRESS; 2934 goto refrele_and_quit; 2935 } 2936 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2937 RESTRICT_TO_NONE, connp->conn_src_preferences, 2938 zoneid); 2939 ip6_asp_table_refrele(); 2940 if (src_ipif == NULL) { 2941 pr_addr_dbg("ip_bind_connected_v6: " 2942 "no usable source address for " 2943 "connection to %s\n", AF_INET6, v6dst); 2944 error = EADDRNOTAVAIL; 2945 goto bad_addr; 2946 } 2947 *v6src = src_ipif->ipif_v6lcl_addr; 2948 } 2949 } 2950 2951 /* 2952 * We do ire_route_lookup_v6() here (and not an interface lookup) 2953 * as we assert that v6src should only come from an 2954 * UP interface for hard binding. 
2955 */ 2956 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2957 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY); 2958 2959 /* src_ire must be a local|loopback */ 2960 if (!IRE_IS_LOCAL(src_ire)) { 2961 if (ip_debug > 2) { 2962 /* ip1dbg */ 2963 pr_addr_dbg("ip_bind_connected_v6: bad " 2964 "connected src %s\n", AF_INET6, v6src); 2965 } 2966 error = EADDRNOTAVAIL; 2967 goto bad_addr; 2968 } 2969 2970 /* 2971 * If the source address is a loopback address, the 2972 * destination had best be local or multicast. 2973 * The transports that can't handle multicast will reject 2974 * those addresses. 2975 */ 2976 if (src_ire->ire_type == IRE_LOOPBACK && 2977 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2978 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2979 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2980 error = -1; 2981 goto bad_addr; 2982 } 2983 /* 2984 * Allow setting new policies. For example, disconnects come 2985 * down as ipa_t bind. As we would have set conn_policy_cached 2986 * to B_TRUE before, we should set it to B_FALSE, so that policy 2987 * can change after the disconnect. 2988 */ 2989 connp->conn_policy_cached = B_FALSE; 2990 2991 /* 2992 * The addresses have been verified. Initialize the conn 2993 * before calling the policy as they expect the conns 2994 * initialized. 2995 */ 2996 connp->conn_srcv6 = *v6src; 2997 connp->conn_remv6 = *v6dst; 2998 connp->conn_lport = lport; 2999 connp->conn_fport = fport; 3000 3001 ASSERT(!(ipsec_policy_set && ire_requested)); 3002 if (ire_requested) { 3003 iulp_t *ulp_info = NULL; 3004 3005 /* 3006 * Note that sire will not be NULL if this is an off-link 3007 * connection and there is not cache for that dest yet. 3008 * 3009 * XXX Because of an existing bug, if there are multiple 3010 * default routes, the IRE returned now may not be the actual 3011 * default route used (default routes are chosen in a 3012 * round robin fashion). 
So if the metrics for different 3013 * default routes are different, we may return the wrong 3014 * metrics. This will not be a problem if the existing 3015 * bug is fixed. 3016 */ 3017 if (sire != NULL) 3018 ulp_info = &(sire->ire_uinfo); 3019 3020 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info)) { 3021 error = -1; 3022 goto bad_addr; 3023 } 3024 } else if (ipsec_policy_set) { 3025 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 3026 error = -1; 3027 goto bad_addr; 3028 } 3029 } 3030 3031 /* 3032 * Cache IPsec policy in this conn. If we have per-socket policy, 3033 * we'll cache that. If we don't, we'll inherit global policy. 3034 * 3035 * We can't insert until the conn reflects the policy. Note that 3036 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 3037 * connections where we don't have a policy. This is to prevent 3038 * global policy lookups in the inbound path. 3039 * 3040 * If we insert before we set conn_policy_cached, 3041 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3042 * because global policy cound be non-empty. We normally call 3043 * ipsec_check_policy() for conn_policy_cached connections only if 3044 * conn_in_enforce_policy is set. But in this case, 3045 * conn_policy_cached can get set anytime since we made the 3046 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3047 * is called, which will make the above assumption false. Thus, we 3048 * need to insert after we set conn_policy_cached. 3049 */ 3050 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3051 goto bad_addr; 3052 3053 /* If not fanout_insert this was just an address verification */ 3054 if (fanout_insert) { 3055 /* 3056 * The addresses have been verified. Time to insert in 3057 * the correct fanout list. 3058 * We need to make sure that the conn_recv is set to a non-null 3059 * value before we insert the conn_t into the classifier table. 
3060 * This is to avoid a race with an incoming packet which does 3061 * an ipcl_classify(). 3062 */ 3063 if (protocol == IPPROTO_TCP) 3064 connp->conn_recv = tcp_input; 3065 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3066 connp->conn_ports, 3067 IPCL_IS_TCP(connp) ? connp->conn_tcp->tcp_bound_if : 0); 3068 } 3069 if (error == 0) { 3070 connp->conn_fully_bound = B_TRUE; 3071 /* 3072 * Our initial checks for MDT have passed; the IRE is not 3073 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3074 * be supporting MDT. Pass the IRE, IPC and ILL into 3075 * ip_mdinfo_return(), which performs further checks 3076 * against them and upon success, returns the MDT info 3077 * mblk which we will attach to the bind acknowledgment. 3078 */ 3079 if (md_dst_ire != NULL) { 3080 mblk_t *mdinfo_mp; 3081 3082 ASSERT(md_ill != NULL); 3083 ASSERT(md_ill->ill_mdt_capab != NULL); 3084 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3085 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3086 linkb(mp, mdinfo_mp); 3087 } 3088 } 3089 bad_addr: 3090 if (ipsec_policy_set) { 3091 ASSERT(policy_mp != NULL); 3092 freeb(policy_mp); 3093 /* 3094 * As of now assume that nothing else accompanies 3095 * IPSEC_POLICY_SET. 3096 */ 3097 mp->b_cont = NULL; 3098 } 3099 refrele_and_quit: 3100 if (src_ire != NULL) 3101 IRE_REFRELE(src_ire); 3102 if (dst_ire != NULL) 3103 IRE_REFRELE(dst_ire); 3104 if (sire != NULL) 3105 IRE_REFRELE(sire); 3106 if (src_ipif != NULL) 3107 ipif_refrele(src_ipif); 3108 if (md_dst_ire != NULL) 3109 IRE_REFRELE(md_dst_ire); 3110 if (ill_held && dst_ill != NULL) 3111 ill_refrele(dst_ill); 3112 return (error); 3113 } 3114 3115 /* 3116 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3117 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 
3118 */ 3119 static boolean_t 3120 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3121 iulp_t *ulp_info) 3122 { 3123 mblk_t *mp1; 3124 ire_t *ret_ire; 3125 3126 mp1 = mp->b_cont; 3127 ASSERT(mp1 != NULL); 3128 3129 if (ire != NULL) { 3130 /* 3131 * mp1 initialized above to IRE_DB_REQ_TYPE 3132 * appended mblk. Its <upper protocol>'s 3133 * job to make sure there is room. 3134 */ 3135 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3136 return (B_FALSE); 3137 3138 mp1->b_datap->db_type = IRE_DB_TYPE; 3139 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3140 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3141 ret_ire = (ire_t *)mp1->b_rptr; 3142 if (IN6_IS_ADDR_MULTICAST(dst) || 3143 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3144 ret_ire->ire_type = IRE_BROADCAST; 3145 ret_ire->ire_addr_v6 = *dst; 3146 } 3147 if (ulp_info != NULL) { 3148 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3149 sizeof (iulp_t)); 3150 } 3151 ret_ire->ire_mp = mp1; 3152 } else { 3153 /* 3154 * No IRE was found. Remove IRE mblk. 3155 */ 3156 mp->b_cont = mp1->b_cont; 3157 freeb(mp1); 3158 } 3159 return (B_TRUE); 3160 } 3161 3162 /* 3163 * Add an ip6i_t header to the front of the mblk. 3164 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3165 * Returns NULL if allocation fails (and frees original message). 3166 * Used in outgoing path when going through ip_newroute_*v6(). 3167 * Used in incoming path to pass ifindex to transports. 
3168 */ 3169 mblk_t * 3170 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3171 { 3172 mblk_t *mp1; 3173 ip6i_t *ip6i; 3174 ip6_t *ip6h; 3175 3176 ip6h = (ip6_t *)mp->b_rptr; 3177 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3178 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3179 mp->b_datap->db_ref > 1) { 3180 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3181 if (mp1 == NULL) { 3182 freemsg(mp); 3183 return (NULL); 3184 } 3185 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3186 mp1->b_cont = mp; 3187 mp = mp1; 3188 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3189 } 3190 mp->b_rptr = (uchar_t *)ip6i; 3191 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3192 ip6i->ip6i_nxt = IPPROTO_RAW; 3193 if (ill != NULL) { 3194 ip6i->ip6i_flags = IP6I_IFINDEX; 3195 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3196 } else { 3197 ip6i->ip6i_flags = 0; 3198 } 3199 ip6i->ip6i_nexthop = *dst; 3200 return (mp); 3201 } 3202 3203 /* 3204 * Handle protocols with which IP is less intimate. There 3205 * can be more than one stream bound to a particular 3206 * protocol. When this is the case, normally each one gets a copy 3207 * of any incoming packets. 3208 * However, if the packet was tunneled and not multicast we only send to it 3209 * the first match. 3210 * 3211 * Zones notes: 3212 * Packets will be distributed to streams in all zones. This is really only 3213 * useful for ICMPv6 as only applications in the global zone can create raw 3214 * sockets for other protocols. 
3215 */ 3216 static void 3217 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3218 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3219 boolean_t mctl_present, zoneid_t zoneid) 3220 { 3221 queue_t *rq; 3222 mblk_t *mp1, *first_mp1; 3223 in6_addr_t dst = ip6h->ip6_dst; 3224 in6_addr_t src = ip6h->ip6_src; 3225 boolean_t one_only; 3226 mblk_t *first_mp = mp; 3227 boolean_t secure, shared_addr; 3228 conn_t *connp, *first_connp, *next_connp; 3229 connf_t *connfp; 3230 3231 if (mctl_present) { 3232 mp = first_mp->b_cont; 3233 secure = ipsec_in_is_secure(first_mp); 3234 ASSERT(mp != NULL); 3235 } else { 3236 secure = B_FALSE; 3237 } 3238 3239 /* 3240 * If the packet was tunneled and not multicast we only send to it 3241 * the first match. 3242 */ 3243 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3244 !IN6_IS_ADDR_MULTICAST(&dst)); 3245 3246 shared_addr = (zoneid == ALL_ZONES); 3247 if (shared_addr) { 3248 /* 3249 * We don't allow multilevel ports for raw IP, so no need to 3250 * check for that here. 3251 */ 3252 zoneid = tsol_packet_to_zoneid(mp); 3253 } 3254 3255 connfp = &ipcl_proto_fanout_v6[nexthdr]; 3256 mutex_enter(&connfp->connf_lock); 3257 connp = connfp->connf_head; 3258 for (connp = connfp->connf_head; connp != NULL; 3259 connp = connp->conn_next) { 3260 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3261 zoneid) && 3262 (!is_system_labeled() || 3263 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3264 connp))) 3265 break; 3266 } 3267 3268 if (connp == NULL || connp->conn_upq == NULL) { 3269 /* 3270 * No one bound to this port. Is 3271 * there a client that wants all 3272 * unclaimed datagrams? 
3273 */ 3274 mutex_exit(&connfp->connf_lock); 3275 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3276 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3277 nexthdr_offset, mctl_present, zoneid)) { 3278 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3279 } 3280 3281 return; 3282 } 3283 3284 CONN_INC_REF(connp); 3285 first_connp = connp; 3286 3287 /* 3288 * XXX: Fix the multiple protocol listeners case. We should not 3289 * be walking the conn->next list here. 3290 */ 3291 if (one_only) { 3292 /* 3293 * Only send message to one tunnel driver by immediately 3294 * terminating the loop. 3295 */ 3296 connp = NULL; 3297 } else { 3298 connp = connp->conn_next; 3299 3300 } 3301 for (;;) { 3302 while (connp != NULL) { 3303 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3304 flags, zoneid) && 3305 (!is_system_labeled() || 3306 tsol_receive_local(mp, &dst, IPV6_VERSION, 3307 shared_addr, connp))) 3308 break; 3309 connp = connp->conn_next; 3310 } 3311 3312 /* 3313 * Just copy the data part alone. The mctl part is 3314 * needed just for verifying policy and it is never 3315 * sent up. 3316 */ 3317 if (connp == NULL || connp->conn_upq == NULL || 3318 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3319 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3320 /* 3321 * No more intested clients or memory 3322 * allocation failed 3323 */ 3324 connp = first_connp; 3325 break; 3326 } 3327 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3328 CONN_INC_REF(connp); 3329 mutex_exit(&connfp->connf_lock); 3330 rq = connp->conn_rq; 3331 /* 3332 * For link-local always add ifindex so that transport can set 3333 * sin6_scope_id. Avoid it for ICMP error fanout. 
3334 */ 3335 if ((connp->conn_ip_recvpktinfo || 3336 IN6_IS_ADDR_LINKLOCAL(&src)) && 3337 (flags & IP_FF_IPINFO)) { 3338 /* Add header */ 3339 mp1 = ip_add_info_v6(mp1, inill, &dst); 3340 } 3341 if (mp1 == NULL) { 3342 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3343 } else if (!canputnext(rq)) { 3344 if (flags & IP_FF_RAWIP) { 3345 BUMP_MIB(ill->ill_ip_mib, 3346 rawipIfStatsInOverflows); 3347 } else { 3348 BUMP_MIB(ill->ill_icmp6_mib, 3349 ipv6IfIcmpInOverflows); 3350 } 3351 3352 freemsg(mp1); 3353 } else { 3354 /* 3355 * Don't enforce here if we're a tunnel - let "tun" do 3356 * it instead. 3357 */ 3358 if (!IPCL_IS_IPTUN(connp) && 3359 (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure)) { 3360 first_mp1 = ipsec_check_inbound_policy 3361 (first_mp1, connp, NULL, ip6h, 3362 mctl_present); 3363 } 3364 if (first_mp1 != NULL) { 3365 if (mctl_present) 3366 freeb(first_mp1); 3367 BUMP_MIB(ill->ill_ip_mib, 3368 ipIfStatsHCInDelivers); 3369 putnext(rq, mp1); 3370 } 3371 } 3372 mutex_enter(&connfp->connf_lock); 3373 /* Follow the next pointer before releasing the conn. */ 3374 next_connp = connp->conn_next; 3375 CONN_DEC_REF(connp); 3376 connp = next_connp; 3377 } 3378 3379 /* Last one. Send it upstream. */ 3380 mutex_exit(&connfp->connf_lock); 3381 3382 /* Initiate IPPF processing */ 3383 if (IP6_IN_IPP(flags)) { 3384 uint_t ifindex; 3385 3386 mutex_enter(&ill->ill_lock); 3387 ifindex = ill->ill_phyint->phyint_ifindex; 3388 mutex_exit(&ill->ill_lock); 3389 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3390 if (mp == NULL) { 3391 CONN_DEC_REF(connp); 3392 if (mctl_present) 3393 freeb(first_mp); 3394 return; 3395 } 3396 } 3397 3398 /* 3399 * For link-local always add ifindex so that transport can set 3400 * sin6_scope_id. Avoid it for ICMP error fanout. 
3401 */ 3402 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3403 (flags & IP_FF_IPINFO)) { 3404 /* Add header */ 3405 mp = ip_add_info_v6(mp, inill, &dst); 3406 if (mp == NULL) { 3407 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3408 CONN_DEC_REF(connp); 3409 if (mctl_present) 3410 freeb(first_mp); 3411 return; 3412 } else if (mctl_present) { 3413 first_mp->b_cont = mp; 3414 } else { 3415 first_mp = mp; 3416 } 3417 } 3418 3419 rq = connp->conn_rq; 3420 if (!canputnext(rq)) { 3421 if (flags & IP_FF_RAWIP) { 3422 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3423 } else { 3424 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3425 } 3426 3427 freemsg(first_mp); 3428 } else { 3429 if (IPCL_IS_IPTUN(connp)) { 3430 /* 3431 * Tunneled packet. We enforce policy in the tunnel 3432 * module itself. 3433 * 3434 * Send the WHOLE packet up (incl. IPSEC_IN) without 3435 * a policy check. 3436 */ 3437 putnext(rq, first_mp); 3438 CONN_DEC_REF(connp); 3439 return; 3440 } 3441 /* 3442 * Don't enforce here if we're a tunnel - let "tun" do 3443 * it instead. 3444 */ 3445 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3446 (CONN_INBOUND_POLICY_PRESENT(connp) || secure)) { 3447 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3448 NULL, ip6h, mctl_present); 3449 if (first_mp == NULL) { 3450 CONN_DEC_REF(connp); 3451 return; 3452 } 3453 } 3454 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3455 putnext(rq, mp); 3456 if (mctl_present) 3457 freeb(first_mp); 3458 } 3459 CONN_DEC_REF(connp); 3460 } 3461 3462 /* 3463 * Send an ICMP error after patching up the packet appropriately. Returns 3464 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3465 */ 3466 int 3467 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3468 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3469 boolean_t mctl_present, zoneid_t zoneid) 3470 { 3471 ip6_t *ip6h; 3472 mblk_t *first_mp; 3473 boolean_t secure; 3474 unsigned char db_type; 3475 3476 first_mp = mp; 3477 if (mctl_present) { 3478 mp = mp->b_cont; 3479 secure = ipsec_in_is_secure(first_mp); 3480 ASSERT(mp != NULL); 3481 } else { 3482 /* 3483 * If this is an ICMP error being reported - which goes 3484 * up as M_CTLs, we need to convert them to M_DATA till 3485 * we finish checking with global policy because 3486 * ipsec_check_global_policy() assumes M_DATA as clear 3487 * and M_CTL as secure. 3488 */ 3489 db_type = mp->b_datap->db_type; 3490 mp->b_datap->db_type = M_DATA; 3491 secure = B_FALSE; 3492 } 3493 /* 3494 * We are generating an icmp error for some inbound packet. 3495 * Called from all ip_fanout_(udp, tcp, proto) functions. 3496 * Before we generate an error, check with global policy 3497 * to see whether this is allowed to enter the system. As 3498 * there is no "conn", we are checking with global policy. 
3499 */ 3500 ip6h = (ip6_t *)mp->b_rptr; 3501 if (secure || ipsec_inbound_v6_policy_present) { 3502 first_mp = ipsec_check_global_policy(first_mp, NULL, 3503 NULL, ip6h, mctl_present); 3504 if (first_mp == NULL) 3505 return (0); 3506 } 3507 3508 if (!mctl_present) 3509 mp->b_datap->db_type = db_type; 3510 3511 if (flags & IP_FF_SEND_ICMP) { 3512 if (flags & IP_FF_HDR_COMPLETE) { 3513 if (ip_hdr_complete_v6(ip6h, zoneid)) { 3514 freemsg(first_mp); 3515 return (1); 3516 } 3517 } 3518 switch (icmp_type) { 3519 case ICMP6_DST_UNREACH: 3520 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3521 B_FALSE, B_FALSE, zoneid); 3522 break; 3523 case ICMP6_PARAM_PROB: 3524 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3525 nexthdr_offset, B_FALSE, B_FALSE, zoneid); 3526 break; 3527 default: 3528 #ifdef DEBUG 3529 panic("ip_fanout_send_icmp_v6: wrong type"); 3530 /*NOTREACHED*/ 3531 #else 3532 freemsg(first_mp); 3533 break; 3534 #endif 3535 } 3536 } else { 3537 freemsg(first_mp); 3538 return (0); 3539 } 3540 3541 return (1); 3542 } 3543 3544 3545 /* 3546 * Fanout for TCP packets 3547 * The caller puts <fport, lport> in the ports parameter. 3548 */ 3549 static void 3550 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3551 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3552 { 3553 mblk_t *first_mp; 3554 boolean_t secure; 3555 conn_t *connp; 3556 tcph_t *tcph; 3557 boolean_t syn_present = B_FALSE; 3558 3559 first_mp = mp; 3560 if (mctl_present) { 3561 mp = first_mp->b_cont; 3562 secure = ipsec_in_is_secure(first_mp); 3563 ASSERT(mp != NULL); 3564 } else { 3565 secure = B_FALSE; 3566 } 3567 3568 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid); 3569 3570 if (connp == NULL || 3571 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3572 /* 3573 * No hard-bound match. Send Reset. 
3574 */ 3575 dblk_t *dp = mp->b_datap; 3576 uint32_t ill_index; 3577 3578 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3579 3580 /* Initiate IPPf processing, if needed. */ 3581 if (IPP_ENABLED(IPP_LOCAL_IN) && (flags & IP6_NO_IPPOLICY)) { 3582 ill_index = ill->ill_phyint->phyint_ifindex; 3583 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3584 if (first_mp == NULL) { 3585 if (connp != NULL) 3586 CONN_DEC_REF(connp); 3587 return; 3588 } 3589 } 3590 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3591 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); 3592 if (connp != NULL) 3593 CONN_DEC_REF(connp); 3594 return; 3595 } 3596 3597 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3598 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3599 if (connp->conn_flags & IPCL_TCP) { 3600 squeue_t *sqp; 3601 3602 /* 3603 * For fused tcp loopback, assign the eager's 3604 * squeue to be that of the active connect's. 3605 */ 3606 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3607 !CONN_INBOUND_POLICY_PRESENT_V6(connp) && !secure && 3608 !IP6_IN_IPP(flags)) { 3609 ASSERT(Q_TO_CONN(q) != NULL); 3610 sqp = Q_TO_CONN(q)->conn_sqp; 3611 } else { 3612 sqp = IP_SQUEUE_GET(lbolt); 3613 } 3614 3615 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3616 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3617 3618 /* 3619 * db_cksumstuff is unused in the incoming 3620 * path; Thus store the ifindex here. It will 3621 * be cleared in tcp_conn_create_v6(). 
3622 */ 3623 DB_CKSUMSTUFF(mp) = 3624 (intptr_t)ill->ill_phyint->phyint_ifindex; 3625 syn_present = B_TRUE; 3626 } 3627 } 3628 3629 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3630 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3631 if ((flags & TH_RST) || (flags & TH_URG)) { 3632 CONN_DEC_REF(connp); 3633 freemsg(first_mp); 3634 return; 3635 } 3636 if (flags & TH_ACK) { 3637 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid); 3638 CONN_DEC_REF(connp); 3639 return; 3640 } 3641 3642 CONN_DEC_REF(connp); 3643 freemsg(first_mp); 3644 return; 3645 } 3646 3647 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3648 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3649 NULL, ip6h, mctl_present); 3650 if (first_mp == NULL) { 3651 CONN_DEC_REF(connp); 3652 return; 3653 } 3654 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3655 ASSERT(syn_present); 3656 if (mctl_present) { 3657 ASSERT(first_mp != mp); 3658 first_mp->b_datap->db_struioflag |= 3659 STRUIO_POLICY; 3660 } else { 3661 ASSERT(first_mp == mp); 3662 mp->b_datap->db_struioflag &= 3663 ~STRUIO_EAGER; 3664 mp->b_datap->db_struioflag |= 3665 STRUIO_POLICY; 3666 } 3667 } else { 3668 /* 3669 * Discard first_mp early since we're dealing with a 3670 * fully-connected conn_t and tcp doesn't do policy in 3671 * this case. Also, if someone is bound to IPPROTO_TCP 3672 * over raw IP, they don't expect to see a M_CTL. 
3673 */ 3674 if (mctl_present) { 3675 freeb(first_mp); 3676 mctl_present = B_FALSE; 3677 } 3678 first_mp = mp; 3679 } 3680 } 3681 3682 /* Initiate IPPF processing */ 3683 if (IP6_IN_IPP(flags)) { 3684 uint_t ifindex; 3685 3686 mutex_enter(&ill->ill_lock); 3687 ifindex = ill->ill_phyint->phyint_ifindex; 3688 mutex_exit(&ill->ill_lock); 3689 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3690 if (mp == NULL) { 3691 CONN_DEC_REF(connp); 3692 if (mctl_present) { 3693 freeb(first_mp); 3694 } 3695 return; 3696 } else if (mctl_present) { 3697 /* 3698 * ip_add_info_v6 might return a new mp. 3699 */ 3700 ASSERT(first_mp != mp); 3701 first_mp->b_cont = mp; 3702 } else { 3703 first_mp = mp; 3704 } 3705 } 3706 3707 /* 3708 * For link-local always add ifindex so that TCP can bind to that 3709 * interface. Avoid it for ICMP error fanout. 3710 */ 3711 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3712 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3713 (flags & IP_FF_IPINFO))) { 3714 /* Add header */ 3715 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3716 if (mp == NULL) { 3717 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3718 CONN_DEC_REF(connp); 3719 if (mctl_present) 3720 freeb(first_mp); 3721 return; 3722 } else if (mctl_present) { 3723 ASSERT(first_mp != mp); 3724 first_mp->b_cont = mp; 3725 } else { 3726 first_mp = mp; 3727 } 3728 } 3729 3730 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3731 if (IPCL_IS_TCP(connp)) { 3732 (*ip_input_proc)(connp->conn_sqp, first_mp, 3733 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3734 } else { 3735 putnext(connp->conn_rq, first_mp); 3736 CONN_DEC_REF(connp); 3737 } 3738 } 3739 3740 /* 3741 * Fanout for UDP packets. 3742 * The caller puts <fport, lport> in the ports parameter. 3743 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 3744 * 3745 * If SO_REUSEADDR is set all multicast and broadcast packets 3746 * will be delivered to all streams bound to the same port. 
3747 * 3748 * Zones notes: 3749 * Multicast packets will be distributed to streams in all zones. 3750 */ 3751 static void 3752 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3753 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3754 zoneid_t zoneid) 3755 { 3756 uint32_t dstport, srcport; 3757 in6_addr_t dst; 3758 mblk_t *first_mp; 3759 boolean_t secure; 3760 conn_t *connp; 3761 connf_t *connfp; 3762 conn_t *first_conn; 3763 conn_t *next_conn; 3764 mblk_t *mp1, *first_mp1; 3765 in6_addr_t src; 3766 boolean_t shared_addr; 3767 3768 first_mp = mp; 3769 if (mctl_present) { 3770 mp = first_mp->b_cont; 3771 secure = ipsec_in_is_secure(first_mp); 3772 ASSERT(mp != NULL); 3773 } else { 3774 secure = B_FALSE; 3775 } 3776 3777 /* Extract ports in net byte order */ 3778 dstport = htons(ntohl(ports) & 0xFFFF); 3779 srcport = htons(ntohl(ports) >> 16); 3780 dst = ip6h->ip6_dst; 3781 src = ip6h->ip6_src; 3782 3783 shared_addr = (zoneid == ALL_ZONES); 3784 if (shared_addr) { 3785 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3786 /* 3787 * If no shared MLP is found, tsol_mlp_findzone returns 3788 * ALL_ZONES. In that case, we assume it's SLP, and 3789 * search for the zone based on the packet label. 3790 * That will also return ALL_ZONES on failure, but 3791 * we never allow conn_zoneid to be set to ALL_ZONES. 3792 */ 3793 if (zoneid == ALL_ZONES) 3794 zoneid = tsol_packet_to_zoneid(mp); 3795 } 3796 3797 /* Attempt to find a client stream based on destination port. */ 3798 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; 3799 mutex_enter(&connfp->connf_lock); 3800 connp = connfp->connf_head; 3801 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3802 /* 3803 * Not multicast. Send to the one (first) client we find. 
3804 */ 3805 while (connp != NULL) { 3806 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3807 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3808 conn_wantpacket_v6(connp, ill, ip6h, 3809 flags, zoneid)) { 3810 break; 3811 } 3812 connp = connp->conn_next; 3813 } 3814 if (connp == NULL || connp->conn_upq == NULL) 3815 goto notfound; 3816 3817 if (is_system_labeled() && 3818 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3819 connp)) 3820 goto notfound; 3821 3822 /* Found a client */ 3823 CONN_INC_REF(connp); 3824 mutex_exit(&connfp->connf_lock); 3825 3826 if (CONN_UDP_FLOWCTLD(connp)) { 3827 freemsg(first_mp); 3828 CONN_DEC_REF(connp); 3829 return; 3830 } 3831 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 3832 first_mp = ipsec_check_inbound_policy(first_mp, 3833 connp, NULL, ip6h, mctl_present); 3834 if (first_mp == NULL) { 3835 CONN_DEC_REF(connp); 3836 return; 3837 } 3838 } 3839 /* Initiate IPPF processing */ 3840 if (IP6_IN_IPP(flags)) { 3841 uint_t ifindex; 3842 3843 mutex_enter(&ill->ill_lock); 3844 ifindex = ill->ill_phyint->phyint_ifindex; 3845 mutex_exit(&ill->ill_lock); 3846 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3847 if (mp == NULL) { 3848 CONN_DEC_REF(connp); 3849 if (mctl_present) 3850 freeb(first_mp); 3851 return; 3852 } 3853 } 3854 /* 3855 * For link-local always add ifindex so that 3856 * transport can set sin6_scope_id. Avoid it for 3857 * ICMP error fanout. 
3858 */ 3859 if ((connp->conn_ip_recvpktinfo || 3860 IN6_IS_ADDR_LINKLOCAL(&src)) && 3861 (flags & IP_FF_IPINFO)) { 3862 /* Add header */ 3863 mp = ip_add_info_v6(mp, inill, &dst); 3864 if (mp == NULL) { 3865 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3866 CONN_DEC_REF(connp); 3867 if (mctl_present) 3868 freeb(first_mp); 3869 return; 3870 } else if (mctl_present) { 3871 first_mp->b_cont = mp; 3872 } else { 3873 first_mp = mp; 3874 } 3875 } 3876 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3877 3878 /* Send it upstream */ 3879 CONN_UDP_RECV(connp, mp); 3880 3881 IP6_STAT(ip6_udp_fannorm); 3882 CONN_DEC_REF(connp); 3883 if (mctl_present) 3884 freeb(first_mp); 3885 return; 3886 } 3887 3888 while (connp != NULL) { 3889 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3890 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3891 (!is_system_labeled() || 3892 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3893 connp))) 3894 break; 3895 connp = connp->conn_next; 3896 } 3897 3898 if (connp == NULL || connp->conn_upq == NULL) 3899 goto notfound; 3900 3901 first_conn = connp; 3902 3903 CONN_INC_REF(connp); 3904 connp = connp->conn_next; 3905 for (;;) { 3906 while (connp != NULL) { 3907 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3908 src) && conn_wantpacket_v6(connp, ill, ip6h, 3909 flags, zoneid) && 3910 (!is_system_labeled() || 3911 tsol_receive_local(mp, &dst, IPV6_VERSION, 3912 shared_addr, connp))) 3913 break; 3914 connp = connp->conn_next; 3915 } 3916 /* 3917 * Just copy the data part alone. The mctl part is 3918 * needed just for verifying policy and it is never 3919 * sent up. 3920 */ 3921 if (connp == NULL || 3922 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3923 ((first_mp1 = ip_copymsg(first_mp)) 3924 == NULL))) { 3925 /* 3926 * No more interested clients or memory 3927 * allocation failed 3928 */ 3929 connp = first_conn; 3930 break; 3931 } 3932 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3933 CONN_INC_REF(connp); 3934 mutex_exit(&connfp->connf_lock); 3935 /* 3936 * For link-local always add ifindex so that transport 3937 * can set sin6_scope_id. Avoid it for ICMP error 3938 * fanout. 3939 */ 3940 if ((connp->conn_ip_recvpktinfo || 3941 IN6_IS_ADDR_LINKLOCAL(&src)) && 3942 (flags & IP_FF_IPINFO)) { 3943 /* Add header */ 3944 mp1 = ip_add_info_v6(mp1, inill, &dst); 3945 } 3946 /* mp1 could have changed */ 3947 if (mctl_present) 3948 first_mp1->b_cont = mp1; 3949 else 3950 first_mp1 = mp1; 3951 if (mp1 == NULL) { 3952 if (mctl_present) 3953 freeb(first_mp1); 3954 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3955 goto next_one; 3956 } 3957 if (CONN_UDP_FLOWCTLD(connp)) { 3958 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3959 freemsg(first_mp1); 3960 goto next_one; 3961 } 3962 3963 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || 3964 secure) { 3965 first_mp1 = ipsec_check_inbound_policy 3966 (first_mp1, connp, NULL, ip6h, 3967 mctl_present); 3968 } 3969 if (first_mp1 != NULL) { 3970 if (mctl_present) 3971 freeb(first_mp1); 3972 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3973 3974 /* Send it upstream */ 3975 CONN_UDP_RECV(connp, mp1); 3976 } 3977 next_one: 3978 mutex_enter(&connfp->connf_lock); 3979 /* Follow the next pointer before releasing the conn. */ 3980 next_conn = connp->conn_next; 3981 IP6_STAT(ip6_udp_fanmb); 3982 CONN_DEC_REF(connp); 3983 connp = next_conn; 3984 } 3985 3986 /* Last one. Send it upstream. 
*/ 3987 mutex_exit(&connfp->connf_lock); 3988 3989 /* Initiate IPPF processing */ 3990 if (IP6_IN_IPP(flags)) { 3991 uint_t ifindex; 3992 3993 mutex_enter(&ill->ill_lock); 3994 ifindex = ill->ill_phyint->phyint_ifindex; 3995 mutex_exit(&ill->ill_lock); 3996 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3997 if (mp == NULL) { 3998 CONN_DEC_REF(connp); 3999 if (mctl_present) { 4000 freeb(first_mp); 4001 } 4002 return; 4003 } 4004 } 4005 4006 /* 4007 * For link-local always add ifindex so that transport can set 4008 * sin6_scope_id. Avoid it for ICMP error fanout. 4009 */ 4010 if ((connp->conn_ip_recvpktinfo || 4011 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 4012 /* Add header */ 4013 mp = ip_add_info_v6(mp, inill, &dst); 4014 if (mp == NULL) { 4015 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 4016 CONN_DEC_REF(connp); 4017 if (mctl_present) 4018 freeb(first_mp); 4019 return; 4020 } else if (mctl_present) { 4021 first_mp->b_cont = mp; 4022 } else { 4023 first_mp = mp; 4024 } 4025 } 4026 if (CONN_UDP_FLOWCTLD(connp)) { 4027 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 4028 freemsg(mp); 4029 } else { 4030 if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) { 4031 first_mp = ipsec_check_inbound_policy(first_mp, 4032 connp, NULL, ip6h, mctl_present); 4033 if (first_mp == NULL) { 4034 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 4035 CONN_DEC_REF(connp); 4036 return; 4037 } 4038 } 4039 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 4040 4041 /* Send it upstream */ 4042 CONN_UDP_RECV(connp, mp); 4043 } 4044 IP6_STAT(ip6_udp_fanmb); 4045 CONN_DEC_REF(connp); 4046 if (mctl_present) 4047 freeb(first_mp); 4048 return; 4049 4050 notfound: 4051 mutex_exit(&connfp->connf_lock); 4052 /* 4053 * No one bound to this port. Is 4054 * there a client that wants all 4055 * unclaimed datagrams? 
4056 */ 4057 if (ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 4058 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 4059 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 4060 zoneid); 4061 } else { 4062 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 4063 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 4064 mctl_present, zoneid)) { 4065 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 4066 } 4067 } 4068 } 4069 4070 /* 4071 * int ip_find_hdr_v6() 4072 * 4073 * This routine is used by the upper layer protocols and the IP tunnel 4074 * module to: 4075 * - Set extension header pointers to appropriate locations 4076 * - Determine IPv6 header length and return it 4077 * - Return a pointer to the last nexthdr value 4078 * 4079 * The caller must initialize ipp_fields. 4080 * 4081 * NOTE: If multiple extension headers of the same type are present, 4082 * ip_find_hdr_v6() will set the respective extension header pointers 4083 * to the first one that it encounters in the IPv6 header. It also 4084 * skips fragment headers. This routine deals with malformed packets 4085 * of various sorts in which case the returned length is up to the 4086 * malformed part. 4087 */ 4088 int 4089 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4090 { 4091 uint_t length, ehdrlen; 4092 uint8_t nexthdr; 4093 uint8_t *whereptr, *endptr; 4094 ip6_dest_t *tmpdstopts; 4095 ip6_rthdr_t *tmprthdr; 4096 ip6_hbh_t *tmphopopts; 4097 ip6_frag_t *tmpfraghdr; 4098 4099 length = IPV6_HDR_LEN; 4100 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4101 endptr = mp->b_wptr; 4102 4103 nexthdr = ip6h->ip6_nxt; 4104 while (whereptr < endptr) { 4105 /* Is there enough left for len + nexthdr? 
*/ 4106 if (whereptr + MIN_EHDR_LEN > endptr) 4107 goto done; 4108 4109 switch (nexthdr) { 4110 case IPPROTO_HOPOPTS: 4111 tmphopopts = (ip6_hbh_t *)whereptr; 4112 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4113 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4114 goto done; 4115 nexthdr = tmphopopts->ip6h_nxt; 4116 /* return only 1st hbh */ 4117 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4118 ipp->ipp_fields |= IPPF_HOPOPTS; 4119 ipp->ipp_hopopts = tmphopopts; 4120 ipp->ipp_hopoptslen = ehdrlen; 4121 } 4122 break; 4123 case IPPROTO_DSTOPTS: 4124 tmpdstopts = (ip6_dest_t *)whereptr; 4125 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4126 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4127 goto done; 4128 nexthdr = tmpdstopts->ip6d_nxt; 4129 /* 4130 * ipp_dstopts is set to the destination header after a 4131 * routing header. 4132 * Assume it is a post-rthdr destination header 4133 * and adjust when we find an rthdr. 4134 */ 4135 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4136 ipp->ipp_fields |= IPPF_DSTOPTS; 4137 ipp->ipp_dstopts = tmpdstopts; 4138 ipp->ipp_dstoptslen = ehdrlen; 4139 } 4140 break; 4141 case IPPROTO_ROUTING: 4142 tmprthdr = (ip6_rthdr_t *)whereptr; 4143 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4144 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4145 goto done; 4146 nexthdr = tmprthdr->ip6r_nxt; 4147 /* return only 1st rthdr */ 4148 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4149 ipp->ipp_fields |= IPPF_RTHDR; 4150 ipp->ipp_rthdr = tmprthdr; 4151 ipp->ipp_rthdrlen = ehdrlen; 4152 } 4153 /* 4154 * Make any destination header we've seen be a 4155 * pre-rthdr destination header. 
4156 */ 4157 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4158 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4159 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4160 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4161 ipp->ipp_dstopts = NULL; 4162 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4163 ipp->ipp_dstoptslen = 0; 4164 } 4165 break; 4166 case IPPROTO_FRAGMENT: 4167 tmpfraghdr = (ip6_frag_t *)whereptr; 4168 ehdrlen = sizeof (ip6_frag_t); 4169 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4170 goto done; 4171 nexthdr = tmpfraghdr->ip6f_nxt; 4172 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4173 ipp->ipp_fields |= IPPF_FRAGHDR; 4174 ipp->ipp_fraghdr = tmpfraghdr; 4175 ipp->ipp_fraghdrlen = ehdrlen; 4176 } 4177 break; 4178 case IPPROTO_NONE: 4179 default: 4180 goto done; 4181 } 4182 length += ehdrlen; 4183 whereptr += ehdrlen; 4184 } 4185 done: 4186 if (nexthdrp != NULL) 4187 *nexthdrp = nexthdr; 4188 return (length); 4189 } 4190 4191 int 4192 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid) 4193 { 4194 ire_t *ire; 4195 4196 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4197 ire = ire_lookup_local_v6(zoneid); 4198 if (ire == NULL) { 4199 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4200 return (1); 4201 } 4202 ip6h->ip6_src = ire->ire_addr_v6; 4203 ire_refrele(ire); 4204 } 4205 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4206 ip6h->ip6_hops = ipv6_def_hops; 4207 return (0); 4208 } 4209 4210 /* 4211 * Try to determine where and what are the IPv6 header length and 4212 * pointer to nexthdr value for the upper layer protocol (or an 4213 * unknown next hdr). 4214 * 4215 * Parameters returns a pointer to the nexthdr value; 4216 * Must handle malformed packets of various sorts. 4217 * Function returns failure for malformed cases. 
4218 */ 4219 boolean_t 4220 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4221 uint8_t **nexthdrpp) 4222 { 4223 uint16_t length; 4224 uint_t ehdrlen; 4225 uint8_t *nexthdrp; 4226 uint8_t *whereptr; 4227 uint8_t *endptr; 4228 ip6_dest_t *desthdr; 4229 ip6_rthdr_t *rthdr; 4230 ip6_frag_t *fraghdr; 4231 4232 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4233 length = IPV6_HDR_LEN; 4234 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4235 endptr = mp->b_wptr; 4236 4237 nexthdrp = &ip6h->ip6_nxt; 4238 while (whereptr < endptr) { 4239 /* Is there enough left for len + nexthdr? */ 4240 if (whereptr + MIN_EHDR_LEN > endptr) 4241 break; 4242 4243 switch (*nexthdrp) { 4244 case IPPROTO_HOPOPTS: 4245 case IPPROTO_DSTOPTS: 4246 /* Assumes the headers are identical for hbh and dst */ 4247 desthdr = (ip6_dest_t *)whereptr; 4248 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4249 if ((uchar_t *)desthdr + ehdrlen > endptr) 4250 return (B_FALSE); 4251 nexthdrp = &desthdr->ip6d_nxt; 4252 break; 4253 case IPPROTO_ROUTING: 4254 rthdr = (ip6_rthdr_t *)whereptr; 4255 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4256 if ((uchar_t *)rthdr + ehdrlen > endptr) 4257 return (B_FALSE); 4258 nexthdrp = &rthdr->ip6r_nxt; 4259 break; 4260 case IPPROTO_FRAGMENT: 4261 fraghdr = (ip6_frag_t *)whereptr; 4262 ehdrlen = sizeof (ip6_frag_t); 4263 if ((uchar_t *)&fraghdr[1] > endptr) 4264 return (B_FALSE); 4265 nexthdrp = &fraghdr->ip6f_nxt; 4266 break; 4267 case IPPROTO_NONE: 4268 /* No next header means we're finished */ 4269 default: 4270 *hdr_length_ptr = length; 4271 *nexthdrpp = nexthdrp; 4272 return (B_TRUE); 4273 } 4274 length += ehdrlen; 4275 whereptr += ehdrlen; 4276 *hdr_length_ptr = length; 4277 *nexthdrpp = nexthdrp; 4278 } 4279 switch (*nexthdrp) { 4280 case IPPROTO_HOPOPTS: 4281 case IPPROTO_DSTOPTS: 4282 case IPPROTO_ROUTING: 4283 case IPPROTO_FRAGMENT: 4284 /* 4285 * If any know extension headers are still to be processed, 4286 * the 
packet's malformed (or at least all the IP header(s) are 4287 * not in the same mblk - and that should never happen. 4288 */ 4289 return (B_FALSE); 4290 4291 default: 4292 /* 4293 * If we get here, we know that all of the IP headers were in 4294 * the same mblk, even if the ULP header is in the next mblk. 4295 */ 4296 *hdr_length_ptr = length; 4297 *nexthdrpp = nexthdrp; 4298 return (B_TRUE); 4299 } 4300 } 4301 4302 /* 4303 * Return the length of the IPv6 related headers (including extension headers) 4304 * Returns a length even if the packet is malformed. 4305 */ 4306 int 4307 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4308 { 4309 uint16_t hdr_len; 4310 uint8_t *nexthdrp; 4311 4312 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4313 return (hdr_len); 4314 } 4315 4316 /* 4317 * Select an ill for the packet by considering load spreading across 4318 * a different ill in the group if dst_ill is part of some group. 4319 */ 4320 static ill_t * 4321 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4322 { 4323 ill_t *ill; 4324 4325 /* 4326 * We schedule irrespective of whether the source address is 4327 * INADDR_UNSPECIED or not. 4328 */ 4329 ill = illgrp_scheduler(dst_ill); 4330 if (ill == NULL) 4331 return (NULL); 4332 4333 /* 4334 * For groups with names ip_sioctl_groupname ensures that all 4335 * ills are of same type. For groups without names, ifgrp_insert 4336 * ensures this. 4337 */ 4338 ASSERT(dst_ill->ill_type == ill->ill_type); 4339 4340 return (ill); 4341 } 4342 4343 /* 4344 * IPv6 - 4345 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4346 * to send out a packet to a destination address for which we do not have 4347 * specific routing information. 4348 * 4349 * Handle non-multicast packets. If ill is non-NULL the match is done 4350 * for that ill. 
 *
 * When a specific ill is specified (using IPV6_PKTINFO,
 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
 * on routing entries (ftable and ctable) that have a matching
 * ire->ire_ipif->ipif_ill. Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it can not "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4389 */ 4390 /* ARGSUSED */ 4391 void 4392 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4393 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid) 4394 { 4395 in6_addr_t v6gw; 4396 in6_addr_t dst; 4397 ire_t *ire = NULL; 4398 ipif_t *src_ipif = NULL; 4399 ill_t *dst_ill = NULL; 4400 ire_t *sire = NULL; 4401 ire_t *save_ire; 4402 mblk_t *dlureq_mp; 4403 ip6_t *ip6h; 4404 int err = 0; 4405 mblk_t *first_mp; 4406 ipsec_out_t *io; 4407 ill_t *attach_ill = NULL; 4408 ushort_t ire_marks = 0; 4409 int match_flags; 4410 boolean_t ip6i_present; 4411 ire_t *first_sire = NULL; 4412 mblk_t *copy_mp = NULL; 4413 mblk_t *xmit_mp = NULL; 4414 in6_addr_t save_dst; 4415 uint32_t multirt_flags = 4416 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4417 boolean_t multirt_is_resolvable; 4418 boolean_t multirt_resolve_next; 4419 boolean_t need_rele = B_FALSE; 4420 boolean_t do_attach_ill = B_FALSE; 4421 boolean_t ip6_asp_table_held = B_FALSE; 4422 tsol_ire_gw_secattr_t *attrp = NULL; 4423 tsol_gcgrp_t *gcgrp = NULL; 4424 tsol_gcgrp_addr_t ga; 4425 4426 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4427 4428 first_mp = mp; 4429 if (mp->b_datap->db_type == M_CTL) { 4430 mp = mp->b_cont; 4431 io = (ipsec_out_t *)first_mp->b_rptr; 4432 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4433 } else { 4434 io = NULL; 4435 } 4436 4437 /* 4438 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4439 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4440 * could be NULL. 4441 * 4442 * This information can appear either in an ip6i_t or an IPSEC_OUT 4443 * message. 4444 */ 4445 ip6h = (ip6_t *)mp->b_rptr; 4446 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4447 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4448 if (!ip6i_present || 4449 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4450 attach_ill = ip_grab_attach_ill(ill, first_mp, 4451 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4452 io->ipsec_out_ill_index), B_TRUE); 4453 /* Failure case frees things for us. */ 4454 if (attach_ill == NULL) 4455 return; 4456 4457 /* 4458 * Check if we need an ire that will not be 4459 * looked up by anybody else i.e. HIDDEN. 4460 */ 4461 if (ill_is_probeonly(attach_ill)) 4462 ire_marks = IRE_MARK_HIDDEN; 4463 } 4464 } 4465 4466 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4467 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4468 goto icmp_err_ret; 4469 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4470 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4471 goto icmp_err_ret; 4472 } 4473 4474 /* 4475 * If this IRE is created for forwarding or it is not for 4476 * TCP traffic, mark it as temporary. 4477 * 4478 * Is it sufficient just to check the next header?? 4479 */ 4480 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4481 ire_marks |= IRE_MARK_TEMPORARY; 4482 4483 /* 4484 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4485 * chain until it gets the most specific information available. 4486 * For example, we know that there is no IRE_CACHE for this dest, 4487 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4488 * ire_ftable_lookup_v6 will look up the gateway, etc. 4489 */ 4490 4491 if (ill == NULL) { 4492 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4493 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4494 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4495 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4496 match_flags); 4497 /* 4498 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4499 * in a NULL ill, but the packet could be a neighbor 4500 * solicitation/advertisment and could have a valid attach_ill. 4501 */ 4502 if (attach_ill != NULL) 4503 ill_refrele(attach_ill); 4504 } else { 4505 if (attach_ill != NULL) { 4506 /* 4507 * attach_ill is set only for communicating with 4508 * on-link hosts. So, don't look for DEFAULT. 
4509 * ip_wput_v6 passes the right ill in this case and 4510 * hence we can assert. 4511 */ 4512 ASSERT(ill == attach_ill); 4513 ill_refrele(attach_ill); 4514 do_attach_ill = B_TRUE; 4515 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4516 } else { 4517 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4518 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4519 } 4520 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4521 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4522 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags); 4523 } 4524 4525 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4526 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4527 4528 if (zoneid == ALL_ZONES && ire != NULL) { 4529 /* 4530 * In the forwarding case, we can use a route from any zone 4531 * since we won't change the source address. We can easily 4532 * assert that the source address is already set when there's no 4533 * ip6_info header - otherwise we'd have to call pullupmsg(). 4534 */ 4535 ASSERT(ip6i_present || 4536 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4537 zoneid = ire->ire_zoneid; 4538 } 4539 4540 /* 4541 * We enter a loop that will be run only once in most cases. 4542 * The loop is re-entered in the case where the destination 4543 * can be reached through multiple RTF_MULTIRT-flagged routes. 4544 * The intention is to compute multiple routes to a single 4545 * destination in a single ip_newroute_v6 call. 4546 * The information is contained in sire->ire_flags. 
4547 */ 4548 do { 4549 multirt_resolve_next = B_FALSE; 4550 4551 if (dst_ill != NULL) { 4552 ill_refrele(dst_ill); 4553 dst_ill = NULL; 4554 } 4555 if (src_ipif != NULL) { 4556 ipif_refrele(src_ipif); 4557 src_ipif = NULL; 4558 } 4559 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4560 ip3dbg(("ip_newroute_v6: starting new resolution " 4561 "with first_mp %p, tag %d\n", 4562 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4563 4564 /* 4565 * We check if there are trailing unresolved routes for 4566 * the destination contained in sire. 4567 */ 4568 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4569 &sire, multirt_flags, MBLK_GETLABEL(mp)); 4570 4571 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4572 "ire %p, sire %p\n", 4573 multirt_is_resolvable, (void *)ire, (void *)sire)); 4574 4575 if (!multirt_is_resolvable) { 4576 /* 4577 * No more multirt routes to resolve; give up 4578 * (all routes resolved or no more resolvable 4579 * routes). 4580 */ 4581 if (ire != NULL) { 4582 ire_refrele(ire); 4583 ire = NULL; 4584 } 4585 } else { 4586 ASSERT(sire != NULL); 4587 ASSERT(ire != NULL); 4588 /* 4589 * We simply use first_sire as a flag that 4590 * indicates if a resolvable multirt route has 4591 * already been found during the preceding 4592 * loops. If it is not the case, we may have 4593 * to send an ICMP error to report that the 4594 * destination is unreachable. We do not 4595 * IRE_REFHOLD first_sire. 4596 */ 4597 if (first_sire == NULL) { 4598 first_sire = sire; 4599 } 4600 } 4601 } 4602 if ((ire == NULL) || (ire == sire)) { 4603 /* 4604 * either ire == NULL (the destination cannot be 4605 * resolved) or ire == sire (the gateway cannot be 4606 * resolved). At this point, there are no more routes 4607 * to resolve for the destination, thus we exit. 
4608 */ 4609 if (ip_debug > 3) { 4610 /* ip2dbg */ 4611 pr_addr_dbg("ip_newroute_v6: " 4612 "can't resolve %s\n", AF_INET6, v6dstp); 4613 } 4614 ip3dbg(("ip_newroute_v6: " 4615 "ire %p, sire %p, first_sire %p\n", 4616 (void *)ire, (void *)sire, (void *)first_sire)); 4617 4618 if (sire != NULL) { 4619 ire_refrele(sire); 4620 sire = NULL; 4621 } 4622 4623 if (first_sire != NULL) { 4624 /* 4625 * At least one multirt route has been found 4626 * in the same ip_newroute() call; there is no 4627 * need to report an ICMP error. 4628 * first_sire was not IRE_REFHOLDed. 4629 */ 4630 MULTIRT_DEBUG_UNTAG(first_mp); 4631 freemsg(first_mp); 4632 return; 4633 } 4634 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4635 RTA_DST); 4636 goto icmp_err_ret; 4637 } 4638 4639 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4640 4641 /* 4642 * Verify that the returned IRE does not have either the 4643 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4644 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4645 */ 4646 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4647 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4648 goto icmp_err_ret; 4649 4650 /* 4651 * Increment the ire_ob_pkt_count field for ire if it is an 4652 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4653 * increment the same for the parent IRE, sire, if it is some 4654 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4655 * and HOST_REDIRECT). 4656 */ 4657 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4658 UPDATE_OB_PKT_COUNT(ire); 4659 ire->ire_last_used_time = lbolt; 4660 } 4661 4662 if (sire != NULL) { 4663 mutex_enter(&sire->ire_lock); 4664 v6gw = sire->ire_gateway_addr_v6; 4665 mutex_exit(&sire->ire_lock); 4666 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4667 IRE_INTERFACE)) == 0); 4668 UPDATE_OB_PKT_COUNT(sire); 4669 sire->ire_last_used_time = lbolt; 4670 } else { 4671 v6gw = ipv6_all_zeros; 4672 } 4673 4674 /* 4675 * We have a route to reach the destination. 
4676 * 4677 * 1) If the interface is part of ill group, try to get a new 4678 * ill taking load spreading into account. 4679 * 4680 * 2) After selecting the ill, get a source address that might 4681 * create good inbound load spreading and that matches the 4682 * right scope. ipif_select_source_v6 does this for us. 4683 * 4684 * If the application specified the ill (ifindex), we still 4685 * load spread. Only if the packets needs to go out specifically 4686 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4687 * IPV6_BOUND_PIF we don't try to use a different ill for load 4688 * spreading. 4689 */ 4690 if (!do_attach_ill) { 4691 /* 4692 * If the interface belongs to an interface group, 4693 * make sure the next possible interface in the group 4694 * is used. This encourages load spreading among 4695 * peers in an interface group. However, in the case 4696 * of multirouting, load spreading is not used, as we 4697 * actually want to replicate outgoing packets through 4698 * particular interfaces. 4699 * 4700 * Note: While we pick a dst_ill we are really only 4701 * interested in the ill for load spreading. 4702 * The source ipif is determined by source address 4703 * selection below. 4704 */ 4705 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4706 dst_ill = ire->ire_ipif->ipif_ill; 4707 /* For uniformity do a refhold */ 4708 ill_refhold(dst_ill); 4709 } else { 4710 /* 4711 * If we are here trying to create an IRE_CACHE 4712 * for an offlink destination and have the 4713 * IRE_CACHE for the next hop and the latter is 4714 * using virtual IP source address selection i.e 4715 * it's ire->ire_ipif is pointing to a virtual 4716 * network interface (vni) then 4717 * ip_newroute_get_dst_ll() will return the vni 4718 * interface as the dst_ill. 
Since the vni is 4719 * virtual i.e not associated with any physical 4720 * interface, it cannot be the dst_ill, hence 4721 * in such a case call ip_newroute_get_dst_ll() 4722 * with the stq_ill instead of the ire_ipif ILL. 4723 * The function returns a refheld ill. 4724 */ 4725 if ((ire->ire_type == IRE_CACHE) && 4726 IS_VNI(ire->ire_ipif->ipif_ill)) 4727 dst_ill = ip_newroute_get_dst_ill_v6( 4728 ire->ire_stq->q_ptr); 4729 else 4730 dst_ill = ip_newroute_get_dst_ill_v6( 4731 ire->ire_ipif->ipif_ill); 4732 } 4733 if (dst_ill == NULL) { 4734 if (ip_debug > 2) { 4735 pr_addr_dbg("ip_newroute_v6 : no dst " 4736 "ill for dst %s\n", 4737 AF_INET6, v6dstp); 4738 } 4739 goto icmp_err_ret; 4740 } else if (dst_ill->ill_group == NULL && ill != NULL && 4741 dst_ill != ill) { 4742 /* 4743 * If "ill" is not part of any group, we should 4744 * have found a route matching "ill" as we 4745 * called ire_ftable_lookup_v6 with 4746 * MATCH_IRE_ILL_GROUP. 4747 * Rather than asserting when there is a 4748 * mismatch, we just drop the packet. 4749 */ 4750 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4751 "dst_ill %s ill %s\n", 4752 dst_ill->ill_name, 4753 ill->ill_name)); 4754 goto icmp_err_ret; 4755 } 4756 } else { 4757 dst_ill = ire->ire_ipif->ipif_ill; 4758 /* For uniformity do refhold */ 4759 ill_refhold(dst_ill); 4760 /* 4761 * We should have found a route matching ill as we 4762 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4763 * Rather than asserting, while there is a mismatch, 4764 * we just drop the packet. 4765 */ 4766 if (dst_ill != ill) { 4767 ip0dbg(("ip_newroute_v6: Packet dropped as " 4768 "IP6I_ATTACH_IF ill is %s, " 4769 "ire->ire_ipif->ipif_ill is %s\n", 4770 ill->ill_name, 4771 dst_ill->ill_name)); 4772 goto icmp_err_ret; 4773 } 4774 } 4775 /* 4776 * Pick a source address which matches the scope of the 4777 * destination address. 4778 * For RTF_SETSRC routes, the source address is imposed by the 4779 * parent ire (sire). 
4780 */ 4781 ASSERT(src_ipif == NULL); 4782 if (ire->ire_type == IRE_IF_RESOLVER && 4783 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4784 ip6_asp_can_lookup()) { 4785 /* 4786 * The ire cache entry we're adding is for the 4787 * gateway itself. The source address in this case 4788 * is relative to the gateway's address. 4789 */ 4790 ip6_asp_table_held = B_TRUE; 4791 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4792 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4793 if (src_ipif != NULL) 4794 ire_marks |= IRE_MARK_USESRC_CHECK; 4795 } else { 4796 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4797 /* 4798 * Check that the ipif matching the requested 4799 * source address still exists. 4800 */ 4801 src_ipif = ipif_lookup_addr_v6( 4802 &sire->ire_src_addr_v6, NULL, zoneid, 4803 NULL, NULL, NULL, NULL); 4804 } 4805 if (src_ipif == NULL && ip6_asp_can_lookup()) { 4806 uint_t restrict_ill = RESTRICT_TO_NONE; 4807 4808 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4809 & IP6I_ATTACH_IF) 4810 restrict_ill = RESTRICT_TO_ILL; 4811 ip6_asp_table_held = B_TRUE; 4812 src_ipif = ipif_select_source_v6(dst_ill, 4813 v6dstp, restrict_ill, 4814 IPV6_PREFER_SRC_DEFAULT, zoneid); 4815 if (src_ipif != NULL) 4816 ire_marks |= IRE_MARK_USESRC_CHECK; 4817 } 4818 } 4819 4820 if (src_ipif == NULL) { 4821 if (ip_debug > 2) { 4822 /* ip1dbg */ 4823 pr_addr_dbg("ip_newroute_v6: no src for " 4824 "dst %s\n, ", AF_INET6, v6dstp); 4825 printf("ip_newroute_v6: interface name %s\n", 4826 dst_ill->ill_name); 4827 } 4828 goto icmp_err_ret; 4829 } 4830 4831 if (ip_debug > 3) { 4832 /* ip2dbg */ 4833 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4834 AF_INET6, &v6gw); 4835 } 4836 ip2dbg(("\tire type %s (%d)\n", 4837 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4838 4839 /* 4840 * At this point in ip_newroute_v6(), ire is either the 4841 * IRE_CACHE of the next-hop gateway for an off-subnet 4842 * destination or an IRE_INTERFACE type that should be used 4843 * to resolve an 
on-subnet destination or an on-subnet 4844 * next-hop gateway. 4845 * 4846 * In the IRE_CACHE case, we have the following : 4847 * 4848 * 1) src_ipif - used for getting a source address. 4849 * 4850 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4851 * means packets using this IRE_CACHE will go out on dst_ill. 4852 * 4853 * 3) The IRE sire will point to the prefix that is the longest 4854 * matching route for the destination. These prefix types 4855 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4856 * 4857 * The newly created IRE_CACHE entry for the off-subnet 4858 * destination is tied to both the prefix route and the 4859 * interface route used to resolve the next-hop gateway 4860 * via the ire_phandle and ire_ihandle fields, respectively. 4861 * 4862 * In the IRE_INTERFACE case, we have the following : 4863 * 4864 * 1) src_ipif - used for getting a source address. 4865 * 4866 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4867 * means packets using the IRE_CACHE that we will build 4868 * here will go out on dst_ill. 4869 * 4870 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4871 * to be created will only be tied to the IRE_INTERFACE that 4872 * was derived from the ire_ihandle field. 4873 * 4874 * If sire is non-NULL, it means the destination is off-link 4875 * and we will first create the IRE_CACHE for the gateway. 4876 * Next time through ip_newroute_v6, we will create the 4877 * IRE_CACHE for the final destination as described above. 4878 */ 4879 save_ire = ire; 4880 switch (ire->ire_type) { 4881 case IRE_CACHE: { 4882 ire_t *ipif_ire; 4883 4884 ASSERT(sire != NULL); 4885 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4886 mutex_enter(&ire->ire_lock); 4887 v6gw = ire->ire_gateway_addr_v6; 4888 mutex_exit(&ire->ire_lock); 4889 } 4890 /* 4891 * We need 3 ire's to create a new cache ire for an 4892 * off-link destination from the cache ire of the 4893 * gateway. 4894 * 4895 * 1. The prefix ire 'sire' 4896 * 2. 
The cache ire of the gateway 'ire' 4897 * 3. The interface ire 'ipif_ire' 4898 * 4899 * We have (1) and (2). We lookup (3) below. 4900 * 4901 * If there is no interface route to the gateway, 4902 * it is a race condition, where we found the cache 4903 * but the inteface route has been deleted. 4904 */ 4905 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4906 if (ipif_ire == NULL) { 4907 ip1dbg(("ip_newroute_v6:" 4908 "ire_ihandle_lookup_offlink_v6 failed\n")); 4909 goto icmp_err_ret; 4910 } 4911 /* 4912 * Assume DL_UNITDATA_REQ is same for all physical 4913 * interfaces in the ifgrp. If it isn't, this code will 4914 * have to be seriously rewhacked to allow the 4915 * fastpath probing (such that I cache the link 4916 * header in the IRE_CACHE) to work over ifgrps. 4917 * We have what we need to build an IRE_CACHE. 4918 */ 4919 /* 4920 * Note: the new ire inherits RTF_SETSRC 4921 * and RTF_MULTIRT to propagate these flags from prefix 4922 * to cache. 4923 */ 4924 4925 /* 4926 * Check cached gateway IRE for any security 4927 * attributes; if found, associate the gateway 4928 * credentials group to the destination IRE. 
4929 */ 4930 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4931 mutex_enter(&attrp->igsa_lock); 4932 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4933 GCGRP_REFHOLD(gcgrp); 4934 mutex_exit(&attrp->igsa_lock); 4935 } 4936 4937 ire = ire_create_v6( 4938 v6dstp, /* dest address */ 4939 &ipv6_all_ones, /* mask */ 4940 &src_ipif->ipif_v6src_addr, /* source address */ 4941 &v6gw, /* gateway address */ 4942 &save_ire->ire_max_frag, 4943 NULL, /* Fast Path header */ 4944 dst_ill->ill_rq, /* recv-from queue */ 4945 dst_ill->ill_wq, /* send-to queue */ 4946 IRE_CACHE, 4947 NULL, 4948 src_ipif, 4949 &sire->ire_mask_v6, /* Parent mask */ 4950 sire->ire_phandle, /* Parent handle */ 4951 ipif_ire->ire_ihandle, /* Interface handle */ 4952 sire->ire_flags & /* flags if any */ 4953 (RTF_SETSRC | RTF_MULTIRT), 4954 &(sire->ire_uinfo), 4955 NULL, 4956 gcgrp); 4957 4958 if (ire == NULL) { 4959 if (gcgrp != NULL) { 4960 GCGRP_REFRELE(gcgrp); 4961 gcgrp = NULL; 4962 } 4963 ire_refrele(save_ire); 4964 ire_refrele(ipif_ire); 4965 break; 4966 } 4967 4968 /* reference now held by IRE */ 4969 gcgrp = NULL; 4970 4971 ire->ire_marks |= ire_marks; 4972 4973 /* 4974 * Prevent sire and ipif_ire from getting deleted. The 4975 * newly created ire is tied to both of them via the 4976 * phandle and ihandle respectively. 4977 */ 4978 IRB_REFHOLD(sire->ire_bucket); 4979 /* Has it been removed already ? */ 4980 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4981 IRB_REFRELE(sire->ire_bucket); 4982 ire_refrele(ipif_ire); 4983 ire_refrele(save_ire); 4984 break; 4985 } 4986 4987 IRB_REFHOLD(ipif_ire->ire_bucket); 4988 /* Has it been removed already ? 
*/ 4989 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4990 IRB_REFRELE(ipif_ire->ire_bucket); 4991 IRB_REFRELE(sire->ire_bucket); 4992 ire_refrele(ipif_ire); 4993 ire_refrele(save_ire); 4994 break; 4995 } 4996 4997 xmit_mp = first_mp; 4998 if (ire->ire_flags & RTF_MULTIRT) { 4999 copy_mp = copymsg(first_mp); 5000 if (copy_mp != NULL) { 5001 xmit_mp = copy_mp; 5002 MULTIRT_DEBUG_TAG(first_mp); 5003 } 5004 } 5005 ire_add_then_send(q, ire, xmit_mp); 5006 if (ip6_asp_table_held) { 5007 ip6_asp_table_refrele(); 5008 ip6_asp_table_held = B_FALSE; 5009 } 5010 ire_refrele(save_ire); 5011 5012 /* Assert that sire is not deleted yet. */ 5013 ASSERT(sire->ire_ptpn != NULL); 5014 IRB_REFRELE(sire->ire_bucket); 5015 5016 /* Assert that ipif_ire is not deleted yet. */ 5017 ASSERT(ipif_ire->ire_ptpn != NULL); 5018 IRB_REFRELE(ipif_ire->ire_bucket); 5019 ire_refrele(ipif_ire); 5020 5021 if (copy_mp != NULL) { 5022 /* 5023 * Search for the next unresolved 5024 * multirt route. 5025 */ 5026 copy_mp = NULL; 5027 ipif_ire = NULL; 5028 ire = NULL; 5029 /* re-enter the loop */ 5030 multirt_resolve_next = B_TRUE; 5031 continue; 5032 } 5033 ire_refrele(sire); 5034 ill_refrele(dst_ill); 5035 ipif_refrele(src_ipif); 5036 return; 5037 } 5038 case IRE_IF_NORESOLVER: 5039 /* 5040 * We have what we need to build an IRE_CACHE. 5041 * 5042 * Create a new dlureq_mp with the IPv6 gateway 5043 * address in destination address in the DLPI hdr 5044 * if the physical length is exactly 16 bytes. 5045 */ 5046 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5047 const in6_addr_t *addr; 5048 5049 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5050 addr = &v6gw; 5051 else 5052 addr = v6dstp; 5053 5054 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 5055 dst_ill->ill_phys_addr_length, 5056 dst_ill->ill_sap, 5057 dst_ill->ill_sap_length); 5058 } else { 5059 /* 5060 * handle the Gated case, where we create 5061 * a NORESOLVER route for loopback. 
5062 */ 5063 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5064 break; 5065 dlureq_mp = ill_dlur_gen(NULL, 5066 dst_ill->ill_phys_addr_length, 5067 dst_ill->ill_sap, 5068 dst_ill->ill_sap_length); 5069 } 5070 if (dlureq_mp == NULL) 5071 break; 5072 /* 5073 * TSol note: We are creating the ire cache for the 5074 * destination 'dst'. If 'dst' is offlink, going 5075 * through the first hop 'gw', the security attributes 5076 * of 'dst' must be set to point to the gateway 5077 * credentials of gateway 'gw'. If 'dst' is onlink, it 5078 * is possible that 'dst' is a potential gateway that is 5079 * referenced by some route that has some security 5080 * attributes. Thus in the former case, we need to do a 5081 * gcgrp_lookup of 'gw' while in the latter case we 5082 * need to do gcgrp_lookup of 'dst' itself. 5083 */ 5084 ga.ga_af = AF_INET6; 5085 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5086 ga.ga_addr = v6gw; 5087 else 5088 ga.ga_addr = *v6dstp; 5089 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5090 5091 /* 5092 * Note: the new ire inherits sire flags RTF_SETSRC 5093 * and RTF_MULTIRT to propagate those rules from prefix 5094 * to cache. 5095 */ 5096 ire = ire_create_v6( 5097 v6dstp, /* dest address */ 5098 &ipv6_all_ones, /* mask */ 5099 &src_ipif->ipif_v6src_addr, /* source address */ 5100 &v6gw, /* gateway address */ 5101 &save_ire->ire_max_frag, 5102 NULL, /* Fast Path header */ 5103 dst_ill->ill_rq, /* recv-from queue */ 5104 dst_ill->ill_wq, /* send-to queue */ 5105 IRE_CACHE, 5106 dlureq_mp, 5107 src_ipif, 5108 &save_ire->ire_mask_v6, /* Parent mask */ 5109 (sire != NULL) ? /* Parent handle */ 5110 sire->ire_phandle : 0, 5111 save_ire->ire_ihandle, /* Interface handle */ 5112 (sire != NULL) ? 
/* flags if any */ 5113 sire->ire_flags & 5114 (RTF_SETSRC | RTF_MULTIRT) : 0, 5115 &(save_ire->ire_uinfo), 5116 NULL, 5117 gcgrp); 5118 5119 freeb(dlureq_mp); 5120 5121 if (ire == NULL) { 5122 if (gcgrp != NULL) { 5123 GCGRP_REFRELE(gcgrp); 5124 gcgrp = NULL; 5125 } 5126 ire_refrele(save_ire); 5127 break; 5128 } 5129 5130 /* reference now held by IRE */ 5131 gcgrp = NULL; 5132 5133 ire->ire_marks |= ire_marks; 5134 5135 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5136 dst = v6gw; 5137 else 5138 dst = *v6dstp; 5139 err = ndp_noresolver(dst_ill, &dst); 5140 if (err != 0) { 5141 ire_refrele(save_ire); 5142 break; 5143 } 5144 5145 /* Prevent save_ire from getting deleted */ 5146 IRB_REFHOLD(save_ire->ire_bucket); 5147 /* Has it been removed already ? */ 5148 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5149 IRB_REFRELE(save_ire->ire_bucket); 5150 ire_refrele(save_ire); 5151 break; 5152 } 5153 5154 xmit_mp = first_mp; 5155 /* 5156 * In case of MULTIRT, a copy of the current packet 5157 * to send is made to further re-enter the 5158 * loop and attempt another route resolution 5159 */ 5160 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5161 copy_mp = copymsg(first_mp); 5162 if (copy_mp != NULL) { 5163 xmit_mp = copy_mp; 5164 MULTIRT_DEBUG_TAG(first_mp); 5165 } 5166 } 5167 ire_add_then_send(q, ire, xmit_mp); 5168 if (ip6_asp_table_held) { 5169 ip6_asp_table_refrele(); 5170 ip6_asp_table_held = B_FALSE; 5171 } 5172 5173 /* Assert that it is not deleted yet. */ 5174 ASSERT(save_ire->ire_ptpn != NULL); 5175 IRB_REFRELE(save_ire->ire_bucket); 5176 ire_refrele(save_ire); 5177 5178 if (copy_mp != NULL) { 5179 /* 5180 * If we found a (no)resolver, we ignore any 5181 * trailing top priority IRE_CACHE in 5182 * further loops. This ensures that we do not 5183 * omit any (no)resolver despite the priority 5184 * in this call. 5185 * IRE_CACHE, if any, will be processed 5186 * by another thread entering ip_newroute(), 5187 * (on resolver response, for example). 
5188 * We use this to force multiple parallel 5189 * resolution as soon as a packet needs to be 5190 * sent. The result is, after one packet 5191 * emission all reachable routes are generally 5192 * resolved. 5193 * Otherwise, complete resolution of MULTIRT 5194 * routes would require several emissions as 5195 * side effect. 5196 */ 5197 multirt_flags &= ~MULTIRT_CACHEGW; 5198 5199 /* 5200 * Search for the next unresolved multirt 5201 * route. 5202 */ 5203 copy_mp = NULL; 5204 save_ire = NULL; 5205 ire = NULL; 5206 /* re-enter the loop */ 5207 multirt_resolve_next = B_TRUE; 5208 continue; 5209 } 5210 5211 /* Don't need sire anymore */ 5212 if (sire != NULL) 5213 ire_refrele(sire); 5214 ill_refrele(dst_ill); 5215 ipif_refrele(src_ipif); 5216 return; 5217 5218 case IRE_IF_RESOLVER: 5219 /* 5220 * We can't build an IRE_CACHE yet, but at least we 5221 * found a resolver that can help. 5222 */ 5223 dst = *v6dstp; 5224 5225 /* 5226 * To be at this point in the code with a non-zero gw 5227 * means that dst is reachable through a gateway that 5228 * we have never resolved. By changing dst to the gw 5229 * addr we resolve the gateway first. When 5230 * ire_add_then_send() tries to put the IP dg to dst, 5231 * it will reenter ip_newroute() at which time we will 5232 * find the IRE_CACHE for the gw and create another 5233 * IRE_CACHE above (for dst itself). 5234 */ 5235 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5236 save_dst = dst; 5237 dst = v6gw; 5238 v6gw = ipv6_all_zeros; 5239 } 5240 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5241 /* 5242 * Ask the external resolver to do its thing. 
5243 * Make an mblk chain in the following form: 5244 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5245 */ 5246 mblk_t *ire_mp; 5247 mblk_t *areq_mp; 5248 areq_t *areq; 5249 in6_addr_t *addrp; 5250 5251 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5252 if (ip6_asp_table_held) { 5253 ip6_asp_table_refrele(); 5254 ip6_asp_table_held = B_FALSE; 5255 } 5256 ire = ire_create_mp_v6( 5257 &dst, /* dest address */ 5258 &ipv6_all_ones, /* mask */ 5259 &src_ipif->ipif_v6src_addr, 5260 /* source address */ 5261 &v6gw, /* gateway address */ 5262 NULL, /* Fast Path header */ 5263 dst_ill->ill_rq, /* recv-from queue */ 5264 dst_ill->ill_wq, /* send-to queue */ 5265 IRE_CACHE, 5266 NULL, 5267 src_ipif, 5268 &save_ire->ire_mask_v6, 5269 /* Parent mask */ 5270 0, 5271 save_ire->ire_ihandle, 5272 /* Interface handle */ 5273 0, /* flags if any */ 5274 &(save_ire->ire_uinfo), 5275 NULL, 5276 NULL); 5277 5278 ire_refrele(save_ire); 5279 if (ire == NULL) { 5280 ip1dbg(("ip_newroute_v6:" 5281 "ire is NULL\n")); 5282 break; 5283 } 5284 5285 if ((sire != NULL) && 5286 (sire->ire_flags & RTF_MULTIRT)) { 5287 /* 5288 * processing a copy of the packet to 5289 * send for further resolution loops 5290 */ 5291 copy_mp = copymsg(first_mp); 5292 if (copy_mp != NULL) 5293 MULTIRT_DEBUG_TAG(copy_mp); 5294 } 5295 ire->ire_marks |= ire_marks; 5296 ire_mp = ire->ire_mp; 5297 /* 5298 * Now create or find an nce for this interface. 5299 * The hw addr will need to to be set from 5300 * the reply to the AR_ENTRY_QUERY that 5301 * we're about to send. This will be done in 5302 * ire_add_v6(). 5303 */ 5304 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5305 switch (err) { 5306 case 0: 5307 /* 5308 * New cache entry created. 5309 * Break, then ask the external 5310 * resolver. 5311 */ 5312 break; 5313 case EINPROGRESS: 5314 /* 5315 * Resolution in progress; 5316 * packet has been queued by 5317 * ndp_resolver(). 
5318 */ 5319 ire_delete(ire); 5320 ire = NULL; 5321 /* 5322 * Check if another multirt 5323 * route must be resolved. 5324 */ 5325 if (copy_mp != NULL) { 5326 /* 5327 * If we found a resolver, we 5328 * ignore any trailing top 5329 * priority IRE_CACHE in 5330 * further loops. The reason is 5331 * the same as for noresolver. 5332 */ 5333 multirt_flags &= 5334 ~MULTIRT_CACHEGW; 5335 /* 5336 * Search for the next 5337 * unresolved multirt route. 5338 */ 5339 first_mp = copy_mp; 5340 copy_mp = NULL; 5341 mp = first_mp; 5342 if (mp->b_datap->db_type == 5343 M_CTL) { 5344 mp = mp->b_cont; 5345 } 5346 ASSERT(sire != NULL); 5347 dst = save_dst; 5348 /* 5349 * re-enter the loop 5350 */ 5351 multirt_resolve_next = 5352 B_TRUE; 5353 continue; 5354 } 5355 5356 if (sire != NULL) 5357 ire_refrele(sire); 5358 ill_refrele(dst_ill); 5359 ipif_refrele(src_ipif); 5360 return; 5361 default: 5362 /* 5363 * Transient error; packet will be 5364 * freed. 5365 */ 5366 ire_delete(ire); 5367 ire = NULL; 5368 break; 5369 } 5370 if (err != 0) 5371 break; 5372 /* 5373 * Now set up the AR_ENTRY_QUERY and send it. 5374 */ 5375 areq_mp = ill_arp_alloc(dst_ill, 5376 (uchar_t *)&ipv6_areq_template, 5377 (caddr_t)&dst); 5378 if (areq_mp == NULL) { 5379 ip1dbg(("ip_newroute_v6:" 5380 "areq_mp is NULL\n")); 5381 freemsg(ire_mp); 5382 break; 5383 } 5384 areq = (areq_t *)areq_mp->b_rptr; 5385 addrp = (in6_addr_t *)((char *)areq + 5386 areq->areq_target_addr_offset); 5387 *addrp = dst; 5388 addrp = (in6_addr_t *)((char *)areq + 5389 areq->areq_sender_addr_offset); 5390 *addrp = src_ipif->ipif_v6src_addr; 5391 /* 5392 * link the chain, then send up to the resolver. 5393 */ 5394 linkb(areq_mp, ire_mp); 5395 linkb(areq_mp, mp); 5396 ip1dbg(("ip_newroute_v6:" 5397 "putnext to resolver\n")); 5398 putnext(dst_ill->ill_rq, areq_mp); 5399 /* 5400 * Check if another multirt route 5401 * must be resolved. 
5402 */ 5403 ire = NULL; 5404 if (copy_mp != NULL) { 5405 /* 5406 * If we find a resolver, we ignore any 5407 * trailing top priority IRE_CACHE in 5408 * further loops. The reason is the 5409 * same as for noresolver. 5410 */ 5411 multirt_flags &= ~MULTIRT_CACHEGW; 5412 /* 5413 * Search for the next unresolved 5414 * multirt route. 5415 */ 5416 first_mp = copy_mp; 5417 copy_mp = NULL; 5418 mp = first_mp; 5419 if (mp->b_datap->db_type == M_CTL) { 5420 mp = mp->b_cont; 5421 } 5422 ASSERT(sire != NULL); 5423 dst = save_dst; 5424 /* 5425 * re-enter the loop 5426 */ 5427 multirt_resolve_next = B_TRUE; 5428 continue; 5429 } 5430 5431 if (sire != NULL) 5432 ire_refrele(sire); 5433 ill_refrele(dst_ill); 5434 ipif_refrele(src_ipif); 5435 return; 5436 } 5437 /* 5438 * Non-external resolver case. 5439 * 5440 * TSol note: Please see the note above the 5441 * IRE_IF_NORESOLVER case. 5442 */ 5443 ga.ga_af = AF_INET6; 5444 ga.ga_addr = dst; 5445 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5446 5447 ire = ire_create_v6( 5448 &dst, /* dest address */ 5449 &ipv6_all_ones, /* mask */ 5450 &src_ipif->ipif_v6src_addr, /* source address */ 5451 &v6gw, /* gateway address */ 5452 &save_ire->ire_max_frag, 5453 NULL, /* Fast Path header */ 5454 dst_ill->ill_rq, /* recv-from queue */ 5455 dst_ill->ill_wq, /* send-to queue */ 5456 IRE_CACHE, 5457 NULL, 5458 src_ipif, 5459 &save_ire->ire_mask_v6, /* Parent mask */ 5460 0, 5461 save_ire->ire_ihandle, /* Interface handle */ 5462 0, /* flags if any */ 5463 &(save_ire->ire_uinfo), 5464 NULL, 5465 gcgrp); 5466 5467 if (ire == NULL) { 5468 if (gcgrp != NULL) { 5469 GCGRP_REFRELE(gcgrp); 5470 gcgrp = NULL; 5471 } 5472 ire_refrele(save_ire); 5473 break; 5474 } 5475 5476 /* reference now held by IRE */ 5477 gcgrp = NULL; 5478 5479 if ((sire != NULL) && 5480 (sire->ire_flags & RTF_MULTIRT)) { 5481 copy_mp = copymsg(first_mp); 5482 if (copy_mp != NULL) 5483 MULTIRT_DEBUG_TAG(copy_mp); 5484 } 5485 5486 ire->ire_marks |= ire_marks; 5487 err = ndp_resolver(dst_ill, 
&dst, first_mp, zoneid); 5488 switch (err) { 5489 case 0: 5490 /* Prevent save_ire from getting deleted */ 5491 IRB_REFHOLD(save_ire->ire_bucket); 5492 /* Has it been removed already ? */ 5493 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5494 IRB_REFRELE(save_ire->ire_bucket); 5495 ire_refrele(save_ire); 5496 break; 5497 } 5498 5499 /* 5500 * We have a resolved cache entry, 5501 * add in the IRE. 5502 */ 5503 ire_add_then_send(q, ire, first_mp); 5504 if (ip6_asp_table_held) { 5505 ip6_asp_table_refrele(); 5506 ip6_asp_table_held = B_FALSE; 5507 } 5508 5509 /* Assert that it is not deleted yet. */ 5510 ASSERT(save_ire->ire_ptpn != NULL); 5511 IRB_REFRELE(save_ire->ire_bucket); 5512 ire_refrele(save_ire); 5513 /* 5514 * Check if another multirt route 5515 * must be resolved. 5516 */ 5517 ire = NULL; 5518 if (copy_mp != NULL) { 5519 /* 5520 * If we find a resolver, we ignore any 5521 * trailing top priority IRE_CACHE in 5522 * further loops. The reason is the 5523 * same as for noresolver. 5524 */ 5525 multirt_flags &= ~MULTIRT_CACHEGW; 5526 /* 5527 * Search for the next unresolved 5528 * multirt route. 5529 */ 5530 first_mp = copy_mp; 5531 copy_mp = NULL; 5532 mp = first_mp; 5533 if (mp->b_datap->db_type == M_CTL) { 5534 mp = mp->b_cont; 5535 } 5536 ASSERT(sire != NULL); 5537 dst = save_dst; 5538 /* 5539 * re-enter the loop 5540 */ 5541 multirt_resolve_next = B_TRUE; 5542 continue; 5543 } 5544 5545 if (sire != NULL) 5546 ire_refrele(sire); 5547 ill_refrele(dst_ill); 5548 ipif_refrele(src_ipif); 5549 return; 5550 5551 case EINPROGRESS: 5552 /* 5553 * mp was consumed - presumably queued. 5554 * No need for ire, presumably resolution is 5555 * in progress, and ire will be added when the 5556 * address is resolved. 
5557 */ 5558 if (ip6_asp_table_held) { 5559 ip6_asp_table_refrele(); 5560 ip6_asp_table_held = B_FALSE; 5561 } 5562 ASSERT(ire->ire_nce == NULL); 5563 ire_delete(ire); 5564 ire_refrele(save_ire); 5565 /* 5566 * Check if another multirt route 5567 * must be resolved. 5568 */ 5569 ire = NULL; 5570 if (copy_mp != NULL) { 5571 /* 5572 * If we find a resolver, we ignore any 5573 * trailing top priority IRE_CACHE in 5574 * further loops. The reason is the 5575 * same as for noresolver. 5576 */ 5577 multirt_flags &= ~MULTIRT_CACHEGW; 5578 /* 5579 * Search for the next unresolved 5580 * multirt route. 5581 */ 5582 first_mp = copy_mp; 5583 copy_mp = NULL; 5584 mp = first_mp; 5585 if (mp->b_datap->db_type == M_CTL) { 5586 mp = mp->b_cont; 5587 } 5588 ASSERT(sire != NULL); 5589 dst = save_dst; 5590 /* 5591 * re-enter the loop 5592 */ 5593 multirt_resolve_next = B_TRUE; 5594 continue; 5595 } 5596 if (sire != NULL) 5597 ire_refrele(sire); 5598 ill_refrele(dst_ill); 5599 ipif_refrele(src_ipif); 5600 return; 5601 default: 5602 /* Some transient error */ 5603 ASSERT(ire->ire_nce == NULL); 5604 ire_refrele(save_ire); 5605 break; 5606 } 5607 break; 5608 default: 5609 break; 5610 } 5611 if (ip6_asp_table_held) { 5612 ip6_asp_table_refrele(); 5613 ip6_asp_table_held = B_FALSE; 5614 } 5615 } while (multirt_resolve_next); 5616 5617 err_ret: 5618 ip1dbg(("ip_newroute_v6: dropped\n")); 5619 if (src_ipif != NULL) 5620 ipif_refrele(src_ipif); 5621 if (dst_ill != NULL) { 5622 need_rele = B_TRUE; 5623 ill = dst_ill; 5624 } 5625 if (ill != NULL) { 5626 if (mp->b_prev != NULL) { 5627 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5628 } else { 5629 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5630 } 5631 5632 if (need_rele) 5633 ill_refrele(ill); 5634 } else { 5635 if (mp->b_prev != NULL) { 5636 BUMP_MIB(&ip6_mib, ipIfStatsInDiscards); 5637 } else { 5638 BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); 5639 } 5640 } 5641 /* Did this packet originate externally? 
*/ 5642 if (mp->b_prev) { 5643 mp->b_next = NULL; 5644 mp->b_prev = NULL; 5645 } 5646 if (copy_mp != NULL) { 5647 MULTIRT_DEBUG_UNTAG(copy_mp); 5648 freemsg(copy_mp); 5649 } 5650 MULTIRT_DEBUG_UNTAG(first_mp); 5651 freemsg(first_mp); 5652 if (ire != NULL) 5653 ire_refrele(ire); 5654 if (sire != NULL) 5655 ire_refrele(sire); 5656 return; 5657 5658 icmp_err_ret: 5659 if (ip6_asp_table_held) 5660 ip6_asp_table_refrele(); 5661 if (src_ipif != NULL) 5662 ipif_refrele(src_ipif); 5663 if (dst_ill != NULL) { 5664 need_rele = B_TRUE; 5665 ill = dst_ill; 5666 } 5667 ip1dbg(("ip_newroute_v6: no route\n")); 5668 if (sire != NULL) 5669 ire_refrele(sire); 5670 /* 5671 * We need to set sire to NULL to avoid double freeing if we 5672 * ever goto err_ret from below. 5673 */ 5674 sire = NULL; 5675 ip6h = (ip6_t *)mp->b_rptr; 5676 /* Skip ip6i_t header if present */ 5677 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5678 /* Make sure the IPv6 header is present */ 5679 if ((mp->b_wptr - (uchar_t *)ip6h) < 5680 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5681 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5682 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5683 goto err_ret; 5684 } 5685 } 5686 mp->b_rptr += sizeof (ip6i_t); 5687 ip6h = (ip6_t *)mp->b_rptr; 5688 } 5689 /* Did this packet originate externally? 
*/ 5690 if (mp->b_prev) { 5691 if (ill != NULL) { 5692 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5693 } else { 5694 BUMP_MIB(&ip6_mib, ipIfStatsInNoRoutes); 5695 } 5696 mp->b_next = NULL; 5697 mp->b_prev = NULL; 5698 q = WR(q); 5699 } else { 5700 if (ill != NULL) { 5701 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5702 } else { 5703 BUMP_MIB(&ip6_mib, ipIfStatsOutNoRoutes); 5704 } 5705 if (ip_hdr_complete_v6(ip6h, zoneid)) { 5706 /* Failed */ 5707 if (copy_mp != NULL) { 5708 MULTIRT_DEBUG_UNTAG(copy_mp); 5709 freemsg(copy_mp); 5710 } 5711 MULTIRT_DEBUG_UNTAG(first_mp); 5712 freemsg(first_mp); 5713 if (ire != NULL) 5714 ire_refrele(ire); 5715 if (need_rele) 5716 ill_refrele(ill); 5717 return; 5718 } 5719 } 5720 5721 if (need_rele) 5722 ill_refrele(ill); 5723 5724 /* 5725 * At this point we will have ire only if RTF_BLACKHOLE 5726 * or RTF_REJECT flags are set on the IRE. It will not 5727 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5728 */ 5729 if (ire != NULL) { 5730 if (ire->ire_flags & RTF_BLACKHOLE) { 5731 ire_refrele(ire); 5732 if (copy_mp != NULL) { 5733 MULTIRT_DEBUG_UNTAG(copy_mp); 5734 freemsg(copy_mp); 5735 } 5736 MULTIRT_DEBUG_UNTAG(first_mp); 5737 freemsg(first_mp); 5738 return; 5739 } 5740 ire_refrele(ire); 5741 } 5742 if (ip_debug > 3) { 5743 /* ip2dbg */ 5744 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5745 AF_INET6, v6dstp); 5746 } 5747 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5748 B_FALSE, B_FALSE, zoneid); 5749 } 5750 5751 /* 5752 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5753 * we need to send out a packet to a destination address for which we do not 5754 * have specific routing information. It is only used for multicast packets. 5755 * 5756 * If unspec_src we allow creating an IRE with source address zero. 5757 * ire_send_v6() will delete it after the packet is sent. 
5758 */ 5759 void 5760 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5761 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5762 { 5763 ire_t *ire = NULL; 5764 ipif_t *src_ipif = NULL; 5765 int err = 0; 5766 ill_t *dst_ill = NULL; 5767 ire_t *save_ire; 5768 ushort_t ire_marks = 0; 5769 ipsec_out_t *io; 5770 ill_t *attach_ill = NULL; 5771 ill_t *ill; 5772 ip6_t *ip6h; 5773 mblk_t *first_mp; 5774 boolean_t ip6i_present; 5775 ire_t *fire = NULL; 5776 mblk_t *copy_mp = NULL; 5777 boolean_t multirt_resolve_next; 5778 in6_addr_t *v6dstp = &v6dst; 5779 boolean_t ipif_held = B_FALSE; 5780 boolean_t ill_held = B_FALSE; 5781 boolean_t ip6_asp_table_held = B_FALSE; 5782 5783 /* 5784 * This loop is run only once in most cases. 5785 * We loop to resolve further routes only when the destination 5786 * can be reached through multiple RTF_MULTIRT-flagged ires. 5787 */ 5788 do { 5789 multirt_resolve_next = B_FALSE; 5790 if (dst_ill != NULL) { 5791 ill_refrele(dst_ill); 5792 dst_ill = NULL; 5793 } 5794 5795 if (src_ipif != NULL) { 5796 ipif_refrele(src_ipif); 5797 src_ipif = NULL; 5798 } 5799 ASSERT(ipif != NULL); 5800 ill = ipif->ipif_ill; 5801 5802 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5803 if (ip_debug > 2) { 5804 /* ip1dbg */ 5805 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5806 AF_INET6, v6dstp); 5807 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5808 ill->ill_name, ipif->ipif_isv6); 5809 } 5810 5811 first_mp = mp; 5812 if (mp->b_datap->db_type == M_CTL) { 5813 mp = mp->b_cont; 5814 io = (ipsec_out_t *)first_mp->b_rptr; 5815 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5816 } else { 5817 io = NULL; 5818 } 5819 5820 /* 5821 * If the interface is a pt-pt interface we look for an 5822 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5823 * local_address and the pt-pt destination address. 5824 * Otherwise we just match the local address. 
5825 */ 5826 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5827 goto err_ret; 5828 } 5829 /* 5830 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5831 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5832 * as it could be NULL. 5833 * 5834 * This information can appear either in an ip6i_t or an 5835 * IPSEC_OUT message. 5836 */ 5837 ip6h = (ip6_t *)mp->b_rptr; 5838 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5839 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5840 if (!ip6i_present || 5841 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5842 attach_ill = ip_grab_attach_ill(ill, first_mp, 5843 (ip6i_present ? 5844 ((ip6i_t *)ip6h)->ip6i_ifindex : 5845 io->ipsec_out_ill_index), B_TRUE); 5846 /* Failure case frees things for us. */ 5847 if (attach_ill == NULL) 5848 return; 5849 5850 /* 5851 * Check if we need an ire that will not be 5852 * looked up by anybody else i.e. HIDDEN. 5853 */ 5854 if (ill_is_probeonly(attach_ill)) 5855 ire_marks = IRE_MARK_HIDDEN; 5856 } 5857 } 5858 5859 /* 5860 * We check if an IRE_OFFSUBNET for the addr that goes through 5861 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5862 * RTF_MULTIRT flags must be honored. 5863 */ 5864 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5865 ip2dbg(("ip_newroute_ipif_v6: " 5866 "ipif_lookup_multi_ire_v6(" 5867 "ipif %p, dst %08x) = fire %p\n", 5868 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5869 (void *)fire)); 5870 5871 /* 5872 * If the application specified the ill (ifindex), we still 5873 * load spread. Only if the packets needs to go out specifically 5874 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5875 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5876 * multirouting, then we don't try to use a different ill for 5877 * load spreading. 
5878 */ 5879 if (attach_ill == NULL) { 5880 /* 5881 * If the interface belongs to an interface group, 5882 * make sure the next possible interface in the group 5883 * is used. This encourages load spreading among peers 5884 * in an interface group. 5885 * 5886 * Note: While we pick a dst_ill we are really only 5887 * interested in the ill for load spreading. The source 5888 * ipif is determined by source address selection below. 5889 */ 5890 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5891 dst_ill = ipif->ipif_ill; 5892 /* For uniformity do a refhold */ 5893 ill_refhold(dst_ill); 5894 } else { 5895 /* refheld by ip_newroute_get_dst_ill_v6 */ 5896 dst_ill = 5897 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5898 } 5899 if (dst_ill == NULL) { 5900 if (ip_debug > 2) { 5901 pr_addr_dbg("ip_newroute_ipif_v6: " 5902 "no dst ill for dst %s\n", 5903 AF_INET6, v6dstp); 5904 } 5905 goto err_ret; 5906 } 5907 } else { 5908 dst_ill = ipif->ipif_ill; 5909 /* 5910 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5911 * and IPV6_BOUND_PIF case. 5912 */ 5913 ASSERT(dst_ill == attach_ill); 5914 /* attach_ill is already refheld */ 5915 } 5916 /* 5917 * Pick a source address which matches the scope of the 5918 * destination address. 5919 * For RTF_SETSRC routes, the source address is imposed by the 5920 * parent ire (fire). 5921 */ 5922 ASSERT(src_ipif == NULL); 5923 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5924 /* 5925 * Check that the ipif matching the requested source 5926 * address still exists. 
5927 */ 5928 src_ipif = 5929 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5930 NULL, zoneid, NULL, NULL, NULL, NULL); 5931 } 5932 if (src_ipif == NULL && ip6_asp_can_lookup()) { 5933 ip6_asp_table_held = B_TRUE; 5934 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5935 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5936 } 5937 5938 if (src_ipif == NULL) { 5939 if (!unspec_src) { 5940 if (ip_debug > 2) { 5941 /* ip1dbg */ 5942 pr_addr_dbg("ip_newroute_ipif_v6: " 5943 "no src for dst %s\n,", 5944 AF_INET6, v6dstp); 5945 printf(" through interface %s\n", 5946 dst_ill->ill_name); 5947 } 5948 goto err_ret; 5949 } 5950 src_ipif = ipif; 5951 ipif_refhold(src_ipif); 5952 } 5953 ire = ipif_to_ire_v6(ipif); 5954 if (ire == NULL) { 5955 if (ip_debug > 2) { 5956 /* ip1dbg */ 5957 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5958 AF_INET6, &ipif->ipif_v6lcl_addr); 5959 printf("ip_newroute_ipif_v6: " 5960 "if %s\n", dst_ill->ill_name); 5961 } 5962 goto err_ret; 5963 } 5964 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5965 goto err_ret; 5966 5967 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5968 5969 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5970 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5971 if (ip_debug > 2) { 5972 /* ip1dbg */ 5973 pr_addr_dbg(" address %s\n", 5974 AF_INET6, &ire->ire_src_addr_v6); 5975 } 5976 save_ire = ire; 5977 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5978 (void *)ire, (void *)ipif)); 5979 5980 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5981 /* 5982 * an IRE_OFFSUBET was looked up 5983 * on that interface. 5984 * this ire has RTF_MULTIRT flag, 5985 * so the resolution loop 5986 * will be re-entered to resolve 5987 * additional routes on other 5988 * interfaces. For that purpose, 5989 * a copy of the packet is 5990 * made at this point. 
5991 */ 5992 fire->ire_last_used_time = lbolt; 5993 copy_mp = copymsg(first_mp); 5994 if (copy_mp) { 5995 MULTIRT_DEBUG_TAG(copy_mp); 5996 } 5997 } 5998 5999 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6000 switch (ire->ire_type) { 6001 case IRE_IF_NORESOLVER: { 6002 /* We have what we need to build an IRE_CACHE. */ 6003 mblk_t *dlureq_mp; 6004 6005 /* 6006 * Create a new dlureq_mp with the 6007 * IPv6 gateway address in destination address in the 6008 * DLPI hdr if the physical length is exactly 16 bytes. 6009 */ 6010 ASSERT(dst_ill->ill_isv6); 6011 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 6012 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 6013 dst_ill->ill_phys_addr_length, 6014 dst_ill->ill_sap, 6015 dst_ill->ill_sap_length); 6016 } else { 6017 /* 6018 * handle the Gated case, where we create 6019 * a NORESOLVER route for loopback. 6020 */ 6021 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 6022 break; 6023 dlureq_mp = ill_dlur_gen(NULL, 6024 dst_ill->ill_phys_addr_length, 6025 dst_ill->ill_sap, 6026 dst_ill->ill_sap_length); 6027 } 6028 6029 if (dlureq_mp == NULL) 6030 break; 6031 /* 6032 * The newly created ire will inherit the flags of the 6033 * parent ire, if any. 6034 */ 6035 ire = ire_create_v6( 6036 v6dstp, /* dest address */ 6037 &ipv6_all_ones, /* mask */ 6038 &src_ipif->ipif_v6src_addr, /* source address */ 6039 NULL, /* gateway address */ 6040 &save_ire->ire_max_frag, 6041 NULL, /* Fast Path header */ 6042 dst_ill->ill_rq, /* recv-from queue */ 6043 dst_ill->ill_wq, /* send-to queue */ 6044 IRE_CACHE, 6045 dlureq_mp, 6046 src_ipif, 6047 NULL, 6048 (fire != NULL) ? /* Parent handle */ 6049 fire->ire_phandle : 0, 6050 save_ire->ire_ihandle, /* Interface handle */ 6051 (fire != NULL) ? 
6052 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6053 0, 6054 &ire_uinfo_null, 6055 NULL, 6056 NULL); 6057 6058 freeb(dlureq_mp); 6059 6060 if (ire == NULL) { 6061 ire_refrele(save_ire); 6062 break; 6063 } 6064 6065 ire->ire_marks |= ire_marks; 6066 6067 err = ndp_noresolver(dst_ill, v6dstp); 6068 if (err != 0) { 6069 ire_refrele(save_ire); 6070 break; 6071 } 6072 6073 /* Prevent save_ire from getting deleted */ 6074 IRB_REFHOLD(save_ire->ire_bucket); 6075 /* Has it been removed already ? */ 6076 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6077 IRB_REFRELE(save_ire->ire_bucket); 6078 ire_refrele(save_ire); 6079 break; 6080 } 6081 6082 ire_add_then_send(q, ire, first_mp); 6083 if (ip6_asp_table_held) { 6084 ip6_asp_table_refrele(); 6085 ip6_asp_table_held = B_FALSE; 6086 } 6087 6088 /* Assert that it is not deleted yet. */ 6089 ASSERT(save_ire->ire_ptpn != NULL); 6090 IRB_REFRELE(save_ire->ire_bucket); 6091 ire_refrele(save_ire); 6092 if (fire != NULL) { 6093 ire_refrele(fire); 6094 fire = NULL; 6095 } 6096 6097 /* 6098 * The resolution loop is re-entered if we 6099 * actually are in a multirouting case. 6100 */ 6101 if (copy_mp != NULL) { 6102 boolean_t need_resolve = 6103 ire_multirt_need_resolve_v6(v6dstp, 6104 MBLK_GETLABEL(copy_mp)); 6105 if (!need_resolve) { 6106 MULTIRT_DEBUG_UNTAG(copy_mp); 6107 freemsg(copy_mp); 6108 copy_mp = NULL; 6109 } else { 6110 /* 6111 * ipif_lookup_group_v6() calls 6112 * ire_lookup_multi_v6() that uses 6113 * ire_ftable_lookup_v6() to find 6114 * an IRE_INTERFACE for the group. 6115 * In the multirt case, 6116 * ire_lookup_multi_v6() then invokes 6117 * ire_multirt_lookup_v6() to find 6118 * the next resolvable ire. 6119 * As a result, we obtain a new 6120 * interface, derived from the 6121 * next ire. 
6122 */ 6123 if (ipif_held) { 6124 ipif_refrele(ipif); 6125 ipif_held = B_FALSE; 6126 } 6127 ipif = ipif_lookup_group_v6(v6dstp, 6128 zoneid); 6129 ip2dbg(("ip_newroute_ipif: " 6130 "multirt dst %08x, ipif %p\n", 6131 ntohl(V4_PART_OF_V6((*v6dstp))), 6132 (void *)ipif)); 6133 if (ipif != NULL) { 6134 ipif_held = B_TRUE; 6135 mp = copy_mp; 6136 copy_mp = NULL; 6137 multirt_resolve_next = 6138 B_TRUE; 6139 continue; 6140 } else { 6141 freemsg(copy_mp); 6142 } 6143 } 6144 } 6145 ill_refrele(dst_ill); 6146 if (ipif_held) { 6147 ipif_refrele(ipif); 6148 ipif_held = B_FALSE; 6149 } 6150 if (src_ipif != NULL) 6151 ipif_refrele(src_ipif); 6152 return; 6153 } 6154 case IRE_IF_RESOLVER: { 6155 6156 ASSERT(dst_ill->ill_isv6); 6157 6158 /* 6159 * We obtain a partial IRE_CACHE which we will pass 6160 * along with the resolver query. When the response 6161 * comes back it will be there ready for us to add. 6162 */ 6163 /* 6164 * the newly created ire will inherit the flags of the 6165 * parent ire, if any. 6166 */ 6167 ire = ire_create_v6( 6168 v6dstp, /* dest address */ 6169 &ipv6_all_ones, /* mask */ 6170 &src_ipif->ipif_v6src_addr, /* source address */ 6171 NULL, /* gateway address */ 6172 &save_ire->ire_max_frag, 6173 NULL, /* Fast Path header */ 6174 dst_ill->ill_rq, /* recv-from queue */ 6175 dst_ill->ill_wq, /* send-to queue */ 6176 IRE_CACHE, 6177 NULL, 6178 src_ipif, 6179 NULL, 6180 (fire != NULL) ? /* Parent handle */ 6181 fire->ire_phandle : 0, 6182 save_ire->ire_ihandle, /* Interface handle */ 6183 (fire != NULL) ? 
6184 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6185 0, 6186 &ire_uinfo_null, 6187 NULL, 6188 NULL); 6189 6190 if (ire == NULL) { 6191 ire_refrele(save_ire); 6192 break; 6193 } 6194 6195 ire->ire_marks |= ire_marks; 6196 6197 /* Resolve and add ire to the ctable */ 6198 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6199 switch (err) { 6200 case 0: 6201 /* Prevent save_ire from getting deleted */ 6202 IRB_REFHOLD(save_ire->ire_bucket); 6203 /* Has it been removed already ? */ 6204 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6205 IRB_REFRELE(save_ire->ire_bucket); 6206 ire_refrele(save_ire); 6207 break; 6208 } 6209 /* 6210 * We have a resolved cache entry, 6211 * add in the IRE. 6212 */ 6213 ire_add_then_send(q, ire, first_mp); 6214 if (ip6_asp_table_held) { 6215 ip6_asp_table_refrele(); 6216 ip6_asp_table_held = B_FALSE; 6217 } 6218 6219 /* Assert that it is not deleted yet. */ 6220 ASSERT(save_ire->ire_ptpn != NULL); 6221 IRB_REFRELE(save_ire->ire_bucket); 6222 ire_refrele(save_ire); 6223 if (fire != NULL) { 6224 ire_refrele(fire); 6225 fire = NULL; 6226 } 6227 6228 /* 6229 * The resolution loop is re-entered if we 6230 * actually are in a multirouting case. 6231 */ 6232 if (copy_mp != NULL) { 6233 boolean_t need_resolve = 6234 ire_multirt_need_resolve_v6(v6dstp, 6235 MBLK_GETLABEL(copy_mp)); 6236 if (!need_resolve) { 6237 MULTIRT_DEBUG_UNTAG(copy_mp); 6238 freemsg(copy_mp); 6239 copy_mp = NULL; 6240 } else { 6241 /* 6242 * ipif_lookup_group_v6() calls 6243 * ire_lookup_multi_v6() that 6244 * uses ire_ftable_lookup_v6() 6245 * to find an IRE_INTERFACE for 6246 * the group. In the multirt 6247 * case, ire_lookup_multi_v6() 6248 * then invokes 6249 * ire_multirt_lookup_v6() to 6250 * find the next resolvable ire. 6251 * As a result, we obtain a new 6252 * interface, derived from the 6253 * next ire. 
6254 */ 6255 if (ipif_held) { 6256 ipif_refrele(ipif); 6257 ipif_held = B_FALSE; 6258 } 6259 ipif = ipif_lookup_group_v6( 6260 v6dstp, zoneid); 6261 ip2dbg(("ip_newroute_ipif: " 6262 "multirt dst %08x, " 6263 "ipif %p\n", 6264 ntohl(V4_PART_OF_V6( 6265 (*v6dstp))), 6266 (void *)ipif)); 6267 if (ipif != NULL) { 6268 ipif_held = B_TRUE; 6269 mp = copy_mp; 6270 copy_mp = NULL; 6271 multirt_resolve_next = 6272 B_TRUE; 6273 continue; 6274 } else { 6275 freemsg(copy_mp); 6276 } 6277 } 6278 } 6279 ill_refrele(dst_ill); 6280 if (ipif_held) { 6281 ipif_refrele(ipif); 6282 ipif_held = B_FALSE; 6283 } 6284 if (src_ipif != NULL) 6285 ipif_refrele(src_ipif); 6286 return; 6287 6288 case EINPROGRESS: 6289 /* 6290 * mp was consumed - presumably queued. 6291 * No need for ire, presumably resolution is 6292 * in progress, and ire will be added when the 6293 * address is resolved. 6294 */ 6295 if (ip6_asp_table_held) { 6296 ip6_asp_table_refrele(); 6297 ip6_asp_table_held = B_FALSE; 6298 } 6299 ire_delete(ire); 6300 ire_refrele(save_ire); 6301 if (fire != NULL) { 6302 ire_refrele(fire); 6303 fire = NULL; 6304 } 6305 6306 /* 6307 * The resolution loop is re-entered if we 6308 * actually are in a multirouting case. 6309 */ 6310 if (copy_mp != NULL) { 6311 boolean_t need_resolve = 6312 ire_multirt_need_resolve_v6(v6dstp, 6313 MBLK_GETLABEL(copy_mp)); 6314 if (!need_resolve) { 6315 MULTIRT_DEBUG_UNTAG(copy_mp); 6316 freemsg(copy_mp); 6317 copy_mp = NULL; 6318 } else { 6319 /* 6320 * ipif_lookup_group_v6() calls 6321 * ire_lookup_multi_v6() that 6322 * uses ire_ftable_lookup_v6() 6323 * to find an IRE_INTERFACE for 6324 * the group. In the multirt 6325 * case, ire_lookup_multi_v6() 6326 * then invokes 6327 * ire_multirt_lookup_v6() to 6328 * find the next resolvable ire. 6329 * As a result, we obtain a new 6330 * interface, derived from the 6331 * next ire. 
6332 */ 6333 if (ipif_held) { 6334 ipif_refrele(ipif); 6335 ipif_held = B_FALSE; 6336 } 6337 ipif = ipif_lookup_group_v6( 6338 v6dstp, zoneid); 6339 ip2dbg(("ip_newroute_ipif: " 6340 "multirt dst %08x, " 6341 "ipif %p\n", 6342 ntohl(V4_PART_OF_V6( 6343 (*v6dstp))), 6344 (void *)ipif)); 6345 if (ipif != NULL) { 6346 ipif_held = B_TRUE; 6347 mp = copy_mp; 6348 copy_mp = NULL; 6349 multirt_resolve_next = 6350 B_TRUE; 6351 continue; 6352 } else { 6353 freemsg(copy_mp); 6354 } 6355 } 6356 } 6357 ill_refrele(dst_ill); 6358 if (ipif_held) { 6359 ipif_refrele(ipif); 6360 ipif_held = B_FALSE; 6361 } 6362 if (src_ipif != NULL) 6363 ipif_refrele(src_ipif); 6364 return; 6365 default: 6366 /* Some transient error */ 6367 ire_refrele(save_ire); 6368 break; 6369 } 6370 break; 6371 } 6372 default: 6373 break; 6374 } 6375 if (ip6_asp_table_held) { 6376 ip6_asp_table_refrele(); 6377 ip6_asp_table_held = B_FALSE; 6378 } 6379 } while (multirt_resolve_next); 6380 6381 err_ret: 6382 if (ip6_asp_table_held) 6383 ip6_asp_table_refrele(); 6384 if (ire != NULL) 6385 ire_refrele(ire); 6386 if (fire != NULL) 6387 ire_refrele(fire); 6388 if (ipif != NULL && ipif_held) 6389 ipif_refrele(ipif); 6390 if (src_ipif != NULL) 6391 ipif_refrele(src_ipif); 6392 /* Multicast - no point in trying to generate ICMP error */ 6393 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6394 if (dst_ill != NULL) { 6395 ill = dst_ill; 6396 ill_held = B_TRUE; 6397 } 6398 if (mp->b_prev || mp->b_next) { 6399 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6400 } else { 6401 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6402 } 6403 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6404 mp->b_next = NULL; 6405 mp->b_prev = NULL; 6406 freemsg(first_mp); 6407 if (ill_held) 6408 ill_refrele(ill); 6409 } 6410 6411 /* 6412 * Parse and process any hop-by-hop or destination options. 6413 * 6414 * Assumes that q is an ill read queue so that ICMP errors for link-local 6415 * destinations are sent out the correct interface. 
6416 * 6417 * Returns -1 if there was an error and mp has been consumed. 6418 * Returns 0 if no special action is needed. 6419 * Returns 1 if the packet contained a router alert option for this node 6420 * which is verified to be "interesting/known" for our implementation. 6421 * 6422 * XXX Note: In future as more hbh or dest options are defined, 6423 * it may be better to have different routines for hbh and dest 6424 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6425 * may have same value in different namespaces. Or is it same namespace ?? 6426 * Current code checks for each opt_type (other than pads) if it is in 6427 * the expected nexthdr (hbh or dest) 6428 */ 6429 static int 6430 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6431 uint8_t *optptr, uint_t optlen, uint8_t hdr_type) 6432 { 6433 uint8_t opt_type; 6434 uint_t optused; 6435 int ret = 0; 6436 mblk_t *first_mp; 6437 const char *errtype; 6438 zoneid_t zoneid; 6439 ill_t *ill = q->q_ptr; 6440 6441 first_mp = mp; 6442 if (mp->b_datap->db_type == M_CTL) { 6443 mp = mp->b_cont; 6444 } 6445 6446 while (optlen != 0) { 6447 opt_type = *optptr; 6448 if (opt_type == IP6OPT_PAD1) { 6449 optused = 1; 6450 } else { 6451 if (optlen < 2) 6452 goto bad_opt; 6453 errtype = "malformed"; 6454 if (opt_type == ip6opt_ls) { 6455 optused = 2 + optptr[1]; 6456 if (optused > optlen) 6457 goto bad_opt; 6458 } else switch (opt_type) { 6459 case IP6OPT_PADN: 6460 /* 6461 * Note:We don't verify that (N-2) pad octets 6462 * are zero as required by spec. Adhere to 6463 * "be liberal in what you accept..." part of 6464 * implementation philosophy (RFC791,RFC1122) 6465 */ 6466 optused = 2 + optptr[1]; 6467 if (optused > optlen) 6468 goto bad_opt; 6469 break; 6470 6471 case IP6OPT_JUMBO: 6472 if (hdr_type != IPPROTO_HOPOPTS) 6473 goto opt_error; 6474 goto opt_error; /* XXX Not implemented! 
*/ 6475 6476 case IP6OPT_ROUTER_ALERT: { 6477 struct ip6_opt_router *or; 6478 6479 if (hdr_type != IPPROTO_HOPOPTS) 6480 goto opt_error; 6481 optused = 2 + optptr[1]; 6482 if (optused > optlen) 6483 goto bad_opt; 6484 or = (struct ip6_opt_router *)optptr; 6485 /* Check total length and alignment */ 6486 if (optused != sizeof (*or) || 6487 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6488 goto opt_error; 6489 /* Check value */ 6490 switch (*((uint16_t *)or->ip6or_value)) { 6491 case IP6_ALERT_MLD: 6492 case IP6_ALERT_RSVP: 6493 ret = 1; 6494 } 6495 break; 6496 } 6497 case IP6OPT_HOME_ADDRESS: { 6498 /* 6499 * Minimal support for the home address option 6500 * (which is required by all IPv6 nodes). 6501 * Implement by just swapping the home address 6502 * and source address. 6503 * XXX Note: this has IPsec implications since 6504 * AH needs to take this into account. 6505 * Also, when IPsec is used we need to ensure 6506 * that this is only processed once 6507 * in the received packet (to avoid swapping 6508 * back and forth). 6509 * NOTE:This option processing is considered 6510 * to be unsafe and prone to a denial of 6511 * service attack. 6512 * The current processing is not safe even with 6513 * IPsec secured IP packets. Since the home 6514 * address option processing requirement still 6515 * is in the IETF draft and in the process of 6516 * being redefined for its usage, it has been 6517 * decided to turn off the option by default. 6518 * If this section of code needs to be executed, 6519 * ndd variable ip6_ignore_home_address_opt 6520 * should be set to 0 at the user's own risk. 6521 */ 6522 struct ip6_opt_home_address *oh; 6523 in6_addr_t tmp; 6524 6525 if (ipv6_ignore_home_address_opt) 6526 goto opt_error; 6527 6528 if (hdr_type != IPPROTO_DSTOPTS) 6529 goto opt_error; 6530 optused = 2 + optptr[1]; 6531 if (optused > optlen) 6532 goto bad_opt; 6533 6534 /* 6535 * We did this dest. opt the first time 6536 * around (i.e. before AH processing). 
6537 * If we've done AH... stop now. 6538 */ 6539 if (first_mp != mp) { 6540 ipsec_in_t *ii; 6541 6542 ii = (ipsec_in_t *)first_mp->b_rptr; 6543 if (ii->ipsec_in_ah_sa != NULL) 6544 break; 6545 } 6546 6547 oh = (struct ip6_opt_home_address *)optptr; 6548 /* Check total length and alignment */ 6549 if (optused < sizeof (*oh) || 6550 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6551 goto opt_error; 6552 /* Swap ip6_src and the home address */ 6553 tmp = ip6h->ip6_src; 6554 /* XXX Note: only 8 byte alignment option */ 6555 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6556 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6557 break; 6558 } 6559 6560 case IP6OPT_TUNNEL_LIMIT: 6561 if (hdr_type != IPPROTO_DSTOPTS) { 6562 goto opt_error; 6563 } 6564 optused = 2 + optptr[1]; 6565 if (optused > optlen) { 6566 goto bad_opt; 6567 } 6568 if (optused != 3) { 6569 goto opt_error; 6570 } 6571 break; 6572 6573 default: 6574 errtype = "unknown"; 6575 /* FALLTHROUGH */ 6576 opt_error: 6577 /* Determine which zone should send error */ 6578 zoneid = ipif_lookup_addr_zoneid_v6( 6579 &ip6h->ip6_dst, ill); 6580 switch (IP6OPT_TYPE(opt_type)) { 6581 case IP6OPT_TYPE_SKIP: 6582 optused = 2 + optptr[1]; 6583 if (optused > optlen) 6584 goto bad_opt; 6585 ip1dbg(("ip_process_options_v6: %s " 6586 "opt 0x%x skipped\n", 6587 errtype, opt_type)); 6588 break; 6589 case IP6OPT_TYPE_DISCARD: 6590 ip1dbg(("ip_process_options_v6: %s " 6591 "opt 0x%x; packet dropped\n", 6592 errtype, opt_type)); 6593 freemsg(first_mp); 6594 return (-1); 6595 case IP6OPT_TYPE_ICMP: 6596 if (zoneid == ALL_ZONES) { 6597 freemsg(first_mp); 6598 return (-1); 6599 } 6600 icmp_param_problem_v6(WR(q), first_mp, 6601 ICMP6_PARAMPROB_OPTION, 6602 (uint32_t)(optptr - 6603 (uint8_t *)ip6h), 6604 B_FALSE, B_FALSE, zoneid); 6605 return (-1); 6606 case IP6OPT_TYPE_FORCEICMP: 6607 if (zoneid == ALL_ZONES) { 6608 freemsg(first_mp); 6609 return (-1); 6610 } 6611 icmp_param_problem_v6(WR(q), first_mp, 6612 ICMP6_PARAMPROB_OPTION, 6613 
(uint32_t)(optptr - 6614 (uint8_t *)ip6h), 6615 B_FALSE, B_TRUE, zoneid); 6616 return (-1); 6617 default: 6618 ASSERT(0); 6619 } 6620 } 6621 } 6622 optlen -= optused; 6623 optptr += optused; 6624 } 6625 return (ret); 6626 6627 bad_opt: 6628 /* Determine which zone should send error */ 6629 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 6630 if (zoneid == ALL_ZONES) { 6631 freemsg(first_mp); 6632 } else { 6633 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6634 (uint32_t)(optptr - (uint8_t *)ip6h), 6635 B_FALSE, B_FALSE, zoneid); 6636 } 6637 return (-1); 6638 } 6639 6640 /* 6641 * Process a routing header that is not yet empty. 6642 * Only handles type 0 routing headers. 6643 */ 6644 static void 6645 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6646 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6647 { 6648 ip6_rthdr0_t *rthdr; 6649 uint_t ehdrlen; 6650 uint_t numaddr; 6651 in6_addr_t *addrptr; 6652 in6_addr_t tmp; 6653 6654 ASSERT(rth->ip6r_segleft != 0); 6655 6656 if (!ipv6_forward_src_routed) { 6657 /* XXX Check for source routed out same interface? 
*/ 6658 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6659 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6660 freemsg(hada_mp); 6661 freemsg(mp); 6662 return; 6663 } 6664 6665 if (rth->ip6r_type != 0) { 6666 if (hada_mp != NULL) 6667 goto hada_drop; 6668 /* Sent by forwarding path, and router is global zone */ 6669 icmp_param_problem_v6(WR(q), mp, 6670 ICMP6_PARAMPROB_HEADER, 6671 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6672 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6673 return; 6674 } 6675 rthdr = (ip6_rthdr0_t *)rth; 6676 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6677 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6678 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6679 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6680 if (rthdr->ip6r0_len & 0x1) { 6681 /* An odd length is impossible */ 6682 if (hada_mp != NULL) 6683 goto hada_drop; 6684 /* Sent by forwarding path, and router is global zone */ 6685 icmp_param_problem_v6(WR(q), mp, 6686 ICMP6_PARAMPROB_HEADER, 6687 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6688 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6689 return; 6690 } 6691 numaddr = rthdr->ip6r0_len / 2; 6692 if (rthdr->ip6r0_segleft > numaddr) { 6693 /* segleft exceeds number of addresses in routing header */ 6694 if (hada_mp != NULL) 6695 goto hada_drop; 6696 /* Sent by forwarding path, and router is global zone */ 6697 icmp_param_problem_v6(WR(q), mp, 6698 ICMP6_PARAMPROB_HEADER, 6699 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6700 (uchar_t *)ip6h), 6701 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6702 return; 6703 } 6704 addrptr += (numaddr - rthdr->ip6r0_segleft); 6705 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6706 IN6_IS_ADDR_MULTICAST(addrptr)) { 6707 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6708 freemsg(hada_mp); 6709 freemsg(mp); 6710 return; 6711 } 6712 /* Swap */ 6713 tmp = *addrptr; 6714 *addrptr = ip6h->ip6_dst; 6715 ip6h->ip6_dst = tmp; 6716 rthdr->ip6r0_segleft--; 6717 /* Don't allow 
any mapped addresses - ip_wput_v6 can't handle them */ 6718 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6719 if (hada_mp != NULL) 6720 goto hada_drop; 6721 /* Sent by forwarding path, and router is global zone */ 6722 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6723 B_FALSE, B_FALSE, GLOBAL_ZONEID); 6724 return; 6725 } 6726 if (ip_check_v6_mblk(mp, ill) == 0) { 6727 ip6h = (ip6_t *)mp->b_rptr; 6728 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6729 } 6730 return; 6731 hada_drop: 6732 /* IPsec kstats: bean counter? */ 6733 freemsg(hada_mp); 6734 freemsg(mp); 6735 } 6736 6737 /* 6738 * Read side put procedure for IPv6 module. 6739 */ 6740 void 6741 ip_rput_v6(queue_t *q, mblk_t *mp) 6742 { 6743 mblk_t *first_mp; 6744 mblk_t *hada_mp = NULL; 6745 ip6_t *ip6h; 6746 boolean_t ll_multicast = B_FALSE; 6747 boolean_t mctl_present = B_FALSE; 6748 ill_t *ill; 6749 struct iocblk *iocp; 6750 uint_t flags = 0; 6751 mblk_t *dl_mp; 6752 6753 ill = (ill_t *)q->q_ptr; 6754 if (ill->ill_state_flags & ILL_CONDEMNED) { 6755 union DL_primitives *dl; 6756 6757 dl = (union DL_primitives *)mp->b_rptr; 6758 /* 6759 * Things are opening or closing - only accept DLPI 6760 * ack messages. If the stream is closing and ip_wsrv 6761 * has completed, ip_close is out of the qwait, but has 6762 * not yet completed qprocsoff. Don't proceed any further 6763 * because the ill has been cleaned up and things hanging 6764 * off the ill have been freed. 6765 */ 6766 if ((mp->b_datap->db_type != M_PCPROTO) || 6767 (dl->dl_primitive == DL_UNITDATA_IND)) { 6768 inet_freemsg(mp); 6769 return; 6770 } 6771 } 6772 6773 dl_mp = NULL; 6774 switch (mp->b_datap->db_type) { 6775 case M_DATA: { 6776 int hlen; 6777 uchar_t *ucp; 6778 struct ether_header *eh; 6779 dl_unitdata_ind_t *dui; 6780 6781 /* 6782 * This is a work-around for CR 6451644, a bug in Nemo. It 6783 * should be removed when that problem is fixed. 
6784 */ 6785 if (ill->ill_mactype == DL_ETHER && 6786 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6787 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6788 ucp[-2] == (IP6_DL_SAP >> 8)) { 6789 if (hlen >= sizeof (struct ether_vlan_header) && 6790 ucp[-5] == 0 && ucp[-6] == 0x81) 6791 ucp -= sizeof (struct ether_vlan_header); 6792 else 6793 ucp -= sizeof (struct ether_header); 6794 /* 6795 * If it's a group address, then fabricate a 6796 * DL_UNITDATA_IND message. 6797 */ 6798 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6799 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6800 BPRI_HI)) != NULL) { 6801 eh = (struct ether_header *)ucp; 6802 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6803 DB_TYPE(dl_mp) = M_PROTO; 6804 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6805 dui->dl_primitive = DL_UNITDATA_IND; 6806 dui->dl_dest_addr_length = 8; 6807 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6808 dui->dl_src_addr_length = 8; 6809 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6810 8; 6811 dui->dl_group_address = 1; 6812 ucp = (uchar_t *)(dui + 1); 6813 if (ill->ill_sap_length > 0) 6814 ucp += ill->ill_sap_length; 6815 bcopy(&eh->ether_dhost, ucp, 6); 6816 bcopy(&eh->ether_shost, ucp + 8, 6); 6817 ucp = (uchar_t *)(dui + 1); 6818 if (ill->ill_sap_length < 0) 6819 ucp += 8 + ill->ill_sap_length; 6820 bcopy(&eh->ether_type, ucp, 2); 6821 bcopy(&eh->ether_type, ucp + 8, 2); 6822 } 6823 } 6824 break; 6825 } 6826 6827 case M_PROTO: 6828 case M_PCPROTO: 6829 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6830 DL_UNITDATA_IND) { 6831 /* Go handle anything other than data elsewhere. */ 6832 ip_rput_dlpi(q, mp); 6833 return; 6834 } 6835 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6836 ll_multicast = dlur->dl_group_address; 6837 #undef dlur 6838 /* Save the DLPI header. 
*/ 6839 dl_mp = mp; 6840 mp = mp->b_cont; 6841 dl_mp->b_cont = NULL; 6842 break; 6843 case M_BREAK: 6844 panic("ip_rput_v6: got an M_BREAK"); 6845 /*NOTREACHED*/ 6846 case M_IOCACK: 6847 iocp = (struct iocblk *)mp->b_rptr; 6848 switch (iocp->ioc_cmd) { 6849 case DL_IOC_HDR_INFO: 6850 ill = (ill_t *)q->q_ptr; 6851 ill_fastpath_ack(ill, mp); 6852 return; 6853 case SIOCSTUNPARAM: 6854 case SIOCGTUNPARAM: 6855 case OSIOCSTUNPARAM: 6856 case OSIOCGTUNPARAM: 6857 /* Go through qwriter */ 6858 break; 6859 default: 6860 putnext(q, mp); 6861 return; 6862 } 6863 /* FALLTHRU */ 6864 case M_ERROR: 6865 case M_HANGUP: 6866 mutex_enter(&ill->ill_lock); 6867 if (ill->ill_state_flags & ILL_CONDEMNED) { 6868 mutex_exit(&ill->ill_lock); 6869 freemsg(mp); 6870 return; 6871 } 6872 ill_refhold_locked(ill); 6873 mutex_exit(&ill->ill_lock); 6874 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6875 return; 6876 case M_CTL: 6877 if ((MBLKL(mp) > sizeof (int)) && 6878 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6879 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6880 mctl_present = B_TRUE; 6881 break; 6882 } 6883 putnext(q, mp); 6884 return; 6885 case M_IOCNAK: 6886 iocp = (struct iocblk *)mp->b_rptr; 6887 switch (iocp->ioc_cmd) { 6888 case DL_IOC_HDR_INFO: 6889 case SIOCSTUNPARAM: 6890 case SIOCGTUNPARAM: 6891 case OSIOCSTUNPARAM: 6892 case OSIOCGTUNPARAM: 6893 mutex_enter(&ill->ill_lock); 6894 if (ill->ill_state_flags & ILL_CONDEMNED) { 6895 mutex_exit(&ill->ill_lock); 6896 freemsg(mp); 6897 return; 6898 } 6899 ill_refhold_locked(ill); 6900 mutex_exit(&ill->ill_lock); 6901 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6902 B_FALSE); 6903 return; 6904 default: 6905 break; 6906 } 6907 /* FALLTHRU */ 6908 default: 6909 putnext(q, mp); 6910 return; 6911 } 6912 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6913 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6914 (mp->b_cont == NULL) ? 
MBLKL(mp) : msgdsize(mp)); 6915 /* 6916 * if db_ref > 1 then copymsg and free original. Packet may be 6917 * changed and do not want other entity who has a reference to this 6918 * message to trip over the changes. This is a blind change because 6919 * trying to catch all places that might change packet is too 6920 * difficult (since it may be a module above this one). 6921 */ 6922 if (mp->b_datap->db_ref > 1) { 6923 mblk_t *mp1; 6924 6925 mp1 = copymsg(mp); 6926 freemsg(mp); 6927 if (mp1 == NULL) { 6928 first_mp = NULL; 6929 goto discard; 6930 } 6931 mp = mp1; 6932 } 6933 first_mp = mp; 6934 if (mctl_present) { 6935 hada_mp = first_mp; 6936 mp = first_mp->b_cont; 6937 } 6938 6939 if (ip_check_v6_mblk(mp, ill) == -1) 6940 return; 6941 6942 ip6h = (ip6_t *)mp->b_rptr; 6943 6944 DTRACE_PROBE4(ip6__physical__in__start, 6945 ill_t *, ill, ill_t *, NULL, 6946 ip6_t *, ip6h, mblk_t *, first_mp); 6947 6948 FW_HOOKS6(ip6_physical_in_event, ipv6firewall_physical_in, 6949 ill, NULL, ip6h, first_mp, mp); 6950 6951 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6952 6953 if (first_mp == NULL) 6954 return; 6955 6956 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6957 IPV6_DEFAULT_VERS_AND_FLOW) { 6958 /* 6959 * It may be a bit too expensive to do this mapped address 6960 * check here, but in the interest of robustness, it seems 6961 * like the correct place. 6962 * TODO: Avoid this check for e.g. connected TCP sockets 6963 */ 6964 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6965 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6966 goto discard; 6967 } 6968 6969 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6970 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6971 goto discard; 6972 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6973 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6974 goto discard; 6975 } 6976 6977 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6978 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6979 } else { 6980 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6981 goto discard; 6982 } 6983 freemsg(dl_mp); 6984 return; 6985 6986 discard: 6987 if (dl_mp != NULL) 6988 freeb(dl_mp); 6989 freemsg(first_mp); 6990 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6991 } 6992 6993 /* 6994 * Walk through the IPv6 packet in mp and see if there's an AH header 6995 * in it. See if the AH header needs to get done before other headers in 6996 * the packet. (Worker function for ipsec_early_ah_v6().) 6997 */ 6998 #define IPSEC_HDR_DONT_PROCESS 0 6999 #define IPSEC_HDR_PROCESS 1 7000 #define IPSEC_MEMORY_ERROR 2 7001 static int 7002 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 7003 { 7004 uint_t length; 7005 uint_t ehdrlen; 7006 uint8_t *whereptr; 7007 uint8_t *endptr; 7008 uint8_t *nexthdrp; 7009 ip6_dest_t *desthdr; 7010 ip6_rthdr_t *rthdr; 7011 ip6_t *ip6h; 7012 7013 /* 7014 * For now just pullup everything. In general, the less pullups, 7015 * the better, but there's so much squirrelling through anyway, 7016 * it's just easier this way. 7017 */ 7018 if (!pullupmsg(mp, -1)) { 7019 return (IPSEC_MEMORY_ERROR); 7020 } 7021 7022 ip6h = (ip6_t *)mp->b_rptr; 7023 length = IPV6_HDR_LEN; 7024 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 7025 endptr = mp->b_wptr; 7026 7027 /* 7028 * We can't just use the argument nexthdr in the place 7029 * of nexthdrp becaue we don't dereference nexthdrp 7030 * till we confirm whether it is a valid address. 7031 */ 7032 nexthdrp = &ip6h->ip6_nxt; 7033 while (whereptr < endptr) { 7034 /* Is there enough left for len + nexthdr? 
*/ 7035 if (whereptr + MIN_EHDR_LEN > endptr) 7036 return (IPSEC_MEMORY_ERROR); 7037 7038 switch (*nexthdrp) { 7039 case IPPROTO_HOPOPTS: 7040 case IPPROTO_DSTOPTS: 7041 /* Assumes the headers are identical for hbh and dst */ 7042 desthdr = (ip6_dest_t *)whereptr; 7043 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7044 if ((uchar_t *)desthdr + ehdrlen > endptr) 7045 return (IPSEC_MEMORY_ERROR); 7046 /* 7047 * Return DONT_PROCESS because of potential Mobile IPv6 7048 * cruft for destination options. 7049 */ 7050 if (*nexthdrp == IPPROTO_DSTOPTS) 7051 return (IPSEC_HDR_DONT_PROCESS); 7052 nexthdrp = &desthdr->ip6d_nxt; 7053 break; 7054 case IPPROTO_ROUTING: 7055 rthdr = (ip6_rthdr_t *)whereptr; 7056 7057 /* 7058 * If there's more hops left on the routing header, 7059 * return now with DON'T PROCESS. 7060 */ 7061 if (rthdr->ip6r_segleft > 0) 7062 return (IPSEC_HDR_DONT_PROCESS); 7063 7064 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7065 if ((uchar_t *)rthdr + ehdrlen > endptr) 7066 return (IPSEC_MEMORY_ERROR); 7067 nexthdrp = &rthdr->ip6r_nxt; 7068 break; 7069 case IPPROTO_FRAGMENT: 7070 /* Wait for reassembly */ 7071 return (IPSEC_HDR_DONT_PROCESS); 7072 case IPPROTO_AH: 7073 *nexthdr = IPPROTO_AH; 7074 return (IPSEC_HDR_PROCESS); 7075 case IPPROTO_NONE: 7076 /* No next header means we're finished */ 7077 default: 7078 return (IPSEC_HDR_DONT_PROCESS); 7079 } 7080 length += ehdrlen; 7081 whereptr += ehdrlen; 7082 } 7083 panic("ipsec_needs_processing_v6"); 7084 /*NOTREACHED*/ 7085 } 7086 7087 /* 7088 * Path for AH if options are present. If this is the first time we are 7089 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7090 * Otherwise, just fanout. Return value answers the boolean question: 7091 * "Did I consume the mblk you sent me?" 7092 * 7093 * Sometimes AH needs to be done before other IPv6 headers for security 7094 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 7095 * indicates if that is so, and fans out to the appropriate IPsec protocol 7096 * for the datagram passed in. 7097 */ 7098 static boolean_t 7099 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7100 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 7101 { 7102 mblk_t *mp; 7103 uint8_t nexthdr; 7104 ipsec_in_t *ii = NULL; 7105 ah_t *ah; 7106 ipsec_status_t ipsec_rc; 7107 7108 ASSERT((hada_mp == NULL) || (!mctl_present)); 7109 7110 switch (ipsec_needs_processing_v6( 7111 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 7112 case IPSEC_MEMORY_ERROR: 7113 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7114 freemsg(hada_mp); 7115 freemsg(first_mp); 7116 return (B_TRUE); 7117 case IPSEC_HDR_DONT_PROCESS: 7118 return (B_FALSE); 7119 } 7120 7121 /* Default means send it to AH! */ 7122 ASSERT(nexthdr == IPPROTO_AH); 7123 if (!mctl_present) { 7124 mp = first_mp; 7125 if ((first_mp = ipsec_in_alloc(B_FALSE)) == NULL) { 7126 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7127 "allocation failure.\n")); 7128 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7129 freemsg(hada_mp); 7130 freemsg(mp); 7131 return (B_TRUE); 7132 } 7133 /* 7134 * Store the ill_index so that when we come back 7135 * from IPSEC we ride on the same queue. 7136 */ 7137 ii = (ipsec_in_t *)first_mp->b_rptr; 7138 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7139 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7140 first_mp->b_cont = mp; 7141 } 7142 /* 7143 * Cache hardware acceleration info. 
7144 */ 7145 if (hada_mp != NULL) { 7146 ASSERT(ii != NULL); 7147 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7148 "caching data attr.\n")); 7149 ii->ipsec_in_accelerated = B_TRUE; 7150 ii->ipsec_in_da = hada_mp; 7151 } 7152 7153 if (!ipsec_loaded()) { 7154 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid); 7155 return (B_TRUE); 7156 } 7157 7158 ah = ipsec_inbound_ah_sa(first_mp); 7159 if (ah == NULL) 7160 return (B_TRUE); 7161 ASSERT(ii->ipsec_in_ah_sa != NULL); 7162 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7163 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7164 7165 switch (ipsec_rc) { 7166 case IPSEC_STATUS_SUCCESS: 7167 /* we're done with IPsec processing, send it up */ 7168 ip_fanout_proto_again(first_mp, ill, ill, ire); 7169 break; 7170 case IPSEC_STATUS_FAILED: 7171 BUMP_MIB(&ip6_mib, ipIfStatsInDiscards); 7172 break; 7173 case IPSEC_STATUS_PENDING: 7174 /* no action needed */ 7175 break; 7176 } 7177 return (B_TRUE); 7178 } 7179 7180 /* 7181 * Validate the IPv6 mblk for alignment. 7182 */ 7183 int 7184 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7185 { 7186 int pkt_len, ip6_len; 7187 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7188 7189 /* check for alignment and full IPv6 header */ 7190 if (!OK_32PTR((uchar_t *)ip6h) || 7191 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7192 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7193 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7194 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7195 freemsg(mp); 7196 return (-1); 7197 } 7198 ip6h = (ip6_t *)mp->b_rptr; 7199 } 7200 7201 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7202 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7203 7204 if (mp->b_cont == NULL) 7205 pkt_len = mp->b_wptr - mp->b_rptr; 7206 else 7207 pkt_len = msgdsize(mp); 7208 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7209 7210 /* 7211 * Check for bogus (too short packet) and packet which 7212 * was padded by the link layer. 
7213 */ 7214 if (ip6_len != pkt_len) { 7215 ssize_t diff; 7216 7217 if (ip6_len > pkt_len) { 7218 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7219 ip6_len, pkt_len)); 7220 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7221 freemsg(mp); 7222 return (-1); 7223 } 7224 diff = (ssize_t)(pkt_len - ip6_len); 7225 7226 if (!adjmsg(mp, -diff)) { 7227 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7228 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7229 freemsg(mp); 7230 return (-1); 7231 } 7232 } 7233 return (0); 7234 } 7235 7236 /* 7237 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7238 * ip_rput_v6 has already verified alignment, the min length, the version, 7239 * and db_ref = 1. 7240 * 7241 * The ill passed in (the arg named inill) is the ill that the packet 7242 * actually arrived on. We need to remember this when saving the 7243 * input interface index into potential IPV6_PKTINFO data in 7244 * ip_add_info_v6(). 7245 * 7246 * This routine doesn't free dl_mp; that's the caller's responsibility on 7247 * return. (Note that the callers are complex enough that there's no tail 7248 * recursion here anyway.) 
7249 */ 7250 void 7251 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7252 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7253 { 7254 ire_t *ire = NULL; 7255 queue_t *rq; 7256 ill_t *ill = inill; 7257 ill_t *outill; 7258 ipif_t *ipif; 7259 uint8_t *whereptr; 7260 uint8_t nexthdr; 7261 uint16_t remlen; 7262 uint_t prev_nexthdr_offset; 7263 uint_t used; 7264 size_t pkt_len; 7265 uint16_t ip6_len; 7266 uint_t hdr_len; 7267 boolean_t mctl_present; 7268 mblk_t *first_mp; 7269 mblk_t *first_mp1; 7270 boolean_t no_forward; 7271 ip6_hbh_t *hbhhdr; 7272 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7273 conn_t *connp; 7274 ilm_t *ilm; 7275 uint32_t ports; 7276 uint_t ipif_id = 0; 7277 zoneid_t zoneid = GLOBAL_ZONEID; 7278 uint16_t hck_flags, reass_hck_flags; 7279 uint32_t reass_sum; 7280 boolean_t cksum_err; 7281 mblk_t *mp1; 7282 7283 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7284 7285 if (hada_mp != NULL) { 7286 /* 7287 * It's an IPsec accelerated packet. 7288 * Keep a pointer to the data attributes around until 7289 * we allocate the ipsecinfo structure. 7290 */ 7291 IPSECHW_DEBUG(IPSECHW_PKT, 7292 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7293 hada_mp->b_cont = NULL; 7294 /* 7295 * Since it is accelerated, it came directly from 7296 * the ill. 
7297 */ 7298 ASSERT(mctl_present == B_FALSE); 7299 ASSERT(mp->b_datap->db_type != M_CTL); 7300 } 7301 7302 ip6h = (ip6_t *)mp->b_rptr; 7303 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7304 pkt_len = ip6_len; 7305 7306 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7307 hck_flags = DB_CKSUMFLAGS(mp); 7308 else 7309 hck_flags = 0; 7310 7311 /* Clear checksum flags in case we need to forward */ 7312 DB_CKSUMFLAGS(mp) = 0; 7313 reass_sum = reass_hck_flags = 0; 7314 7315 nexthdr = ip6h->ip6_nxt; 7316 7317 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7318 (uchar_t *)ip6h); 7319 whereptr = (uint8_t *)&ip6h[1]; 7320 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7321 7322 /* Process hop by hop header options */ 7323 if (nexthdr == IPPROTO_HOPOPTS) { 7324 uint_t ehdrlen; 7325 uint8_t *optptr; 7326 7327 if (remlen < MIN_EHDR_LEN) 7328 goto pkt_too_short; 7329 if (mp->b_cont != NULL && 7330 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7331 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7332 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7333 freemsg(hada_mp); 7334 freemsg(first_mp); 7335 return; 7336 } 7337 ip6h = (ip6_t *)mp->b_rptr; 7338 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7339 } 7340 hbhhdr = (ip6_hbh_t *)whereptr; 7341 nexthdr = hbhhdr->ip6h_nxt; 7342 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7343 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7344 7345 if (remlen < ehdrlen) 7346 goto pkt_too_short; 7347 if (mp->b_cont != NULL && 7348 whereptr + ehdrlen > mp->b_wptr) { 7349 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7350 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7351 freemsg(hada_mp); 7352 freemsg(first_mp); 7353 return; 7354 } 7355 ip6h = (ip6_t *)mp->b_rptr; 7356 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7357 hbhhdr = (ip6_hbh_t *)whereptr; 7358 } 7359 7360 optptr = whereptr + 2; 7361 whereptr += ehdrlen; 7362 remlen -= ehdrlen; 7363 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 
7364 ehdrlen - 2, IPPROTO_HOPOPTS)) { 7365 case -1: 7366 /* 7367 * Packet has been consumed and any 7368 * needed ICMP messages sent. 7369 */ 7370 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7371 freemsg(hada_mp); 7372 return; 7373 case 0: 7374 /* no action needed */ 7375 break; 7376 case 1: 7377 /* Known router alert */ 7378 goto ipv6forus; 7379 } 7380 } 7381 7382 /* 7383 * Attach any necessary label information to this packet. 7384 */ 7385 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7386 if (ip6opt_ls != 0) 7387 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7388 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7389 freemsg(hada_mp); 7390 freemsg(first_mp); 7391 return; 7392 } 7393 7394 /* 7395 * On incoming v6 multicast packets we will bypass the ire table, 7396 * and assume that the read queue corresponds to the targetted 7397 * interface. 7398 * 7399 * The effect of this is the same as the IPv4 original code, but is 7400 * much cleaner I think. See ip_rput for how that was done. 7401 */ 7402 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7403 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7404 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7405 /* 7406 * XXX TODO Give to mrouted to for multicast forwarding. 
7407 */ 7408 ILM_WALKER_HOLD(ill); 7409 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7410 ILM_WALKER_RELE(ill); 7411 if (ilm == NULL) { 7412 if (ip_debug > 3) { 7413 /* ip2dbg */ 7414 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7415 " which is not for us: %s\n", AF_INET6, 7416 &ip6h->ip6_dst); 7417 } 7418 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7419 freemsg(hada_mp); 7420 freemsg(first_mp); 7421 return; 7422 } 7423 if (ip_debug > 3) { 7424 /* ip2dbg */ 7425 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7426 AF_INET6, &ip6h->ip6_dst); 7427 } 7428 rq = ill->ill_rq; 7429 zoneid = GLOBAL_ZONEID; 7430 goto ipv6forus; 7431 } 7432 7433 ipif = ill->ill_ipif; 7434 7435 /* 7436 * If a packet was received on an interface that is a 6to4 tunnel, 7437 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7438 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7439 * the 6to4 prefix of the address configured on the receiving interface. 7440 * Otherwise, the packet was delivered to this interface in error and 7441 * the packet must be dropped. 7442 */ 7443 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7444 7445 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7446 &ip6h->ip6_dst)) { 7447 if (ip_debug > 2) { 7448 /* ip1dbg */ 7449 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7450 "addressed packet which is not for us: " 7451 "%s\n", AF_INET6, &ip6h->ip6_dst); 7452 } 7453 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7454 freemsg(first_mp); 7455 return; 7456 } 7457 } 7458 7459 /* 7460 * Find an ire that matches destination. For link-local addresses 7461 * we have to match the ill. 7462 * TBD for site local addresses. 
7463 */ 7464 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7465 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7466 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7467 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP); 7468 } else { 7469 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7470 MBLK_GETLABEL(mp)); 7471 } 7472 if (ire == NULL) { 7473 /* 7474 * No matching IRE found. Mark this packet as having 7475 * originated externally. 7476 */ 7477 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7478 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7479 if (!(ill->ill_flags & ILLF_ROUTER)) { 7480 BUMP_MIB(ill->ill_ip_mib, 7481 ipIfStatsInAddrErrors); 7482 } 7483 freemsg(hada_mp); 7484 freemsg(first_mp); 7485 return; 7486 } 7487 if (ip6h->ip6_hops <= 1) { 7488 if (hada_mp != NULL) 7489 goto hada_drop; 7490 /* Sent by forwarding path, and router is global zone */ 7491 icmp_time_exceeded_v6(WR(q), first_mp, 7492 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7493 GLOBAL_ZONEID); 7494 return; 7495 } 7496 /* 7497 * Per RFC 3513 section 2.5.2, we must not forward packets with 7498 * an unspecified source address. 7499 */ 7500 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7501 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7502 freemsg(hada_mp); 7503 freemsg(first_mp); 7504 return; 7505 } 7506 mp->b_prev = (mblk_t *)(uintptr_t) 7507 ill->ill_phyint->phyint_ifindex; 7508 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7509 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7510 ALL_ZONES); 7511 return; 7512 } 7513 ipif_id = ire->ire_ipif->ipif_seqid; 7514 /* we have a matching IRE */ 7515 if (ire->ire_stq != NULL) { 7516 ill_group_t *ill_group; 7517 ill_group_t *ire_group; 7518 7519 /* 7520 * To be quicker, we may wish not to chase pointers 7521 * (ire->ire_ipif->ipif_ill...) and instead store the 7522 * forwarding policy in the ire. An unfortunate side- 7523 * effect of this would be requiring an ire flush whenever 7524 * the ILLF_ROUTER flag changes. 
For now, chase pointers 7525 * once and store in the boolean no_forward. 7526 * 7527 * This appears twice to keep it out of the non-forwarding, 7528 * yes-it's-for-us-on-the-right-interface case. 7529 */ 7530 no_forward = ((ill->ill_flags & 7531 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7532 7533 7534 ASSERT(first_mp == mp); 7535 /* 7536 * This ire has a send-to queue - forward the packet. 7537 */ 7538 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7539 freemsg(hada_mp); 7540 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7541 if (no_forward) { 7542 BUMP_MIB(ill->ill_ip_mib, 7543 ipIfStatsInAddrErrors); 7544 } 7545 freemsg(mp); 7546 ire_refrele(ire); 7547 return; 7548 } 7549 /* 7550 * ipIfStatsHCInForwDatagrams should only be increment if there 7551 * will be an attempt to forward the packet, which is why we 7552 * increment after the above condition has been checked. 7553 */ 7554 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7555 if (ip6h->ip6_hops <= 1) { 7556 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7557 /* Sent by forwarding path, and router is global zone */ 7558 icmp_time_exceeded_v6(WR(q), mp, 7559 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7560 GLOBAL_ZONEID); 7561 ire_refrele(ire); 7562 return; 7563 } 7564 /* 7565 * Per RFC 3513 section 2.5.2, we must not forward packets with 7566 * an unspecified source address. 
7567 */ 7568 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7569 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7570 freemsg(mp); 7571 ire_refrele(ire); 7572 return; 7573 } 7574 7575 if (is_system_labeled()) { 7576 mblk_t *mp1; 7577 7578 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7579 BUMP_MIB(ill->ill_ip_mib, 7580 ipIfStatsForwProhibits); 7581 freemsg(mp); 7582 ire_refrele(ire); 7583 return; 7584 } 7585 /* Size may have changed */ 7586 mp = mp1; 7587 ip6h = (ip6_t *)mp->b_rptr; 7588 pkt_len = msgdsize(mp); 7589 } 7590 7591 if (pkt_len > ire->ire_max_frag) { 7592 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7593 /* Sent by forwarding path, and router is global zone */ 7594 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7595 ll_multicast, B_TRUE, GLOBAL_ZONEID); 7596 ire_refrele(ire); 7597 return; 7598 } 7599 7600 /* 7601 * Check to see if we're forwarding the packet to a 7602 * different link from which it came. If so, check the 7603 * source and destination addresses since routers must not 7604 * forward any packets with link-local source or 7605 * destination addresses to other links. Otherwise (if 7606 * we're forwarding onto the same link), conditionally send 7607 * a redirect message. 7608 */ 7609 ill_group = ill->ill_group; 7610 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7611 if (ire->ire_rfq != q && (ill_group == NULL || 7612 ill_group != ire_group)) { 7613 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7614 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7615 BUMP_MIB(ill->ill_ip_mib, 7616 ipIfStatsInAddrErrors); 7617 freemsg(mp); 7618 ire_refrele(ire); 7619 return; 7620 } 7621 /* TBD add site-local check at site boundary? */ 7622 } else if (ipv6_send_redirects) { 7623 in6_addr_t *v6targ; 7624 in6_addr_t gw_addr_v6; 7625 ire_t *src_ire_v6 = NULL; 7626 7627 /* 7628 * Don't send a redirect when forwarding a source 7629 * routed packet. 
7630 */ 7631 if (ip_source_routed_v6(ip6h, mp)) 7632 goto forward; 7633 7634 mutex_enter(&ire->ire_lock); 7635 gw_addr_v6 = ire->ire_gateway_addr_v6; 7636 mutex_exit(&ire->ire_lock); 7637 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7638 v6targ = &gw_addr_v6; 7639 /* 7640 * We won't send redirects to a router 7641 * that doesn't have a link local 7642 * address, but will forward. 7643 */ 7644 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7645 BUMP_MIB(ill->ill_ip_mib, 7646 ipIfStatsInAddrErrors); 7647 goto forward; 7648 } 7649 } else { 7650 v6targ = &ip6h->ip6_dst; 7651 } 7652 7653 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7654 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7655 ALL_ZONES, 0, NULL, 7656 MATCH_IRE_IPIF | MATCH_IRE_TYPE); 7657 7658 if (src_ire_v6 != NULL) { 7659 /* 7660 * The source is directly connected. 7661 */ 7662 mp1 = copymsg(mp); 7663 if (mp1 != NULL) { 7664 icmp_send_redirect_v6(WR(q), 7665 mp1, v6targ, &ip6h->ip6_dst, 7666 ill, B_FALSE); 7667 } 7668 ire_refrele(src_ire_v6); 7669 } 7670 } 7671 7672 forward: 7673 /* Hoplimit verified above */ 7674 ip6h->ip6_hops--; 7675 7676 outill = ire->ire_ipif->ipif_ill; 7677 7678 DTRACE_PROBE4(ip6__forwarding__start, 7679 ill_t *, inill, ill_t *, outill, 7680 ip6_t *, ip6h, mblk_t *, mp); 7681 7682 FW_HOOKS6(ip6_forwarding_event, ipv6firewall_forwarding, 7683 inill, outill, ip6h, mp, mp); 7684 7685 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7686 7687 if (mp != NULL) { 7688 UPDATE_IB_PKT_COUNT(ire); 7689 ire->ire_last_used_time = lbolt; 7690 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7691 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7692 } 7693 IRE_REFRELE(ire); 7694 return; 7695 } 7696 rq = ire->ire_rfq; 7697 7698 /* 7699 * Need to put on correct queue for reassembly to find it. 7700 * No need to use put() since reassembly has its own locks. 
7701 * Note: multicast packets and packets destined to addresses 7702 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7703 * the arriving ill. 7704 */ 7705 if (rq != q) { 7706 boolean_t check_multi = B_TRUE; 7707 ill_group_t *ill_group = NULL; 7708 ill_group_t *ire_group = NULL; 7709 ill_t *ire_ill = NULL; 7710 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7711 7712 /* 7713 * To be quicker, we may wish not to chase pointers 7714 * (ire->ire_ipif->ipif_ill...) and instead store the 7715 * forwarding policy in the ire. An unfortunate side- 7716 * effect of this would be requiring an ire flush whenever 7717 * the ILLF_ROUTER flag changes. For now, chase pointers 7718 * once and store in the boolean no_forward. 7719 */ 7720 no_forward = ((ill->ill_flags & 7721 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7722 7723 ill_group = ill->ill_group; 7724 if (rq != NULL) { 7725 ire_ill = (ill_t *)(rq->q_ptr); 7726 ire_group = ire_ill->ill_group; 7727 } 7728 7729 /* 7730 * If it's part of the same IPMP group, or if it's a legal 7731 * address on the 'usesrc' interface, then bypass strict 7732 * checks. 7733 */ 7734 if (ill_group != NULL && ill_group == ire_group) { 7735 check_multi = B_FALSE; 7736 } else if (ill_ifindex != 0 && ire_ill != NULL && 7737 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7738 check_multi = B_FALSE; 7739 } 7740 7741 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7742 if (check_multi && ipv6_strict_dst_multihoming && no_forward) { 7743 /* 7744 * This packet came in on an interface other than the 7745 * one associated with the destination address 7746 * and we are strict about matches. 7747 * 7748 * As long as the ills belong to the same group, 7749 * we don't consider them to arriving on the wrong 7750 * interface. Thus, when the switch is doing inbound 7751 * load spreading, we won't drop packets when we 7752 * are doing strict multihoming checks. 
7753 */ 7754 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7755 freemsg(hada_mp); 7756 freemsg(first_mp); 7757 ire_refrele(ire); 7758 return; 7759 } 7760 7761 if (rq != NULL) 7762 q = rq; 7763 7764 ill = (ill_t *)q->q_ptr; 7765 ASSERT(ill); 7766 } 7767 7768 zoneid = ire->ire_zoneid; 7769 UPDATE_IB_PKT_COUNT(ire); 7770 ire->ire_last_used_time = lbolt; 7771 /* Don't use the ire after this point. */ 7772 ire_refrele(ire); 7773 ipv6forus: 7774 /* 7775 * Looks like this packet is for us one way or another. 7776 * This is where we'll process destination headers etc. 7777 */ 7778 for (; ; ) { 7779 switch (nexthdr) { 7780 case IPPROTO_TCP: { 7781 uint16_t *up; 7782 uint32_t sum; 7783 int offset; 7784 7785 hdr_len = pkt_len - remlen; 7786 7787 if (hada_mp != NULL) { 7788 ip0dbg(("tcp hada drop\n")); 7789 goto hada_drop; 7790 } 7791 7792 7793 /* TCP needs all of the TCP header */ 7794 if (remlen < TCP_MIN_HEADER_LENGTH) 7795 goto pkt_too_short; 7796 if (mp->b_cont != NULL && 7797 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7798 if (!pullupmsg(mp, 7799 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7800 BUMP_MIB(ill->ill_ip_mib, 7801 ipIfStatsInDiscards); 7802 freemsg(first_mp); 7803 return; 7804 } 7805 hck_flags = 0; 7806 ip6h = (ip6_t *)mp->b_rptr; 7807 whereptr = (uint8_t *)ip6h + hdr_len; 7808 } 7809 /* 7810 * Extract the offset field from the TCP header. 7811 */ 7812 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7813 if (offset != 5) { 7814 if (offset < 5) { 7815 ip1dbg(("ip_rput_data_v6: short " 7816 "TCP data offset")); 7817 BUMP_MIB(ill->ill_ip_mib, 7818 ipIfStatsInDiscards); 7819 freemsg(first_mp); 7820 return; 7821 } 7822 /* 7823 * There must be TCP options. 7824 * Make sure we can grab them. 
7825 */ 7826 offset <<= 2; 7827 if (remlen < offset) 7828 goto pkt_too_short; 7829 if (mp->b_cont != NULL && 7830 whereptr + offset > mp->b_wptr) { 7831 if (!pullupmsg(mp, 7832 hdr_len + offset)) { 7833 BUMP_MIB(ill->ill_ip_mib, 7834 ipIfStatsInDiscards); 7835 freemsg(first_mp); 7836 return; 7837 } 7838 hck_flags = 0; 7839 ip6h = (ip6_t *)mp->b_rptr; 7840 whereptr = (uint8_t *)ip6h + hdr_len; 7841 } 7842 } 7843 7844 up = (uint16_t *)&ip6h->ip6_src; 7845 /* 7846 * TCP checksum calculation. First sum up the 7847 * pseudo-header fields: 7848 * - Source IPv6 address 7849 * - Destination IPv6 address 7850 * - TCP payload length 7851 * - TCP protocol ID 7852 */ 7853 sum = htons(IPPROTO_TCP + remlen) + 7854 up[0] + up[1] + up[2] + up[3] + 7855 up[4] + up[5] + up[6] + up[7] + 7856 up[8] + up[9] + up[10] + up[11] + 7857 up[12] + up[13] + up[14] + up[15]; 7858 7859 /* Fold initial sum */ 7860 sum = (sum & 0xffff) + (sum >> 16); 7861 7862 mp1 = mp->b_cont; 7863 7864 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7865 IP6_STAT(ip6_in_sw_cksum); 7866 7867 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7868 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7869 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7870 mp, mp1, cksum_err); 7871 7872 if (cksum_err) { 7873 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7874 7875 if (hck_flags & HCK_FULLCKSUM) 7876 IP6_STAT(ip6_tcp_in_full_hw_cksum_err); 7877 else if (hck_flags & HCK_PARTIALCKSUM) 7878 IP6_STAT(ip6_tcp_in_part_hw_cksum_err); 7879 else 7880 IP6_STAT(ip6_tcp_in_sw_cksum_err); 7881 7882 freemsg(first_mp); 7883 return; 7884 } 7885 tcp_fanout: 7886 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7887 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7888 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7889 return; 7890 } 7891 case IPPROTO_SCTP: 7892 { 7893 sctp_hdr_t *sctph; 7894 uint32_t calcsum, pktsum; 7895 uint_t hdr_len = pkt_len - remlen; 7896 7897 /* SCTP needs all of the SCTP header */ 7898 if (remlen < sizeof (*sctph)) { 7899 goto 
pkt_too_short; 7900 } 7901 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7902 ASSERT(mp->b_cont != NULL); 7903 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7904 BUMP_MIB(ill->ill_ip_mib, 7905 ipIfStatsInDiscards); 7906 freemsg(mp); 7907 return; 7908 } 7909 ip6h = (ip6_t *)mp->b_rptr; 7910 whereptr = (uint8_t *)ip6h + hdr_len; 7911 } 7912 7913 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7914 /* checksum */ 7915 pktsum = sctph->sh_chksum; 7916 sctph->sh_chksum = 0; 7917 calcsum = sctp_cksum(mp, hdr_len); 7918 if (calcsum != pktsum) { 7919 BUMP_MIB(&sctp_mib, sctpChecksumError); 7920 freemsg(mp); 7921 return; 7922 } 7923 sctph->sh_chksum = pktsum; 7924 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7925 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7926 ports, ipif_id, zoneid, mp)) == NULL) { 7927 ip_fanout_sctp_raw(first_mp, ill, 7928 (ipha_t *)ip6h, B_FALSE, ports, 7929 mctl_present, 7930 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7931 B_TRUE, ipif_id, zoneid); 7932 return; 7933 } 7934 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7935 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7936 B_FALSE, mctl_present); 7937 return; 7938 } 7939 case IPPROTO_UDP: { 7940 uint16_t *up; 7941 uint32_t sum; 7942 7943 hdr_len = pkt_len - remlen; 7944 7945 if (hada_mp != NULL) { 7946 ip0dbg(("udp hada drop\n")); 7947 goto hada_drop; 7948 } 7949 7950 /* Verify that at least the ports are present */ 7951 if (remlen < UDPH_SIZE) 7952 goto pkt_too_short; 7953 if (mp->b_cont != NULL && 7954 whereptr + UDPH_SIZE > mp->b_wptr) { 7955 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7956 BUMP_MIB(ill->ill_ip_mib, 7957 ipIfStatsInDiscards); 7958 freemsg(first_mp); 7959 return; 7960 } 7961 hck_flags = 0; 7962 ip6h = (ip6_t *)mp->b_rptr; 7963 whereptr = (uint8_t *)ip6h + hdr_len; 7964 } 7965 7966 /* 7967 * Before going through the regular checksum 7968 * calculation, make sure the received checksum 7969 * is non-zero. 
RFC 2460 says, a 0x0000 checksum 7970 * in a UDP packet (within IPv6 packet) is invalid 7971 * and should be replaced by 0xffff. This makes 7972 * sense as regular checksum calculation will 7973 * pass for both the cases i.e. 0x0000 and 0xffff. 7974 * Removing one of the case makes error detection 7975 * stronger. 7976 */ 7977 7978 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7979 /* 0x0000 checksum is invalid */ 7980 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7981 "checksum value 0x0000\n")); 7982 BUMP_MIB(ill->ill_ip_mib, 7983 udpIfStatsInCksumErrs); 7984 freemsg(first_mp); 7985 return; 7986 } 7987 7988 up = (uint16_t *)&ip6h->ip6_src; 7989 7990 /* 7991 * UDP checksum calculation. First sum up the 7992 * pseudo-header fields: 7993 * - Source IPv6 address 7994 * - Destination IPv6 address 7995 * - UDP payload length 7996 * - UDP protocol ID 7997 */ 7998 7999 sum = htons(IPPROTO_UDP + remlen) + 8000 up[0] + up[1] + up[2] + up[3] + 8001 up[4] + up[5] + up[6] + up[7] + 8002 up[8] + up[9] + up[10] + up[11] + 8003 up[12] + up[13] + up[14] + up[15]; 8004 8005 /* Fold initial sum */ 8006 sum = (sum & 0xffff) + (sum >> 16); 8007 8008 if (reass_hck_flags != 0) { 8009 hck_flags = reass_hck_flags; 8010 8011 IP_CKSUM_RECV_REASS(hck_flags, 8012 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 8013 sum, reass_sum, cksum_err); 8014 } else { 8015 mp1 = mp->b_cont; 8016 8017 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 8018 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 8019 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 8020 mp, mp1, cksum_err); 8021 } 8022 8023 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 8024 IP6_STAT(ip6_in_sw_cksum); 8025 8026 if (cksum_err) { 8027 BUMP_MIB(ill->ill_ip_mib, 8028 udpIfStatsInCksumErrs); 8029 8030 if (hck_flags & HCK_FULLCKSUM) 8031 IP6_STAT(ip6_udp_in_full_hw_cksum_err); 8032 else if (hck_flags & HCK_PARTIALCKSUM) 8033 IP6_STAT(ip6_udp_in_part_hw_cksum_err); 8034 else 8035 IP6_STAT(ip6_udp_in_sw_cksum_err); 8036 8037 freemsg(first_mp); 8038 
return; 8039 } 8040 goto udp_fanout; 8041 } 8042 case IPPROTO_ICMPV6: { 8043 uint16_t *up; 8044 uint32_t sum; 8045 uint_t hdr_len = pkt_len - remlen; 8046 8047 if (hada_mp != NULL) { 8048 ip0dbg(("icmp hada drop\n")); 8049 goto hada_drop; 8050 } 8051 8052 up = (uint16_t *)&ip6h->ip6_src; 8053 sum = htons(IPPROTO_ICMPV6 + remlen) + 8054 up[0] + up[1] + up[2] + up[3] + 8055 up[4] + up[5] + up[6] + up[7] + 8056 up[8] + up[9] + up[10] + up[11] + 8057 up[12] + up[13] + up[14] + up[15]; 8058 sum = (sum & 0xffff) + (sum >> 16); 8059 sum = IP_CSUM(mp, hdr_len, sum); 8060 if (sum != 0) { 8061 /* IPv6 ICMP checksum failed */ 8062 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 8063 "failed %x\n", 8064 sum)); 8065 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 8066 BUMP_MIB(ill->ill_icmp6_mib, 8067 ipv6IfIcmpInErrors); 8068 freemsg(first_mp); 8069 return; 8070 } 8071 8072 icmp_fanout: 8073 /* Check variable for testing applications */ 8074 if (ipv6_drop_inbound_icmpv6) { 8075 freemsg(first_mp); 8076 return; 8077 } 8078 /* 8079 * Assume that there is always at least one conn for 8080 * ICMPv6 (in.ndpd) i.e. don't optimize the case 8081 * where there is no conn. 8082 */ 8083 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8084 ASSERT(!(ill->ill_phyint->phyint_flags & 8085 PHYI_LOOPBACK)); 8086 /* 8087 * In the multicast case, applications may have 8088 * joined the group from different zones, so we 8089 * need to deliver the packet to each of them. 8090 * Loop through the multicast memberships 8091 * structures (ilm) on the receive ill and send 8092 * a copy of the packet up each matching one. 
8093 */ 8094 ILM_WALKER_HOLD(ill); 8095 for (ilm = ill->ill_ilm; ilm != NULL; 8096 ilm = ilm->ilm_next) { 8097 if (ilm->ilm_flags & ILM_DELETED) 8098 continue; 8099 if (!IN6_ARE_ADDR_EQUAL( 8100 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 8101 continue; 8102 if (!ipif_lookup_zoneid(ill, 8103 ilm->ilm_zoneid, IPIF_UP, NULL)) 8104 continue; 8105 8106 first_mp1 = ip_copymsg(first_mp); 8107 if (first_mp1 == NULL) 8108 continue; 8109 icmp_inbound_v6(q, first_mp1, ill, 8110 hdr_len, mctl_present, 0, 8111 ilm->ilm_zoneid, dl_mp); 8112 } 8113 ILM_WALKER_RELE(ill); 8114 } else { 8115 first_mp1 = ip_copymsg(first_mp); 8116 if (first_mp1 != NULL) 8117 icmp_inbound_v6(q, first_mp1, ill, 8118 hdr_len, mctl_present, 0, zoneid, 8119 dl_mp); 8120 } 8121 } 8122 /* FALLTHRU */ 8123 default: { 8124 /* 8125 * Handle protocols with which IPv6 is less intimate. 8126 */ 8127 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 8128 8129 if (hada_mp != NULL) { 8130 ip0dbg(("default hada drop\n")); 8131 goto hada_drop; 8132 } 8133 8134 /* 8135 * Enable sending ICMP for "Unknown" nexthdr 8136 * case. i.e. where we did not FALLTHRU from 8137 * IPPROTO_ICMPV6 processing case above. 8138 * If we did FALLTHRU, then the packet has already been 8139 * processed for IPPF, don't process it again in 8140 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8141 * flags 8142 */ 8143 if (nexthdr != IPPROTO_ICMPV6) 8144 proto_flags |= IP_FF_SEND_ICMP; 8145 else 8146 proto_flags |= IP6_NO_IPPOLICY; 8147 8148 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8149 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8150 mctl_present, zoneid); 8151 return; 8152 } 8153 8154 case IPPROTO_DSTOPTS: { 8155 uint_t ehdrlen; 8156 uint8_t *optptr; 8157 ip6_dest_t *desthdr; 8158 8159 /* Check if AH is present. 
*/ 8160 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8161 ire, hada_mp, zoneid)) { 8162 ip0dbg(("dst early hada drop\n")); 8163 return; 8164 } 8165 8166 /* 8167 * Reinitialize pointers, as ipsec_early_ah_v6() does 8168 * complete pullups. We don't have to do more pullups 8169 * as a result. 8170 */ 8171 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8172 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8173 ip6h = (ip6_t *)mp->b_rptr; 8174 8175 if (remlen < MIN_EHDR_LEN) 8176 goto pkt_too_short; 8177 8178 desthdr = (ip6_dest_t *)whereptr; 8179 nexthdr = desthdr->ip6d_nxt; 8180 prev_nexthdr_offset = (uint_t)(whereptr - 8181 (uint8_t *)ip6h); 8182 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8183 if (remlen < ehdrlen) 8184 goto pkt_too_short; 8185 optptr = whereptr + 2; 8186 /* 8187 * Note: XXX This code does not seem to make 8188 * distinction between Destination Options Header 8189 * being before/after Routing Header which can 8190 * happen if we are at the end of source route. 8191 * This may become significant in future. 8192 * (No real significant Destination Options are 8193 * defined/implemented yet ). 8194 */ 8195 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8196 ehdrlen - 2, IPPROTO_DSTOPTS)) { 8197 case -1: 8198 /* 8199 * Packet has been consumed and any needed 8200 * ICMP errors sent. 
8201 */ 8202 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8203 freemsg(hada_mp); 8204 return; 8205 case 0: 8206 /* No action needed continue */ 8207 break; 8208 case 1: 8209 /* 8210 * Unnexpected return value 8211 * (Router alert is a Hop-by-Hop option) 8212 */ 8213 #ifdef DEBUG 8214 panic("ip_rput_data_v6: router " 8215 "alert hbh opt indication in dest opt"); 8216 /*NOTREACHED*/ 8217 #else 8218 freemsg(hada_mp); 8219 freemsg(first_mp); 8220 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8221 return; 8222 #endif 8223 } 8224 used = ehdrlen; 8225 break; 8226 } 8227 case IPPROTO_FRAGMENT: { 8228 ip6_frag_t *fraghdr; 8229 size_t no_frag_hdr_len; 8230 8231 if (hada_mp != NULL) { 8232 ip0dbg(("frag hada drop\n")); 8233 goto hada_drop; 8234 } 8235 8236 ASSERT(first_mp == mp); 8237 if (remlen < sizeof (ip6_frag_t)) 8238 goto pkt_too_short; 8239 8240 if (mp->b_cont != NULL && 8241 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8242 if (!pullupmsg(mp, 8243 pkt_len - remlen + sizeof (ip6_frag_t))) { 8244 BUMP_MIB(ill->ill_ip_mib, 8245 ipIfStatsInDiscards); 8246 freemsg(mp); 8247 return; 8248 } 8249 hck_flags = 0; 8250 ip6h = (ip6_t *)mp->b_rptr; 8251 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8252 } 8253 8254 fraghdr = (ip6_frag_t *)whereptr; 8255 used = (uint_t)sizeof (ip6_frag_t); 8256 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8257 8258 /* 8259 * Invoke the CGTP (multirouting) filtering module to 8260 * process the incoming packet. Packets identified as 8261 * duplicates must be discarded. Filtering is active 8262 * only if the the ip_cgtp_filter ndd variable is 8263 * non-zero. 
8264 */ 8265 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL)) { 8266 int cgtp_flt_pkt = 8267 ip_cgtp_filter_ops->cfo_filter_v6( 8268 inill->ill_rq, ip6h, fraghdr); 8269 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8270 freemsg(mp); 8271 return; 8272 } 8273 } 8274 8275 /* Restore the flags */ 8276 DB_CKSUMFLAGS(mp) = hck_flags; 8277 8278 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8279 remlen - used, &prev_nexthdr_offset, 8280 &reass_sum, &reass_hck_flags); 8281 if (mp == NULL) { 8282 /* Reassembly is still pending */ 8283 return; 8284 } 8285 /* The first mblk are the headers before the frag hdr */ 8286 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8287 8288 first_mp = mp; /* mp has most likely changed! */ 8289 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8290 ip6h = (ip6_t *)mp->b_rptr; 8291 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8292 whereptr = mp->b_rptr + no_frag_hdr_len; 8293 remlen = ntohs(ip6h->ip6_plen) + 8294 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8295 pkt_len = msgdsize(mp); 8296 used = 0; 8297 break; 8298 } 8299 case IPPROTO_HOPOPTS: 8300 if (hada_mp != NULL) { 8301 ip0dbg(("hop hada drop\n")); 8302 goto hada_drop; 8303 } 8304 /* 8305 * Illegal header sequence. 8306 * (Hop-by-hop headers are processed above 8307 * and required to immediately follow IPv6 header) 8308 */ 8309 icmp_param_problem_v6(WR(q), first_mp, 8310 ICMP6_PARAMPROB_NEXTHEADER, 8311 prev_nexthdr_offset, 8312 B_FALSE, B_FALSE, zoneid); 8313 return; 8314 8315 case IPPROTO_ROUTING: { 8316 uint_t ehdrlen; 8317 ip6_rthdr_t *rthdr; 8318 8319 /* Check if AH is present. */ 8320 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8321 ire, hada_mp, zoneid)) { 8322 ip0dbg(("routing hada drop\n")); 8323 return; 8324 } 8325 8326 /* 8327 * Reinitialize pointers, as ipsec_early_ah_v6() does 8328 * complete pullups. We don't have to do more pullups 8329 * as a result. 
8330 */ 8331 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8332 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8333 ip6h = (ip6_t *)mp->b_rptr; 8334 8335 if (remlen < MIN_EHDR_LEN) 8336 goto pkt_too_short; 8337 rthdr = (ip6_rthdr_t *)whereptr; 8338 nexthdr = rthdr->ip6r_nxt; 8339 prev_nexthdr_offset = (uint_t)(whereptr - 8340 (uint8_t *)ip6h); 8341 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8342 if (remlen < ehdrlen) 8343 goto pkt_too_short; 8344 if (rthdr->ip6r_segleft != 0) { 8345 /* Not end of source route */ 8346 if (ll_multicast) { 8347 BUMP_MIB(ill->ill_ip_mib, 8348 ipIfStatsForwProhibits); 8349 freemsg(hada_mp); 8350 freemsg(mp); 8351 return; 8352 } 8353 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8354 flags, hada_mp, dl_mp); 8355 return; 8356 } 8357 used = ehdrlen; 8358 break; 8359 } 8360 case IPPROTO_AH: 8361 case IPPROTO_ESP: { 8362 /* 8363 * Fast path for AH/ESP. If this is the first time 8364 * we are sending a datagram to AH/ESP, allocate 8365 * a IPSEC_IN message and prepend it. Otherwise, 8366 * just fanout. 8367 */ 8368 8369 ipsec_in_t *ii; 8370 int ipsec_rc; 8371 8372 if (!mctl_present) { 8373 ASSERT(first_mp == mp); 8374 if ((first_mp = ipsec_in_alloc(B_FALSE)) == 8375 NULL) { 8376 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8377 "allocation failure.\n")); 8378 BUMP_MIB(ill->ill_ip_mib, 8379 ipIfStatsInDiscards); 8380 freemsg(mp); 8381 return; 8382 } 8383 /* 8384 * Store the ill_index so that when we come back 8385 * from IPSEC we ride on the same queue. 8386 */ 8387 ii = (ipsec_in_t *)first_mp->b_rptr; 8388 ii->ipsec_in_ill_index = 8389 ill->ill_phyint->phyint_ifindex; 8390 ii->ipsec_in_rill_index = 8391 ii->ipsec_in_ill_index; 8392 first_mp->b_cont = mp; 8393 /* 8394 * Cache hardware acceleration info. 
8395 */ 8396 if (hada_mp != NULL) { 8397 IPSECHW_DEBUG(IPSECHW_PKT, 8398 ("ip_rput_data_v6: " 8399 "caching data attr.\n")); 8400 ii->ipsec_in_accelerated = B_TRUE; 8401 ii->ipsec_in_da = hada_mp; 8402 hada_mp = NULL; 8403 } 8404 } else { 8405 ii = (ipsec_in_t *)first_mp->b_rptr; 8406 } 8407 8408 if (!ipsec_loaded()) { 8409 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8410 ire->ire_zoneid); 8411 return; 8412 } 8413 8414 /* select inbound SA and have IPsec process the pkt */ 8415 if (nexthdr == IPPROTO_ESP) { 8416 esph_t *esph = ipsec_inbound_esp_sa(first_mp); 8417 if (esph == NULL) 8418 return; 8419 ASSERT(ii->ipsec_in_esp_sa != NULL); 8420 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8421 NULL); 8422 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8423 first_mp, esph); 8424 } else { 8425 ah_t *ah = ipsec_inbound_ah_sa(first_mp); 8426 if (ah == NULL) 8427 return; 8428 ASSERT(ii->ipsec_in_ah_sa != NULL); 8429 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8430 NULL); 8431 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8432 first_mp, ah); 8433 } 8434 8435 switch (ipsec_rc) { 8436 case IPSEC_STATUS_SUCCESS: 8437 break; 8438 case IPSEC_STATUS_FAILED: 8439 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8440 /* FALLTHRU */ 8441 case IPSEC_STATUS_PENDING: 8442 return; 8443 } 8444 /* we're done with IPsec processing, send it up */ 8445 ip_fanout_proto_again(first_mp, ill, inill, ire); 8446 return; 8447 } 8448 case IPPROTO_NONE: 8449 /* All processing is done. Count as "delivered". 
*/ 8450 freemsg(hada_mp); 8451 freemsg(first_mp); 8452 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8453 return; 8454 } 8455 whereptr += used; 8456 ASSERT(remlen >= used); 8457 remlen -= used; 8458 } 8459 /* NOTREACHED */ 8460 8461 pkt_too_short: 8462 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8463 ip6_len, pkt_len, remlen)); 8464 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8465 freemsg(hada_mp); 8466 freemsg(first_mp); 8467 return; 8468 udp_fanout: 8469 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8470 connp = NULL; 8471 } else { 8472 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid); 8473 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8474 CONN_DEC_REF(connp); 8475 connp = NULL; 8476 } 8477 } 8478 8479 if (connp == NULL) { 8480 uint32_t ports; 8481 8482 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8483 UDP_PORTS_OFFSET); 8484 IP6_STAT(ip6_udp_slow_path); 8485 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8486 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8487 zoneid); 8488 return; 8489 } 8490 8491 if (CONN_UDP_FLOWCTLD(connp)) { 8492 freemsg(first_mp); 8493 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8494 CONN_DEC_REF(connp); 8495 return; 8496 } 8497 8498 /* Initiate IPPF processing */ 8499 if (IP6_IN_IPP(flags)) { 8500 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8501 if (mp == NULL) { 8502 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8503 CONN_DEC_REF(connp); 8504 return; 8505 } 8506 } 8507 8508 if (connp->conn_ip_recvpktinfo || 8509 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8510 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8511 if (mp == NULL) { 8512 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8513 CONN_DEC_REF(connp); 8514 return; 8515 } 8516 } 8517 8518 IP6_STAT(ip6_udp_fast_path); 8519 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8520 8521 /* Send it upstream */ 8522 CONN_UDP_RECV(connp, mp); 8523 8524 CONN_DEC_REF(connp); 8525 freemsg(hada_mp); 
8526 return; 8527 8528 hada_drop: 8529 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8530 /* IPsec kstats: bump counter here */ 8531 freemsg(hada_mp); 8532 freemsg(first_mp); 8533 } 8534 8535 /* 8536 * Reassemble fragment. 8537 * When it returns a completed message the first mblk will only contain 8538 * the headers prior to the fragment header. 8539 * 8540 * prev_nexthdr_offset is the offset of the nexthdr field within the 8541 * preceding header. This is needed to patch the previous header's 8542 * nexthdr field when reassembly completes. 8543 */ 8544 static mblk_t * 8545 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8546 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8547 uint32_t *cksum_val, uint16_t *cksum_flags) 8548 { 8549 ill_t *ill = (ill_t *)q->q_ptr; 8550 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8551 uint16_t offset; 8552 boolean_t more_frags; 8553 uint8_t nexthdr = fraghdr->ip6f_nxt; 8554 in6_addr_t *v6dst_ptr; 8555 in6_addr_t *v6src_ptr; 8556 uint_t end; 8557 uint_t hdr_length; 8558 size_t count; 8559 ipf_t *ipf; 8560 ipf_t **ipfp; 8561 ipfb_t *ipfb; 8562 mblk_t *mp1; 8563 uint8_t ecn_info = 0; 8564 size_t msg_len; 8565 mblk_t *tail_mp; 8566 mblk_t *t_mp; 8567 boolean_t pruned = B_FALSE; 8568 uint32_t sum_val; 8569 uint16_t sum_flags; 8570 8571 8572 if (cksum_val != NULL) 8573 *cksum_val = 0; 8574 if (cksum_flags != NULL) 8575 *cksum_flags = 0; 8576 8577 /* 8578 * We utilize hardware computed checksum info only for UDP since 8579 * IP fragmentation is a normal occurence for the protocol. In 8580 * addition, checksum offload support for IP fragments carrying 8581 * UDP payload is commonly implemented across network adapters. 
8582 */ 8583 ASSERT(ill != NULL); 8584 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8585 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8586 mblk_t *mp1 = mp->b_cont; 8587 int32_t len; 8588 8589 /* Record checksum information from the packet */ 8590 sum_val = (uint32_t)DB_CKSUM16(mp); 8591 sum_flags = DB_CKSUMFLAGS(mp); 8592 8593 /* fragmented payload offset from beginning of mblk */ 8594 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8595 8596 if ((sum_flags & HCK_PARTIALCKSUM) && 8597 (mp1 == NULL || mp1->b_cont == NULL) && 8598 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8599 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8600 uint32_t adj; 8601 /* 8602 * Partial checksum has been calculated by hardware 8603 * and attached to the packet; in addition, any 8604 * prepended extraneous data is even byte aligned. 8605 * If any such data exists, we adjust the checksum; 8606 * this would also handle any postpended data. 8607 */ 8608 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8609 mp, mp1, len, adj); 8610 8611 /* One's complement subtract extraneous checksum */ 8612 if (adj >= sum_val) 8613 sum_val = ~(adj - sum_val) & 0xFFFF; 8614 else 8615 sum_val -= adj; 8616 } 8617 } else { 8618 sum_val = 0; 8619 sum_flags = 0; 8620 } 8621 8622 /* Clear hardware checksumming flag */ 8623 DB_CKSUMFLAGS(mp) = 0; 8624 8625 /* 8626 * Note: Fragment offset in header is in 8-octet units. 8627 * Clearing least significant 3 bits not only extracts 8628 * it but also gets it in units of octets. 8629 */ 8630 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8631 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8632 8633 /* 8634 * Is the more frags flag on and the payload length not a multiple 8635 * of eight? 
8636 */ 8637 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8638 zoneid_t zoneid; 8639 8640 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8641 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 8642 if (zoneid == ALL_ZONES) { 8643 freemsg(mp); 8644 return (NULL); 8645 } 8646 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8647 (uint32_t)((char *)&ip6h->ip6_plen - 8648 (char *)ip6h), B_FALSE, B_FALSE, zoneid); 8649 return (NULL); 8650 } 8651 8652 v6src_ptr = &ip6h->ip6_src; 8653 v6dst_ptr = &ip6h->ip6_dst; 8654 end = remlen; 8655 8656 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8657 end += offset; 8658 8659 /* 8660 * Would fragment cause reassembled packet to have a payload length 8661 * greater than IP_MAXPACKET - the max payload size? 8662 */ 8663 if (end > IP_MAXPACKET) { 8664 zoneid_t zoneid; 8665 8666 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8667 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill); 8668 if (zoneid == ALL_ZONES) { 8669 freemsg(mp); 8670 return (NULL); 8671 } 8672 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8673 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8674 (char *)ip6h), B_FALSE, B_FALSE, zoneid); 8675 return (NULL); 8676 } 8677 8678 /* 8679 * This packet just has one fragment. Reassembly not 8680 * needed. 8681 */ 8682 if (!more_frags && offset == 0) { 8683 goto reass_done; 8684 } 8685 8686 /* 8687 * Drop the fragmented as early as possible, if 8688 * we don't have resource(s) to re-assemble. 8689 */ 8690 if (ip_reass_queue_bytes == 0) { 8691 freemsg(mp); 8692 return (NULL); 8693 } 8694 8695 /* Record the ECN field info. */ 8696 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8697 /* 8698 * If this is not the first fragment, dump the unfragmentable 8699 * portion of the packet. 8700 */ 8701 if (offset) 8702 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8703 8704 /* 8705 * Fragmentation reassembly. 
Each ILL has a hash table for 8706 * queueing packets undergoing reassembly for all IPIFs 8707 * associated with the ILL. The hash is based on the packet 8708 * IP ident field. The ILL frag hash table was allocated 8709 * as a timer block at the time the ILL was created. Whenever 8710 * there is anything on the reassembly queue, the timer will 8711 * be running. 8712 */ 8713 msg_len = MBLKSIZE(mp); 8714 tail_mp = mp; 8715 while (tail_mp->b_cont != NULL) { 8716 tail_mp = tail_mp->b_cont; 8717 msg_len += MBLKSIZE(tail_mp); 8718 } 8719 /* 8720 * If the reassembly list for this ILL will get too big 8721 * prune it. 8722 */ 8723 8724 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8725 ip_reass_queue_bytes) { 8726 ill_frag_prune(ill, (ip_reass_queue_bytes < msg_len) ? 0 8727 : (ip_reass_queue_bytes - msg_len)); 8728 pruned = B_TRUE; 8729 } 8730 8731 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8732 mutex_enter(&ipfb->ipfb_lock); 8733 8734 ipfp = &ipfb->ipfb_ipf; 8735 /* Try to find an existing fragment queue for this packet. */ 8736 for (;;) { 8737 ipf = ipfp[0]; 8738 if (ipf) { 8739 /* 8740 * It has to match on ident, source address, and 8741 * dest address. 8742 */ 8743 if (ipf->ipf_ident == ident && 8744 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8745 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8746 8747 /* 8748 * If we have received too many 8749 * duplicate fragments for this packet 8750 * free it. 8751 */ 8752 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8753 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8754 freemsg(mp); 8755 mutex_exit(&ipfb->ipfb_lock); 8756 return (NULL); 8757 } 8758 8759 break; 8760 } 8761 ipfp = &ipf->ipf_hash_next; 8762 continue; 8763 } 8764 8765 8766 /* 8767 * If we pruned the list, do we want to store this new 8768 * fragment?. We apply an optimization here based on the 8769 * fact that most fragments will be received in order. 
8770 * So if the offset of this incoming fragment is zero, 8771 * it is the first fragment of a new packet. We will 8772 * keep it. Otherwise drop the fragment, as we have 8773 * probably pruned the packet already (since the 8774 * packet cannot be found). 8775 */ 8776 8777 if (pruned && offset != 0) { 8778 mutex_exit(&ipfb->ipfb_lock); 8779 freemsg(mp); 8780 return (NULL); 8781 } 8782 8783 /* New guy. Allocate a frag message. */ 8784 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8785 if (!mp1) { 8786 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8787 freemsg(mp); 8788 partial_reass_done: 8789 mutex_exit(&ipfb->ipfb_lock); 8790 return (NULL); 8791 } 8792 8793 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS) { 8794 /* 8795 * Too many fragmented packets in this hash bucket. 8796 * Free the oldest. 8797 */ 8798 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8799 } 8800 8801 mp1->b_cont = mp; 8802 8803 /* Initialize the fragment header. */ 8804 ipf = (ipf_t *)mp1->b_rptr; 8805 ipf->ipf_mp = mp1; 8806 ipf->ipf_ptphn = ipfp; 8807 ipfp[0] = ipf; 8808 ipf->ipf_hash_next = NULL; 8809 ipf->ipf_ident = ident; 8810 ipf->ipf_v6src = *v6src_ptr; 8811 ipf->ipf_v6dst = *v6dst_ptr; 8812 /* Record reassembly start time. 
*/ 8813 ipf->ipf_timestamp = gethrestime_sec(); 8814 /* Record ipf generation and account for frag header */ 8815 ipf->ipf_gen = ill->ill_ipf_gen++; 8816 ipf->ipf_count = MBLKSIZE(mp1); 8817 ipf->ipf_protocol = nexthdr; 8818 ipf->ipf_nf_hdr_len = 0; 8819 ipf->ipf_prev_nexthdr_offset = 0; 8820 ipf->ipf_last_frag_seen = B_FALSE; 8821 ipf->ipf_ecn = ecn_info; 8822 ipf->ipf_num_dups = 0; 8823 ipfb->ipfb_frag_pkts++; 8824 ipf->ipf_checksum = 0; 8825 ipf->ipf_checksum_flags = 0; 8826 8827 /* Store checksum value in fragment header */ 8828 if (sum_flags != 0) { 8829 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8830 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8831 ipf->ipf_checksum = sum_val; 8832 ipf->ipf_checksum_flags = sum_flags; 8833 } 8834 8835 /* 8836 * We handle reassembly two ways. In the easy case, 8837 * where all the fragments show up in order, we do 8838 * minimal bookkeeping, and just clip new pieces on 8839 * the end. If we ever see a hole, then we go off 8840 * to ip_reassemble which has to mark the pieces and 8841 * keep track of the number of holes, etc. Obviously, 8842 * the point of having both mechanisms is so we can 8843 * handle the easy case as efficiently as possible. 8844 */ 8845 if (offset == 0) { 8846 /* Easy case, in-order reassembly so far. */ 8847 /* Update the byte count */ 8848 ipf->ipf_count += msg_len; 8849 ipf->ipf_tail_mp = tail_mp; 8850 /* 8851 * Keep track of next expected offset in 8852 * ipf_end. 8853 */ 8854 ipf->ipf_end = end; 8855 ipf->ipf_nf_hdr_len = hdr_length; 8856 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8857 } else { 8858 /* Hard case, hole at the beginning. */ 8859 ipf->ipf_tail_mp = NULL; 8860 /* 8861 * ipf_end == 0 means that we have given up 8862 * on easy reassembly. 8863 */ 8864 ipf->ipf_end = 0; 8865 8866 /* Forget checksum offload from now on */ 8867 ipf->ipf_checksum_flags = 0; 8868 8869 /* 8870 * ipf_hole_cnt is set by ip_reassemble. 8871 * ipf_count is updated by ip_reassemble. 
8872 * No need to check for return value here 8873 * as we don't expect reassembly to complete or 8874 * fail for the first fragment itself. 8875 */ 8876 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8877 msg_len); 8878 } 8879 /* Update per ipfb and ill byte counts */ 8880 ipfb->ipfb_count += ipf->ipf_count; 8881 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8882 ill->ill_frag_count += ipf->ipf_count; 8883 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8884 /* If the frag timer wasn't already going, start it. */ 8885 mutex_enter(&ill->ill_lock); 8886 ill_frag_timer_start(ill); 8887 mutex_exit(&ill->ill_lock); 8888 goto partial_reass_done; 8889 } 8890 8891 /* 8892 * If the packet's flag has changed (it could be coming up 8893 * from an interface different than the previous, therefore 8894 * possibly different checksum capability), then forget about 8895 * any stored checksum states. Otherwise add the value to 8896 * the existing one stored in the fragment header. 8897 */ 8898 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8899 sum_val += ipf->ipf_checksum; 8900 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8901 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8902 ipf->ipf_checksum = sum_val; 8903 } else if (ipf->ipf_checksum_flags != 0) { 8904 /* Forget checksum offload from now on */ 8905 ipf->ipf_checksum_flags = 0; 8906 } 8907 8908 /* 8909 * We have a new piece of a datagram which is already being 8910 * reassembled. Update the ECN info if all IP fragments 8911 * are ECN capable. If there is one which is not, clear 8912 * all the info. If there is at least one which has CE 8913 * code point, IP needs to report that up to transport. 
8914 */ 8915 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8916 if (ecn_info == IPH_ECN_CE) 8917 ipf->ipf_ecn = IPH_ECN_CE; 8918 } else { 8919 ipf->ipf_ecn = IPH_ECN_NECT; 8920 } 8921 8922 if (offset && ipf->ipf_end == offset) { 8923 /* The new fragment fits at the end */ 8924 ipf->ipf_tail_mp->b_cont = mp; 8925 /* Update the byte count */ 8926 ipf->ipf_count += msg_len; 8927 /* Update per ipfb and ill byte counts */ 8928 ipfb->ipfb_count += msg_len; 8929 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8930 ill->ill_frag_count += msg_len; 8931 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8932 if (more_frags) { 8933 /* More to come. */ 8934 ipf->ipf_end = end; 8935 ipf->ipf_tail_mp = tail_mp; 8936 goto partial_reass_done; 8937 } 8938 } else { 8939 /* 8940 * Go do the hard cases. 8941 * Call ip_reassemble(). 8942 */ 8943 int ret; 8944 8945 if (offset == 0) { 8946 if (ipf->ipf_prev_nexthdr_offset == 0) { 8947 ipf->ipf_nf_hdr_len = hdr_length; 8948 ipf->ipf_prev_nexthdr_offset = 8949 *prev_nexthdr_offset; 8950 } 8951 } 8952 /* Save current byte count */ 8953 count = ipf->ipf_count; 8954 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8955 8956 /* Count of bytes added and subtracted (freeb()ed) */ 8957 count = ipf->ipf_count - count; 8958 if (count) { 8959 /* Update per ipfb and ill byte counts */ 8960 ipfb->ipfb_count += count; 8961 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8962 ill->ill_frag_count += count; 8963 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8964 } 8965 if (ret == IP_REASS_PARTIAL) { 8966 goto partial_reass_done; 8967 } else if (ret == IP_REASS_FAILED) { 8968 /* Reassembly failed. 
Free up all resources */ 8969 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8970 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8971 IP_REASS_SET_START(t_mp, 0); 8972 IP_REASS_SET_END(t_mp, 0); 8973 } 8974 freemsg(mp); 8975 goto partial_reass_done; 8976 } 8977 8978 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8979 } 8980 /* 8981 * We have completed reassembly. Unhook the frag header from 8982 * the reassembly list. 8983 * 8984 * Grab the unfragmentable header length next header value out 8985 * of the first fragment 8986 */ 8987 ASSERT(ipf->ipf_nf_hdr_len != 0); 8988 hdr_length = ipf->ipf_nf_hdr_len; 8989 8990 /* 8991 * Before we free the frag header, record the ECN info 8992 * to report back to the transport. 8993 */ 8994 ecn_info = ipf->ipf_ecn; 8995 8996 /* 8997 * Store the nextheader field in the header preceding the fragment 8998 * header 8999 */ 9000 nexthdr = ipf->ipf_protocol; 9001 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 9002 ipfp = ipf->ipf_ptphn; 9003 9004 /* We need to supply these to caller */ 9005 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 9006 sum_val = ipf->ipf_checksum; 9007 else 9008 sum_val = 0; 9009 9010 mp1 = ipf->ipf_mp; 9011 count = ipf->ipf_count; 9012 ipf = ipf->ipf_hash_next; 9013 if (ipf) 9014 ipf->ipf_ptphn = ipfp; 9015 ipfp[0] = ipf; 9016 ill->ill_frag_count -= count; 9017 ASSERT(ipfb->ipfb_count >= count); 9018 ipfb->ipfb_count -= count; 9019 ipfb->ipfb_frag_pkts--; 9020 mutex_exit(&ipfb->ipfb_lock); 9021 /* Ditch the frag header. */ 9022 mp = mp1->b_cont; 9023 freeb(mp1); 9024 9025 /* 9026 * Make sure the packet is good by doing some sanity 9027 * check. If bad we can silentely drop the packet. 
9028 */ 9029 reass_done: 9030 if (hdr_length < sizeof (ip6_frag_t)) { 9031 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 9032 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 9033 freemsg(mp); 9034 return (NULL); 9035 } 9036 9037 /* 9038 * Remove the fragment header from the initial header by 9039 * splitting the mblk into the non-fragmentable header and 9040 * everthing after the fragment extension header. This has the 9041 * side effect of putting all the headers that need destination 9042 * processing into the b_cont block-- on return this fact is 9043 * used in order to avoid having to look at the extensions 9044 * already processed. 9045 * 9046 * Note that this code assumes that the unfragmentable portion 9047 * of the header is in the first mblk and increments 9048 * the read pointer past it. If this assumption is broken 9049 * this code fails badly. 9050 */ 9051 if (mp->b_rptr + hdr_length != mp->b_wptr) { 9052 mblk_t *nmp; 9053 9054 if (!(nmp = dupb(mp))) { 9055 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 9056 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 9057 freemsg(mp); 9058 return (NULL); 9059 } 9060 nmp->b_cont = mp->b_cont; 9061 mp->b_cont = nmp; 9062 nmp->b_rptr += hdr_length; 9063 } 9064 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 9065 9066 ip6h = (ip6_t *)mp->b_rptr; 9067 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 9068 9069 /* Restore original IP length in header. */ 9070 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 9071 /* Record the ECN info. */ 9072 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 9073 ip6h->ip6_vcf |= htonl(ecn_info << 20); 9074 9075 /* Reassembly is successful; return checksum information if needed */ 9076 if (cksum_val != NULL) 9077 *cksum_val = sum_val; 9078 if (cksum_flags != NULL) 9079 *cksum_flags = sum_flags; 9080 9081 return (mp); 9082 } 9083 9084 /* 9085 * Walk through the options to see if there is a routing header. 
9086 * If present get the destination which is the last address of 9087 * the option. 9088 */ 9089 in6_addr_t 9090 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 9091 { 9092 uint8_t nexthdr; 9093 uint8_t *whereptr; 9094 ip6_hbh_t *hbhhdr; 9095 ip6_dest_t *dsthdr; 9096 ip6_rthdr0_t *rthdr; 9097 ip6_frag_t *fraghdr; 9098 int ehdrlen; 9099 int left; 9100 in6_addr_t *ap, rv; 9101 9102 if (is_fragment != NULL) 9103 *is_fragment = B_FALSE; 9104 9105 rv = ip6h->ip6_dst; 9106 9107 nexthdr = ip6h->ip6_nxt; 9108 whereptr = (uint8_t *)&ip6h[1]; 9109 for (;;) { 9110 9111 ASSERT(nexthdr != IPPROTO_RAW); 9112 switch (nexthdr) { 9113 case IPPROTO_HOPOPTS: 9114 hbhhdr = (ip6_hbh_t *)whereptr; 9115 nexthdr = hbhhdr->ip6h_nxt; 9116 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9117 break; 9118 case IPPROTO_DSTOPTS: 9119 dsthdr = (ip6_dest_t *)whereptr; 9120 nexthdr = dsthdr->ip6d_nxt; 9121 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9122 break; 9123 case IPPROTO_ROUTING: 9124 rthdr = (ip6_rthdr0_t *)whereptr; 9125 nexthdr = rthdr->ip6r0_nxt; 9126 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9127 9128 left = rthdr->ip6r0_segleft; 9129 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9130 rv = *(ap + left - 1); 9131 /* 9132 * If the caller doesn't care whether the packet 9133 * is a fragment or not, we can stop here since 9134 * we have our destination. 9135 */ 9136 if (is_fragment == NULL) 9137 goto done; 9138 break; 9139 case IPPROTO_FRAGMENT: 9140 fraghdr = (ip6_frag_t *)whereptr; 9141 nexthdr = fraghdr->ip6f_nxt; 9142 ehdrlen = sizeof (ip6_frag_t); 9143 if (is_fragment != NULL) 9144 *is_fragment = B_TRUE; 9145 goto done; 9146 default : 9147 goto done; 9148 } 9149 whereptr += ehdrlen; 9150 } 9151 9152 done: 9153 return (rv); 9154 } 9155 9156 /* 9157 * ip_source_routed_v6: 9158 * This function is called by redirect code in ip_rput_data_v6 to 9159 * know whether this packet is source routed through this node i.e 9160 * whether this node (router) is part of the journey. 
This 9161 * function is called under two cases : 9162 * 9163 * case 1 : Routing header was processed by this node and 9164 * ip_process_rthdr replaced ip6_dst with the next hop 9165 * and we are forwarding the packet to the next hop. 9166 * 9167 * case 2 : Routing header was not processed by this node and we 9168 * are just forwarding the packet. 9169 * 9170 * For case (1) we don't want to send redirects. For case(2) we 9171 * want to send redirects. 9172 */ 9173 static boolean_t 9174 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp) 9175 { 9176 uint8_t nexthdr; 9177 in6_addr_t *addrptr; 9178 ip6_rthdr0_t *rthdr; 9179 uint8_t numaddr; 9180 ip6_hbh_t *hbhhdr; 9181 uint_t ehdrlen; 9182 uint8_t *byteptr; 9183 9184 ip2dbg(("ip_source_routed_v6\n")); 9185 nexthdr = ip6h->ip6_nxt; 9186 ehdrlen = IPV6_HDR_LEN; 9187 9188 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9189 while (nexthdr == IPPROTO_HOPOPTS || 9190 nexthdr == IPPROTO_DSTOPTS) { 9191 byteptr = (uint8_t *)ip6h + ehdrlen; 9192 /* 9193 * Check if we have already processed 9194 * packets or we are just a forwarding 9195 * router which only pulled up msgs up 9196 * to IPV6HDR and one HBH ext header 9197 */ 9198 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9199 ip2dbg(("ip_source_routed_v6: Extension" 9200 " headers not processed\n")); 9201 return (B_FALSE); 9202 } 9203 hbhhdr = (ip6_hbh_t *)byteptr; 9204 nexthdr = hbhhdr->ip6h_nxt; 9205 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9206 } 9207 switch (nexthdr) { 9208 case IPPROTO_ROUTING: 9209 byteptr = (uint8_t *)ip6h + ehdrlen; 9210 /* 9211 * If for some reason, we haven't pulled up 9212 * the routing hdr data mblk, then we must 9213 * not have processed it at all. So for sure 9214 * we are not part of the source routed journey. 
9215 */ 9216 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9217 ip2dbg(("ip_source_routed_v6: Routing" 9218 " header not processed\n")); 9219 return (B_FALSE); 9220 } 9221 rthdr = (ip6_rthdr0_t *)byteptr; 9222 /* 9223 * Either we are an intermediate router or the 9224 * last hop before destination and we have 9225 * already processed the routing header. 9226 * If segment_left is greater than or equal to zero, 9227 * then we must be the (numaddr - segleft) entry 9228 * of the routing header. Although ip6r0_segleft 9229 * is a unit8_t variable, we still check for zero 9230 * or greater value, if in case the data type 9231 * is changed someday in future. 9232 */ 9233 if (rthdr->ip6r0_segleft > 0 || 9234 rthdr->ip6r0_segleft == 0) { 9235 ire_t *ire = NULL; 9236 9237 numaddr = rthdr->ip6r0_len / 2; 9238 addrptr = (in6_addr_t *)((char *)rthdr + 9239 sizeof (*rthdr)); 9240 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9241 if (addrptr != NULL) { 9242 ire = ire_ctable_lookup_v6(addrptr, NULL, 9243 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9244 MATCH_IRE_TYPE); 9245 if (ire != NULL) { 9246 ire_refrele(ire); 9247 return (B_TRUE); 9248 } 9249 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9250 } 9251 } 9252 /* FALLTHRU */ 9253 default: 9254 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9255 return (B_FALSE); 9256 } 9257 } 9258 9259 /* 9260 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9261 * Assumes that the following set of headers appear in the first 9262 * mblk: 9263 * ip6i_t (if present) CAN also appear as a separate mblk. 9264 * ip6_t 9265 * Any extension headers 9266 * TCP/UDP/SCTP header (if present) 9267 * The routine can handle an ICMPv6 header that is not in the first mblk. 9268 * 9269 * The order to determine the outgoing interface is as follows: 9270 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9271 * 2. If conn_nofailover_ill is set then use that ill. 9272 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9273 * 4. If q is an ill queue and (link local or multicast destination) then 9274 * use that ill. 9275 * 5. If IPV6_BOUND_IF has been set use that ill. 9276 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9277 * look for the best IRE match for the unspecified group to determine 9278 * the ill. 9279 * 7. For unicast: Just do an IRE lookup for the best match. 9280 * 9281 * arg2 is always a queue_t *. 9282 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9283 * the zoneid. 9284 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9285 */ 9286 void 9287 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9288 { 9289 conn_t *connp = NULL; 9290 queue_t *q = (queue_t *)arg2; 9291 ire_t *ire = NULL; 9292 ire_t *sctp_ire = NULL; 9293 ip6_t *ip6h; 9294 in6_addr_t *v6dstp; 9295 ill_t *ill = NULL; 9296 ipif_t *ipif; 9297 ip6i_t *ip6i; 9298 int cksum_request; /* -1 => normal. */ 9299 /* 1 => Skip TCP/UDP/SCTP checksum */ 9300 /* Otherwise contains insert offset for checksum */ 9301 int unspec_src; 9302 boolean_t do_outrequests; /* Increment OutRequests? */ 9303 mib2_ipIfStatsEntry_t *mibptr; 9304 int match_flags = MATCH_IRE_ILL_GROUP; 9305 boolean_t attach_if = B_FALSE; 9306 mblk_t *first_mp; 9307 boolean_t mctl_present; 9308 ipsec_out_t *io; 9309 boolean_t drop_if_delayed = B_FALSE; 9310 boolean_t multirt_need_resolve = B_FALSE; 9311 mblk_t *copy_mp = NULL; 9312 int err; 9313 int ip6i_flags = 0; 9314 zoneid_t zoneid; 9315 ill_t *saved_ill = NULL; 9316 boolean_t conn_lock_held; 9317 boolean_t need_decref = B_FALSE; 9318 9319 /* 9320 * Highest bit in version field is Reachability Confirmation bit 9321 * used by NUD in ip_xmit_v6(). 
9322 */ 9323 #ifdef _BIG_ENDIAN 9324 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9325 #else 9326 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9327 #endif 9328 9329 /* 9330 * M_CTL comes from 6 places 9331 * 9332 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9333 * both V4 and V6 datagrams. 9334 * 9335 * 2) AH/ESP sends down M_CTL after doing their job with both 9336 * V4 and V6 datagrams. 9337 * 9338 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9339 * attached. 9340 * 9341 * 4) Notifications from an external resolver (for XRESOLV ifs) 9342 * 9343 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9344 * IPsec hardware acceleration support. 9345 * 9346 * 6) TUN_HELLO. 9347 * 9348 * We need to handle (1)'s IPv6 case and (3) here. For the 9349 * IPv4 case in (1), and (2), IPSEC processing has already 9350 * started. The code in ip_wput() already knows how to handle 9351 * continuing IPSEC processing (for IPv4 and IPv6). All other 9352 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9353 * for handling. 9354 */ 9355 first_mp = mp; 9356 mctl_present = B_FALSE; 9357 io = NULL; 9358 9359 /* Multidata transmit? */ 9360 if (DB_TYPE(mp) == M_MULTIDATA) { 9361 /* 9362 * We should never get here, since all Multidata messages 9363 * originating from tcp should have been directed over to 9364 * tcp_multisend() in the first place. 9365 */ 9366 BUMP_MIB(&ip6_mib, ipIfStatsOutDiscards); 9367 freemsg(mp); 9368 return; 9369 } else if (DB_TYPE(mp) == M_CTL) { 9370 uint32_t mctltype = 0; 9371 uint32_t mlen = MBLKL(first_mp); 9372 9373 mp = mp->b_cont; 9374 mctl_present = B_TRUE; 9375 io = (ipsec_out_t *)first_mp->b_rptr; 9376 9377 /* 9378 * Validate this M_CTL message. The only three types of 9379 * M_CTL messages we expect to see in this code path are 9380 * ipsec_out_t or ipsec_in_t structures (allocated as 9381 * ipsec_info_t unions), or ipsec_ctl_t structures. 
9382 * The ipsec_out_type and ipsec_in_type overlap in the two 9383 * data structures, and they are either set to IPSEC_OUT 9384 * or IPSEC_IN depending on which data structure it is. 9385 * ipsec_ctl_t is an IPSEC_CTL. 9386 * 9387 * All other M_CTL messages are sent to ip_wput_nondata() 9388 * for handling. 9389 */ 9390 if (mlen >= sizeof (io->ipsec_out_type)) 9391 mctltype = io->ipsec_out_type; 9392 9393 if ((mlen == sizeof (ipsec_ctl_t)) && 9394 (mctltype == IPSEC_CTL)) { 9395 ip_output(arg, first_mp, arg2, caller); 9396 return; 9397 } 9398 9399 if ((mlen < sizeof (ipsec_info_t)) || 9400 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9401 mp == NULL) { 9402 ip_wput_nondata(NULL, q, first_mp, NULL); 9403 return; 9404 } 9405 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9406 if (q->q_next == NULL) { 9407 ip6h = (ip6_t *)mp->b_rptr; 9408 /* 9409 * For a freshly-generated TCP dgram that needs IPV6 9410 * processing, don't call ip_wput immediately. We can 9411 * tell this by the ipsec_out_proc_begin. In-progress 9412 * IPSEC_OUT messages have proc_begin set to TRUE, 9413 * and we want to send all IPSEC_IN messages to 9414 * ip_wput() for IPsec processing or finishing. 9415 */ 9416 if (mctltype == IPSEC_IN || 9417 IPVER(ip6h) != IPV6_VERSION || 9418 io->ipsec_out_proc_begin) { 9419 mibptr = &ip6_mib; 9420 goto notv6; 9421 } 9422 } 9423 } else if (DB_TYPE(mp) != M_DATA) { 9424 ip_wput_nondata(NULL, q, mp, NULL); 9425 return; 9426 } 9427 9428 ip6h = (ip6_t *)mp->b_rptr; 9429 9430 if (IPVER(ip6h) != IPV6_VERSION) { 9431 mibptr = &ip6_mib; 9432 goto notv6; 9433 } 9434 9435 if (q->q_next != NULL) { 9436 ill = (ill_t *)q->q_ptr; 9437 /* 9438 * We don't know if this ill will be used for IPv6 9439 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9440 * ipif_set_values() sets the ill_isv6 flag to true if 9441 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9442 * just drop the packet. 
9443 */ 9444 if (!ill->ill_isv6) { 9445 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9446 "ILLF_IPV6 was set\n")); 9447 freemsg(first_mp); 9448 return; 9449 } 9450 /* For uniformity do a refhold */ 9451 mutex_enter(&ill->ill_lock); 9452 if (!ILL_CAN_LOOKUP(ill)) { 9453 mutex_exit(&ill->ill_lock); 9454 freemsg(first_mp); 9455 return; 9456 } 9457 ill_refhold_locked(ill); 9458 mutex_exit(&ill->ill_lock); 9459 mibptr = ill->ill_ip_mib; 9460 9461 ASSERT(mibptr != NULL); 9462 unspec_src = 0; 9463 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9464 do_outrequests = B_FALSE; 9465 zoneid = (zoneid_t)(uintptr_t)arg; 9466 } else { 9467 connp = (conn_t *)arg; 9468 ASSERT(connp != NULL); 9469 zoneid = connp->conn_zoneid; 9470 9471 /* is queue flow controlled? */ 9472 if ((q->q_first || connp->conn_draining) && 9473 (caller == IP_WPUT)) { 9474 /* 9475 * 1) TCP sends down M_CTL for detached connections. 9476 * 2) AH/ESP sends down M_CTL. 9477 * 9478 * We don't flow control either of the above. Only 9479 * UDP and others are flow controlled for which we 9480 * can't have a M_CTL. 9481 */ 9482 ASSERT(first_mp == mp); 9483 (void) putq(q, mp); 9484 return; 9485 } 9486 mibptr = &ip6_mib; 9487 unspec_src = connp->conn_unspec_src; 9488 do_outrequests = B_TRUE; 9489 if (mp->b_flag & MSGHASREF) { 9490 mp->b_flag &= ~MSGHASREF; 9491 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9492 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9493 need_decref = B_TRUE; 9494 } 9495 9496 /* 9497 * If there is a policy, try to attach an ipsec_out in 9498 * the front. At the end, first_mp either points to a 9499 * M_DATA message or IPSEC_OUT message linked to a 9500 * M_DATA message. We have to do it now as we might 9501 * lose the "conn" if we go through ip_newroute. 9502 */ 9503 if (!mctl_present && 9504 (connp->conn_out_enforce_policy || 9505 connp->conn_latch != NULL)) { 9506 ASSERT(first_mp == mp); 9507 /* XXX Any better way to get the protocol fast ? 
*/ 9508 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9509 connp->conn_ulp)) == NULL)) { 9510 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9511 if (need_decref) 9512 CONN_DEC_REF(connp); 9513 return; 9514 } else { 9515 ASSERT(mp->b_datap->db_type == M_CTL); 9516 first_mp = mp; 9517 mp = mp->b_cont; 9518 mctl_present = B_TRUE; 9519 io = (ipsec_out_t *)first_mp->b_rptr; 9520 } 9521 } 9522 } 9523 9524 /* check for alignment and full IPv6 header */ 9525 if (!OK_32PTR((uchar_t *)ip6h) || 9526 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9527 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9528 if (do_outrequests) 9529 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9530 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9531 freemsg(first_mp); 9532 if (ill != NULL) 9533 ill_refrele(ill); 9534 if (need_decref) 9535 CONN_DEC_REF(connp); 9536 return; 9537 } 9538 v6dstp = &ip6h->ip6_dst; 9539 cksum_request = -1; 9540 ip6i = NULL; 9541 9542 /* 9543 * Once neighbor discovery has completed, ndp_process() will provide 9544 * locally generated packets for which processing can be reattempted. 9545 * In these cases, connp is NULL and the original zone is part of a 9546 * prepended ipsec_out_t. 9547 */ 9548 if (io != NULL) { 9549 /* 9550 * When coming from icmp_input_v6, the zoneid might not match 9551 * for the loopback case, because inside icmp_input_v6 the 9552 * queue_t is a conn queue from the sending side. 9553 */ 9554 zoneid = io->ipsec_out_zoneid; 9555 ASSERT(zoneid != ALL_ZONES); 9556 } 9557 9558 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9559 /* 9560 * This is an ip6i_t header followed by an ip6_hdr. 9561 * Check which fields are set. 9562 * 9563 * When the packet comes from a transport we should have 9564 * all needed headers in the first mblk. However, when 9565 * going through ip_newroute*_v6 the ip6i might be in 9566 * a separate mblk when we return here. In that case 9567 * we pullup everything to ensure that extension and transport 9568 * headers "stay" in the first mblk. 
9569 */ 9570 ip6i = (ip6i_t *)ip6h; 9571 ip6i_flags = ip6i->ip6i_flags; 9572 9573 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9574 ((mp->b_wptr - (uchar_t *)ip6i) >= 9575 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9576 9577 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9578 if (!pullupmsg(mp, -1)) { 9579 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9580 if (do_outrequests) { 9581 BUMP_MIB(mibptr, 9582 ipIfStatsHCOutRequests); 9583 } 9584 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9585 freemsg(first_mp); 9586 if (ill != NULL) 9587 ill_refrele(ill); 9588 if (need_decref) 9589 CONN_DEC_REF(connp); 9590 return; 9591 } 9592 ip6h = (ip6_t *)mp->b_rptr; 9593 v6dstp = &ip6h->ip6_dst; 9594 ip6i = (ip6i_t *)ip6h; 9595 } 9596 ip6h = (ip6_t *)&ip6i[1]; 9597 9598 /* 9599 * Advance rptr past the ip6i_t to get ready for 9600 * transmitting the packet. However, if the packet gets 9601 * passed to ip_newroute*_v6 then rptr is moved back so 9602 * that the ip6i_t header can be inspected when the 9603 * packet comes back here after passing through 9604 * ire_add_then_send. 9605 */ 9606 mp->b_rptr = (uchar_t *)ip6h; 9607 9608 /* 9609 * IP6I_ATTACH_IF is set in this function when we had a 9610 * conn and it was either bound to the IPFF_NOFAILOVER address 9611 * or IPV6_BOUND_PIF was set. These options override other 9612 * options that set the ifindex. We come here with 9613 * IP6I_ATTACH_IF set when we can't find the ire and 9614 * ip_newroute_v6 is feeding the packet for second time. 
9615 */ 9616 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9617 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9618 ASSERT(ip6i->ip6i_ifindex != 0); 9619 if (ill != NULL) 9620 ill_refrele(ill); 9621 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9622 NULL, NULL, NULL, NULL); 9623 if (ill == NULL) { 9624 if (do_outrequests) { 9625 BUMP_MIB(mibptr, 9626 ipIfStatsHCOutRequests); 9627 } 9628 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9629 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9630 ip6i->ip6i_ifindex)); 9631 if (need_decref) 9632 CONN_DEC_REF(connp); 9633 freemsg(first_mp); 9634 return; 9635 } 9636 mibptr = ill->ill_ip_mib; 9637 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9638 /* 9639 * Preserve the index so that when we return 9640 * from IPSEC processing, we know where to 9641 * send the packet. 9642 */ 9643 if (mctl_present) { 9644 ASSERT(io != NULL); 9645 io->ipsec_out_ill_index = 9646 ip6i->ip6i_ifindex; 9647 } 9648 } 9649 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9650 /* 9651 * This is a multipathing probe packet that has 9652 * been delayed in ND resolution. Drop the 9653 * packet for the reasons mentioned in 9654 * nce_queue_mp() 9655 */ 9656 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9657 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9658 freemsg(first_mp); 9659 ill_refrele(ill); 9660 if (need_decref) 9661 CONN_DEC_REF(connp); 9662 return; 9663 } 9664 } 9665 } 9666 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9667 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9668 9669 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9670 if (secpolicy_net_rawaccess(cr) != 0) { 9671 /* 9672 * Use IPCL_ZONEID to honor SO_ALLZONES. 9673 */ 9674 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9675 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9676 NULL, connp != NULL ? 
9677 IPCL_ZONEID(connp) : zoneid, NULL, 9678 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY); 9679 if (ire == NULL) { 9680 if (do_outrequests) 9681 BUMP_MIB(mibptr, 9682 ipIfStatsHCOutRequests); 9683 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9684 ip1dbg(("ip_wput_v6: bad source " 9685 "addr\n")); 9686 freemsg(first_mp); 9687 if (ill != NULL) 9688 ill_refrele(ill); 9689 if (need_decref) 9690 CONN_DEC_REF(connp); 9691 return; 9692 } 9693 ire_refrele(ire); 9694 } 9695 /* No need to verify again when using ip_newroute */ 9696 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9697 } 9698 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9699 /* 9700 * Make sure they match since ip_newroute*_v6 etc might 9701 * (unknown to them) inspect ip6i_nexthop when 9702 * they think they access ip6_dst. 9703 */ 9704 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9705 } 9706 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9707 cksum_request = 1; 9708 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9709 cksum_request = ip6i->ip6i_checksum_off; 9710 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9711 unspec_src = 1; 9712 9713 if (do_outrequests && ill != NULL) { 9714 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9715 do_outrequests = B_FALSE; 9716 } 9717 /* 9718 * Store ip6i_t info that we need after we come back 9719 * from IPSEC processing. 9720 */ 9721 if (mctl_present) { 9722 ASSERT(io != NULL); 9723 io->ipsec_out_unspec_src = unspec_src; 9724 } 9725 } 9726 if (connp != NULL && connp->conn_dontroute) 9727 ip6h->ip6_hops = 1; 9728 9729 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9730 goto ipv6multicast; 9731 9732 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. 
*/ 9733 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9734 ill_t *conn_outgoing_pill; 9735 9736 conn_outgoing_pill = conn_get_held_ill(connp, 9737 &connp->conn_outgoing_pill, &err); 9738 if (err == ILL_LOOKUP_FAILED) { 9739 if (ill != NULL) 9740 ill_refrele(ill); 9741 if (need_decref) 9742 CONN_DEC_REF(connp); 9743 freemsg(first_mp); 9744 return; 9745 } 9746 if (conn_outgoing_pill != NULL) { 9747 if (ill != NULL) 9748 ill_refrele(ill); 9749 ill = conn_outgoing_pill; 9750 attach_if = B_TRUE; 9751 match_flags = MATCH_IRE_ILL; 9752 mibptr = ill->ill_ip_mib; 9753 9754 /* 9755 * Check if we need an ire that will not be 9756 * looked up by anybody else i.e. HIDDEN. 9757 */ 9758 if (ill_is_probeonly(ill)) 9759 match_flags |= MATCH_IRE_MARK_HIDDEN; 9760 goto send_from_ill; 9761 } 9762 } 9763 9764 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9765 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9766 ill_t *conn_nofailover_ill; 9767 9768 conn_nofailover_ill = conn_get_held_ill(connp, 9769 &connp->conn_nofailover_ill, &err); 9770 if (err == ILL_LOOKUP_FAILED) { 9771 if (ill != NULL) 9772 ill_refrele(ill); 9773 if (need_decref) 9774 CONN_DEC_REF(connp); 9775 freemsg(first_mp); 9776 return; 9777 } 9778 if (conn_nofailover_ill != NULL) { 9779 if (ill != NULL) 9780 ill_refrele(ill); 9781 ill = conn_nofailover_ill; 9782 attach_if = B_TRUE; 9783 /* 9784 * Assumes that ipc_nofailover_ill is used only for 9785 * multipathing probe packets. These packets are better 9786 * dropped, if they are delayed in ND resolution, for 9787 * the reasons described in nce_queue_mp(). 9788 * IP6I_DROP_IFDELAYED will be set later on in this 9789 * function for this packet. 9790 */ 9791 drop_if_delayed = B_TRUE; 9792 match_flags = MATCH_IRE_ILL; 9793 mibptr = ill->ill_ip_mib; 9794 9795 /* 9796 * Check if we need an ire that will not be 9797 * looked up by anybody else i.e. HIDDEN. 
9798 */ 9799 if (ill_is_probeonly(ill)) 9800 match_flags |= MATCH_IRE_MARK_HIDDEN; 9801 goto send_from_ill; 9802 } 9803 } 9804 9805 /* 9806 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9807 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9808 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9809 */ 9810 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9811 ASSERT(ip6i->ip6i_ifindex != 0); 9812 attach_if = B_TRUE; 9813 ASSERT(ill != NULL); 9814 match_flags = MATCH_IRE_ILL; 9815 9816 /* 9817 * Check if we need an ire that will not be 9818 * looked up by anybody else i.e. HIDDEN. 9819 */ 9820 if (ill_is_probeonly(ill)) 9821 match_flags |= MATCH_IRE_MARK_HIDDEN; 9822 goto send_from_ill; 9823 } 9824 9825 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9826 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9827 ASSERT(ill != NULL); 9828 goto send_from_ill; 9829 } 9830 9831 /* 9832 * 4. If q is an ill queue and (link local or multicast destination) 9833 * then use that ill. 9834 */ 9835 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9836 goto send_from_ill; 9837 } 9838 9839 /* 5. If IPV6_BOUND_IF has been set use that ill. */ 9840 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9841 ill_t *conn_outgoing_ill; 9842 9843 conn_outgoing_ill = conn_get_held_ill(connp, 9844 &connp->conn_outgoing_ill, &err); 9845 if (err == ILL_LOOKUP_FAILED) { 9846 if (ill != NULL) 9847 ill_refrele(ill); 9848 if (need_decref) 9849 CONN_DEC_REF(connp); 9850 freemsg(first_mp); 9851 return; 9852 } 9853 if (ill != NULL) 9854 ill_refrele(ill); 9855 ill = conn_outgoing_ill; 9856 mibptr = ill->ill_ip_mib; 9857 goto send_from_ill; 9858 } 9859 9860 /* 9861 * 6. For unicast: Just do an IRE lookup for the best match. 9862 * If we get here for a link-local address it is rather random 9863 * what interface we pick on a multihomed host. 
9864 * *If* there is an IRE_CACHE (and the link-local address 9865 * isn't duplicated on multi links) this will find the IRE_CACHE. 9866 * Otherwise it will use one of the matching IRE_INTERFACE routes 9867 * for the link-local prefix. Hence, applications 9868 * *should* be encouraged to specify an outgoing interface when sending 9869 * to a link local address. 9870 */ 9871 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9872 !connp->conn_fully_bound)) { 9873 /* 9874 * We cache IRE_CACHEs to avoid lookups. We don't do 9875 * this for the tcp global queue and listen end point 9876 * as it does not really have a real destination to 9877 * talk to. 9878 */ 9879 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp)); 9880 } else { 9881 /* 9882 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9883 * grab a lock here to check for CONDEMNED as it is okay 9884 * to send a packet or two with the IRE_CACHE that is going 9885 * away. 9886 */ 9887 mutex_enter(&connp->conn_lock); 9888 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9889 if (ire != NULL && 9890 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9891 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9892 9893 IRE_REFHOLD(ire); 9894 mutex_exit(&connp->conn_lock); 9895 9896 } else { 9897 boolean_t cached = B_FALSE; 9898 9899 connp->conn_ire_cache = NULL; 9900 mutex_exit(&connp->conn_lock); 9901 /* Release the old ire */ 9902 if (ire != NULL && sctp_ire == NULL) 9903 IRE_REFRELE_NOTR(ire); 9904 9905 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9906 MBLK_GETLABEL(mp)); 9907 if (ire != NULL) { 9908 IRE_REFHOLD_NOTR(ire); 9909 9910 mutex_enter(&connp->conn_lock); 9911 if (!(connp->conn_state_flags & CONN_CLOSING) && 9912 (connp->conn_ire_cache == NULL)) { 9913 rw_enter(&ire->ire_bucket->irb_lock, 9914 RW_READER); 9915 if (!(ire->ire_marks & 9916 IRE_MARK_CONDEMNED)) { 9917 connp->conn_ire_cache = ire; 9918 cached = B_TRUE; 9919 } 9920 rw_exit(&ire->ire_bucket->irb_lock); 9921 } 9922 mutex_exit(&connp->conn_lock); 9923 9924 /* 9925 * We can continue to use the ire but since it 9926 * was not cached, we should drop the extra 9927 * reference. 9928 */ 9929 if (!cached) 9930 IRE_REFRELE_NOTR(ire); 9931 } 9932 } 9933 } 9934 9935 if (ire != NULL) { 9936 if (do_outrequests) { 9937 /* Handle IRE_LOCAL's that might appear here */ 9938 if (ire->ire_type == IRE_CACHE) { 9939 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9940 ill_ip_mib; 9941 } else { 9942 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9943 } 9944 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9945 } 9946 ASSERT(!attach_if); 9947 9948 /* 9949 * Check if the ire has the RTF_MULTIRT flag, inherited 9950 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9951 */ 9952 if (ire->ire_flags & RTF_MULTIRT) { 9953 /* 9954 * Force hop limit of multirouted packets if required. 9955 * The hop limit of such packets is bounded by the 9956 * ip_multirt_ttl ndd variable. 9957 * NDP packets must have a hop limit of 255; don't 9958 * change the hop limit in that case. 
9959 */ 9960 if ((ip_multirt_ttl > 0) && 9961 (ip6h->ip6_hops > ip_multirt_ttl) && 9962 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9963 if (ip_debug > 3) { 9964 ip2dbg(("ip_wput_v6: forcing multirt " 9965 "hop limit to %d (was %d) ", 9966 ip_multirt_ttl, ip6h->ip6_hops)); 9967 pr_addr_dbg("v6dst %s\n", AF_INET6, 9968 &ire->ire_addr_v6); 9969 } 9970 ip6h->ip6_hops = ip_multirt_ttl; 9971 } 9972 9973 /* 9974 * We look at this point if there are pending 9975 * unresolved routes. ire_multirt_need_resolve_v6() 9976 * checks in O(n) that all IRE_OFFSUBNET ire 9977 * entries for the packet's destination and 9978 * flagged RTF_MULTIRT are currently resolved. 9979 * If some remain unresolved, we do a copy 9980 * of the current message. It will be used 9981 * to initiate additional route resolutions. 9982 */ 9983 multirt_need_resolve = 9984 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9985 MBLK_GETLABEL(first_mp)); 9986 ip2dbg(("ip_wput_v6: ire %p, " 9987 "multirt_need_resolve %d, first_mp %p\n", 9988 (void *)ire, multirt_need_resolve, 9989 (void *)first_mp)); 9990 if (multirt_need_resolve) { 9991 copy_mp = copymsg(first_mp); 9992 if (copy_mp != NULL) { 9993 MULTIRT_DEBUG_TAG(copy_mp); 9994 } 9995 } 9996 } 9997 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9998 connp, caller, 0, ip6i_flags, zoneid); 9999 if (need_decref) { 10000 CONN_DEC_REF(connp); 10001 connp = NULL; 10002 } 10003 IRE_REFRELE(ire); 10004 10005 /* 10006 * Try to resolve another multiroute if 10007 * ire_multirt_need_resolve_v6() deemed it necessary. 10008 * copy_mp will be consumed (sent or freed) by 10009 * ip_newroute_v6(). 
10010 */ 10011 if (copy_mp != NULL) { 10012 if (mctl_present) { 10013 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10014 } else { 10015 ip6h = (ip6_t *)copy_mp->b_rptr; 10016 } 10017 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10018 &ip6h->ip6_src, NULL, zoneid); 10019 } 10020 if (ill != NULL) 10021 ill_refrele(ill); 10022 return; 10023 } 10024 10025 /* 10026 * No full IRE for this destination. Send it to 10027 * ip_newroute_v6 to see if anything else matches. 10028 * Mark this packet as having originated on this 10029 * machine. 10030 * Update rptr if there was an ip6i_t header. 10031 */ 10032 mp->b_prev = NULL; 10033 mp->b_next = NULL; 10034 if (ip6i != NULL) 10035 mp->b_rptr -= sizeof (ip6i_t); 10036 10037 if (unspec_src) { 10038 if (ip6i == NULL) { 10039 /* 10040 * Add ip6i_t header to carry unspec_src 10041 * until the packet comes back in ip_wput_v6. 10042 */ 10043 mp = ip_add_info_v6(mp, NULL, v6dstp); 10044 if (mp == NULL) { 10045 if (do_outrequests) 10046 BUMP_MIB(mibptr, 10047 ipIfStatsHCOutRequests); 10048 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10049 if (mctl_present) 10050 freeb(first_mp); 10051 if (ill != NULL) 10052 ill_refrele(ill); 10053 if (need_decref) 10054 CONN_DEC_REF(connp); 10055 return; 10056 } 10057 ip6i = (ip6i_t *)mp->b_rptr; 10058 10059 if (mctl_present) { 10060 ASSERT(first_mp != mp); 10061 first_mp->b_cont = mp; 10062 } else { 10063 first_mp = mp; 10064 } 10065 10066 if ((mp->b_wptr - (uchar_t *)ip6i) == 10067 sizeof (ip6i_t)) { 10068 /* 10069 * ndp_resolver called from ip_newroute_v6 10070 * expects pulled up message. 
10071 */ 10072 if (!pullupmsg(mp, -1)) { 10073 ip1dbg(("ip_wput_v6: pullupmsg" 10074 " failed\n")); 10075 if (do_outrequests) { 10076 BUMP_MIB(mibptr, 10077 ipIfStatsHCOutRequests); 10078 } 10079 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10080 freemsg(first_mp); 10081 if (ill != NULL) 10082 ill_refrele(ill); 10083 if (need_decref) 10084 CONN_DEC_REF(connp); 10085 return; 10086 } 10087 ip6i = (ip6i_t *)mp->b_rptr; 10088 } 10089 ip6h = (ip6_t *)&ip6i[1]; 10090 v6dstp = &ip6h->ip6_dst; 10091 } 10092 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10093 if (mctl_present) { 10094 ASSERT(io != NULL); 10095 io->ipsec_out_unspec_src = unspec_src; 10096 } 10097 } 10098 if (do_outrequests) 10099 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10100 if (need_decref) 10101 CONN_DEC_REF(connp); 10102 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid); 10103 if (ill != NULL) 10104 ill_refrele(ill); 10105 return; 10106 10107 10108 /* 10109 * Handle multicast packets with or without an conn. 10110 * Assumes that the transports set ip6_hops taking 10111 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10112 * into account. 10113 */ 10114 ipv6multicast: 10115 ip2dbg(("ip_wput_v6: multicast\n")); 10116 10117 /* 10118 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10119 * 2. If conn_nofailover_ill is set then use that ill. 10120 * 10121 * Hold the conn_lock till we refhold the ill of interest that is 10122 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10123 * while holding any locks, postpone the refrele until after the 10124 * conn_lock is dropped. 
10125 */ 10126 if (connp != NULL) { 10127 mutex_enter(&connp->conn_lock); 10128 conn_lock_held = B_TRUE; 10129 } else { 10130 conn_lock_held = B_FALSE; 10131 } 10132 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10133 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10134 if (err == ILL_LOOKUP_FAILED) { 10135 ip1dbg(("ip_output_v6: multicast" 10136 " conn_outgoing_pill no ipif\n")); 10137 multicast_discard: 10138 ASSERT(saved_ill == NULL); 10139 if (conn_lock_held) 10140 mutex_exit(&connp->conn_lock); 10141 if (ill != NULL) 10142 ill_refrele(ill); 10143 freemsg(first_mp); 10144 if (do_outrequests) 10145 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10146 if (need_decref) 10147 CONN_DEC_REF(connp); 10148 return; 10149 } 10150 saved_ill = ill; 10151 ill = connp->conn_outgoing_pill; 10152 attach_if = B_TRUE; 10153 match_flags = MATCH_IRE_ILL; 10154 mibptr = ill->ill_ip_mib; 10155 10156 /* 10157 * Check if we need an ire that will not be 10158 * looked up by anybody else i.e. HIDDEN. 10159 */ 10160 if (ill_is_probeonly(ill)) 10161 match_flags |= MATCH_IRE_MARK_HIDDEN; 10162 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10163 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10164 if (err == ILL_LOOKUP_FAILED) { 10165 ip1dbg(("ip_output_v6: multicast" 10166 " conn_nofailover_ill no ipif\n")); 10167 goto multicast_discard; 10168 } 10169 saved_ill = ill; 10170 ill = connp->conn_nofailover_ill; 10171 attach_if = B_TRUE; 10172 match_flags = MATCH_IRE_ILL; 10173 10174 /* 10175 * Check if we need an ire that will not be 10176 * looked up by anybody else i.e. HIDDEN. 10177 */ 10178 if (ill_is_probeonly(ill)) 10179 match_flags |= MATCH_IRE_MARK_HIDDEN; 10180 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10181 /* 10182 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10183 * we should have an ip6i_t with IP6I_ATTACH_IF if 10184 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10185 * used on this endpoint. 10186 */ 10187 ASSERT(ip6i->ip6i_ifindex != 0); 10188 attach_if = B_TRUE; 10189 ASSERT(ill != NULL); 10190 match_flags = MATCH_IRE_ILL; 10191 10192 /* 10193 * Check if we need an ire that will not be 10194 * looked up by anybody else i.e. HIDDEN. 10195 */ 10196 if (ill_is_probeonly(ill)) 10197 match_flags |= MATCH_IRE_MARK_HIDDEN; 10198 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10199 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10200 10201 ASSERT(ill != NULL); 10202 } else if (ill != NULL) { 10203 /* 10204 * 4. If q is an ill queue and (link local or multicast 10205 * destination) then use that ill. 10206 * We don't need the ipif initialization here. 10207 * This useless assert below is just to prevent lint from 10208 * reporting a null body if statement. 10209 */ 10210 ASSERT(ill != NULL); 10211 } else if (connp != NULL) { 10212 /* 10213 * 5. If IPV6_BOUND_IF has been set use that ill. 10214 * 10215 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10216 * Otherwise look for the best IRE match for the unspecified 10217 * group to determine the ill. 10218 * 10219 * conn_multicast_ill is used for only IPv6 packets. 10220 * conn_multicast_ipif is used for only IPv4 packets. 10221 * Thus a PF_INET6 socket send both IPv4 and IPv6 10222 * multicast packets using different IP*_MULTICAST_IF 10223 * interfaces. 
10224 */ 10225 if (connp->conn_outgoing_ill != NULL) { 10226 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10227 if (err == ILL_LOOKUP_FAILED) { 10228 ip1dbg(("ip_output_v6: multicast" 10229 " conn_outgoing_ill no ipif\n")); 10230 goto multicast_discard; 10231 } 10232 ill = connp->conn_outgoing_ill; 10233 } else if (connp->conn_multicast_ill != NULL) { 10234 err = ill_check_and_refhold(connp->conn_multicast_ill); 10235 if (err == ILL_LOOKUP_FAILED) { 10236 ip1dbg(("ip_output_v6: multicast" 10237 " conn_multicast_ill no ipif\n")); 10238 goto multicast_discard; 10239 } 10240 ill = connp->conn_multicast_ill; 10241 } else { 10242 mutex_exit(&connp->conn_lock); 10243 conn_lock_held = B_FALSE; 10244 ipif = ipif_lookup_group_v6(v6dstp, zoneid); 10245 if (ipif == NULL) { 10246 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10247 goto multicast_discard; 10248 } 10249 /* 10250 * We have a ref to this ipif, so we can safely 10251 * access ipif_ill. 10252 */ 10253 ill = ipif->ipif_ill; 10254 mutex_enter(&ill->ill_lock); 10255 if (!ILL_CAN_LOOKUP(ill)) { 10256 mutex_exit(&ill->ill_lock); 10257 ipif_refrele(ipif); 10258 ill = NULL; 10259 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10260 goto multicast_discard; 10261 } 10262 ill_refhold_locked(ill); 10263 mutex_exit(&ill->ill_lock); 10264 ipif_refrele(ipif); 10265 /* 10266 * Save binding until IPV6_MULTICAST_IF 10267 * changes it 10268 */ 10269 mutex_enter(&connp->conn_lock); 10270 connp->conn_multicast_ill = ill; 10271 connp->conn_orig_multicast_ifindex = 10272 ill->ill_phyint->phyint_ifindex; 10273 mutex_exit(&connp->conn_lock); 10274 } 10275 } 10276 if (conn_lock_held) 10277 mutex_exit(&connp->conn_lock); 10278 10279 if (saved_ill != NULL) 10280 ill_refrele(saved_ill); 10281 10282 ASSERT(ill != NULL); 10283 /* 10284 * For multicast loopback interfaces replace the multicast address 10285 * with a unicast address for the ire lookup. 
10286 */ 10287 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 10288 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10289 10290 mibptr = ill->ill_ip_mib; 10291 if (do_outrequests) { 10292 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10293 do_outrequests = B_FALSE; 10294 } 10295 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10296 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10297 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10298 10299 /* 10300 * As we may lose the conn by the time we reach ip_wput_ire_v6 10301 * we copy conn_multicast_loop and conn_dontroute on to an 10302 * ipsec_out. In case if this datagram goes out secure, 10303 * we need the ill_index also. Copy that also into the 10304 * ipsec_out. 10305 */ 10306 if (mctl_present) { 10307 io = (ipsec_out_t *)first_mp->b_rptr; 10308 ASSERT(first_mp->b_datap->db_type == M_CTL); 10309 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10310 } else { 10311 ASSERT(mp == first_mp); 10312 if ((first_mp = ipsec_alloc_ipsec_out()) == NULL) { 10313 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10314 freemsg(mp); 10315 if (ill != NULL) 10316 ill_refrele(ill); 10317 if (need_decref) 10318 CONN_DEC_REF(connp); 10319 return; 10320 } 10321 io = (ipsec_out_t *)first_mp->b_rptr; 10322 /* This is not a secure packet */ 10323 io->ipsec_out_secure = B_FALSE; 10324 io->ipsec_out_use_global_policy = B_TRUE; 10325 io->ipsec_out_zoneid = 10326 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10327 first_mp->b_cont = mp; 10328 mctl_present = B_TRUE; 10329 } 10330 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10331 io->ipsec_out_unspec_src = unspec_src; 10332 if (connp != NULL) 10333 io->ipsec_out_dontroute = connp->conn_dontroute; 10334 10335 send_from_ill: 10336 ASSERT(ill != NULL); 10337 ASSERT(mibptr == ill->ill_ip_mib); 10338 if (do_outrequests) { 10339 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10340 do_outrequests = B_FALSE; 10341 } 10342 10343 if (io != NULL) 10344 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10345 10346 /* 10347 * When a specific ill is specified (using IPV6_PKTINFO, 10348 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10349 * on routing entries (ftable and ctable) that have a matching 10350 * ire->ire_ipif->ipif_ill. Thus this can only be used 10351 * for destinations that are on-link for the specific ill 10352 * and that can appear on multiple links. Thus it is useful 10353 * for multicast destinations, link-local destinations, and 10354 * at some point perhaps for site-local destinations (if the 10355 * node sits at a site boundary). 10356 * We create the cache entries in the regular ctable since 10357 * it can not "confuse" things for other destinations. 10358 * table. 10359 * 10360 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10361 * It is used only when ire_cache_lookup is used above. 10362 */ 10363 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10364 zoneid, MBLK_GETLABEL(mp), match_flags); 10365 if (ire != NULL) { 10366 /* 10367 * Check if the ire has the RTF_MULTIRT flag, inherited 10368 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10369 */ 10370 if (ire->ire_flags & RTF_MULTIRT) { 10371 /* 10372 * Force hop limit of multirouted packets if required. 10373 * The hop limit of such packets is bounded by the 10374 * ip_multirt_ttl ndd variable. 
10375 * NDP packets must have a hop limit of 255; don't 10376 * change the hop limit in that case. 10377 */ 10378 if ((ip_multirt_ttl > 0) && 10379 (ip6h->ip6_hops > ip_multirt_ttl) && 10380 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10381 if (ip_debug > 3) { 10382 ip2dbg(("ip_wput_v6: forcing multirt " 10383 "hop limit to %d (was %d) ", 10384 ip_multirt_ttl, ip6h->ip6_hops)); 10385 pr_addr_dbg("v6dst %s\n", AF_INET6, 10386 &ire->ire_addr_v6); 10387 } 10388 ip6h->ip6_hops = ip_multirt_ttl; 10389 } 10390 10391 /* 10392 * We look at this point if there are pending 10393 * unresolved routes. ire_multirt_need_resolve_v6() 10394 * checks in O(n) that all IRE_OFFSUBNET ire 10395 * entries for the packet's destination and 10396 * flagged RTF_MULTIRT are currently resolved. 10397 * If some remain unresolved, we make a copy 10398 * of the current message. It will be used 10399 * to initiate additional route resolutions. 10400 */ 10401 multirt_need_resolve = 10402 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10403 MBLK_GETLABEL(first_mp)); 10404 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10405 "multirt_need_resolve %d, first_mp %p\n", 10406 (void *)ire, multirt_need_resolve, 10407 (void *)first_mp)); 10408 if (multirt_need_resolve) { 10409 copy_mp = copymsg(first_mp); 10410 if (copy_mp != NULL) { 10411 MULTIRT_DEBUG_TAG(copy_mp); 10412 } 10413 } 10414 } 10415 10416 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10417 ill->ill_name, (void *)ire, 10418 ill->ill_phyint->phyint_ifindex)); 10419 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10420 connp, caller, 10421 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10422 ip6i_flags, zoneid); 10423 ire_refrele(ire); 10424 if (need_decref) { 10425 CONN_DEC_REF(connp); 10426 connp = NULL; 10427 } 10428 10429 /* 10430 * Try to resolve another multiroute if 10431 * ire_multirt_need_resolve_v6() deemed it necessary. 10432 * copy_mp will be consumed (sent or freed) by 10433 * ip_newroute_[ipif_]v6(). 
10434 */ 10435 if (copy_mp != NULL) { 10436 if (mctl_present) { 10437 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10438 } else { 10439 ip6h = (ip6_t *)copy_mp->b_rptr; 10440 } 10441 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10442 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10443 zoneid); 10444 if (ipif == NULL) { 10445 ip1dbg(("ip_wput_v6: No ipif for " 10446 "multicast\n")); 10447 MULTIRT_DEBUG_UNTAG(copy_mp); 10448 freemsg(copy_mp); 10449 return; 10450 } 10451 ip_newroute_ipif_v6(q, copy_mp, ipif, 10452 ip6h->ip6_dst, unspec_src, zoneid); 10453 ipif_refrele(ipif); 10454 } else { 10455 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10456 &ip6h->ip6_src, ill, zoneid); 10457 } 10458 } 10459 ill_refrele(ill); 10460 return; 10461 } 10462 if (need_decref) { 10463 CONN_DEC_REF(connp); 10464 connp = NULL; 10465 } 10466 10467 /* Update rptr if there was an ip6i_t header. */ 10468 if (ip6i != NULL) 10469 mp->b_rptr -= sizeof (ip6i_t); 10470 if (unspec_src || attach_if) { 10471 if (ip6i == NULL) { 10472 /* 10473 * Add ip6i_t header to carry unspec_src 10474 * or attach_if until the packet comes back in 10475 * ip_wput_v6. 10476 */ 10477 if (mctl_present) { 10478 first_mp->b_cont = 10479 ip_add_info_v6(mp, NULL, v6dstp); 10480 mp = first_mp->b_cont; 10481 if (mp == NULL) 10482 freeb(first_mp); 10483 } else { 10484 first_mp = mp = ip_add_info_v6(mp, NULL, 10485 v6dstp); 10486 } 10487 if (mp == NULL) { 10488 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10489 ill_refrele(ill); 10490 return; 10491 } 10492 ip6i = (ip6i_t *)mp->b_rptr; 10493 if ((mp->b_wptr - (uchar_t *)ip6i) == 10494 sizeof (ip6i_t)) { 10495 /* 10496 * ndp_resolver called from ip_newroute_v6 10497 * expects a pulled up message. 
10498 */ 10499 if (!pullupmsg(mp, -1)) { 10500 ip1dbg(("ip_wput_v6: pullupmsg" 10501 " failed\n")); 10502 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10503 freemsg(first_mp); 10504 return; 10505 } 10506 ip6i = (ip6i_t *)mp->b_rptr; 10507 } 10508 ip6h = (ip6_t *)&ip6i[1]; 10509 v6dstp = &ip6h->ip6_dst; 10510 } 10511 if (unspec_src) 10512 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10513 if (attach_if) { 10514 /* 10515 * Bind to nofailover/BOUND_PIF overrides ifindex. 10516 */ 10517 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10518 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10519 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10520 if (drop_if_delayed) { 10521 /* This is a multipathing probe packet */ 10522 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10523 } 10524 } 10525 if (mctl_present) { 10526 ASSERT(io != NULL); 10527 io->ipsec_out_unspec_src = unspec_src; 10528 } 10529 } 10530 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10531 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10532 unspec_src, zoneid); 10533 } else { 10534 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10535 zoneid); 10536 } 10537 ill_refrele(ill); 10538 return; 10539 10540 notv6: 10541 /* 10542 * XXX implement a IPv4 and IPv6 packet counter per conn and 10543 * switch when ratio exceeds e.g. 10:1 10544 */ 10545 if (q->q_next == NULL) { 10546 connp = Q_TO_CONN(q); 10547 10548 if (IPCL_IS_TCP(connp)) { 10549 /* change conn_send for the tcp_v4_connections */ 10550 connp->conn_send = ip_output; 10551 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10552 /* The 'q' is the default SCTP queue */ 10553 connp = (conn_t *)arg; 10554 } else { 10555 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE); 10556 } 10557 } 10558 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10559 (void) ip_output(arg, first_mp, arg2, caller); 10560 if (ill != NULL) 10561 ill_refrele(ill); 10562 } 10563 10564 /* 10565 * If this is a conn_t queue, then we pass in the conn. This includes the 10566 * zoneid. 
10567 * Otherwise, this is a message for an ill_t queue, 10568 * in which case we use the global zoneid since those are all part of 10569 * the global zone. 10570 */ 10571 static void 10572 ip_wput_v6(queue_t *q, mblk_t *mp) 10573 { 10574 if (CONN_Q(q)) 10575 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10576 else 10577 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10578 } 10579 10580 static void 10581 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10582 { 10583 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10584 io->ipsec_out_attach_if = B_TRUE; 10585 io->ipsec_out_ill_index = attach_index; 10586 } 10587 10588 /* 10589 * NULL send-to queue - packet is to be delivered locally. 10590 */ 10591 void 10592 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10593 ire_t *ire, int fanout_flags) 10594 { 10595 uint32_t ports; 10596 mblk_t *mp = first_mp, *first_mp1; 10597 boolean_t mctl_present; 10598 uint8_t nexthdr; 10599 uint16_t hdr_length; 10600 ipsec_out_t *io; 10601 mib2_ipIfStatsEntry_t *mibptr; 10602 ilm_t *ilm; 10603 uint_t nexthdr_offset; 10604 10605 if (DB_TYPE(mp) == M_CTL) { 10606 io = (ipsec_out_t *)mp->b_rptr; 10607 if (!io->ipsec_out_secure) { 10608 mp = mp->b_cont; 10609 freeb(first_mp); 10610 first_mp = mp; 10611 mctl_present = B_FALSE; 10612 } else { 10613 mctl_present = B_TRUE; 10614 mp = first_mp->b_cont; 10615 ipsec_out_to_in(first_mp); 10616 } 10617 } else { 10618 mctl_present = B_FALSE; 10619 } 10620 10621 /* 10622 * Remove reachability confirmation bit from version field 10623 * before passing the packet on to any firewall hooks or 10624 * looping back the packet. 
10625 */ 10626 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10627 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10628 10629 DTRACE_PROBE4(ip6__loopback__in__start, 10630 ill_t *, ill, ill_t *, NULL, 10631 ip6_t *, ip6h, mblk_t *, first_mp); 10632 10633 FW_HOOKS6(ip6_loopback_in_event, ipv6firewall_loopback_in, 10634 ill, NULL, ip6h, first_mp, mp); 10635 10636 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10637 10638 if (first_mp == NULL) 10639 return; 10640 10641 nexthdr = ip6h->ip6_nxt; 10642 mibptr = ill->ill_ip_mib; 10643 10644 /* Fastpath */ 10645 switch (nexthdr) { 10646 case IPPROTO_TCP: 10647 case IPPROTO_UDP: 10648 case IPPROTO_ICMPV6: 10649 case IPPROTO_SCTP: 10650 hdr_length = IPV6_HDR_LEN; 10651 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10652 (uchar_t *)ip6h); 10653 break; 10654 default: { 10655 uint8_t *nexthdrp; 10656 10657 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10658 &hdr_length, &nexthdrp)) { 10659 /* Malformed packet */ 10660 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10661 freemsg(first_mp); 10662 return; 10663 } 10664 nexthdr = *nexthdrp; 10665 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10666 break; 10667 } 10668 } 10669 10670 UPDATE_OB_PKT_COUNT(ire); 10671 ire->ire_last_used_time = lbolt; 10672 10673 switch (nexthdr) { 10674 case IPPROTO_TCP: 10675 if (DB_TYPE(mp) == M_DATA) { 10676 /* 10677 * M_DATA mblk, so init mblk (chain) for 10678 * no struio(). 
10679 */ 10680 mblk_t *mp1 = mp; 10681 10682 do { 10683 mp1->b_datap->db_struioflag = 0; 10684 } while ((mp1 = mp1->b_cont) != NULL); 10685 } 10686 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10687 TCP_PORTS_OFFSET); 10688 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10689 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10690 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10691 hdr_length, mctl_present, ire->ire_zoneid); 10692 return; 10693 10694 case IPPROTO_UDP: 10695 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10696 UDP_PORTS_OFFSET); 10697 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10698 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10699 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10700 return; 10701 10702 case IPPROTO_SCTP: 10703 { 10704 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10705 10706 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10707 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10708 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10709 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10710 ire->ire_zoneid); 10711 return; 10712 } 10713 case IPPROTO_ICMPV6: { 10714 icmp6_t *icmp6; 10715 10716 /* check for full IPv6+ICMPv6 header */ 10717 if ((mp->b_wptr - mp->b_rptr) < 10718 (hdr_length + ICMP6_MINLEN)) { 10719 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10720 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10721 " failed\n")); 10722 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10723 freemsg(first_mp); 10724 return; 10725 } 10726 ip6h = (ip6_t *)mp->b_rptr; 10727 } 10728 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10729 10730 /* Update output mib stats */ 10731 icmp_update_out_mib_v6(ill, icmp6); 10732 10733 /* Check variable for testing applications */ 10734 if (ipv6_drop_inbound_icmpv6) { 10735 freemsg(first_mp); 10736 return; 10737 } 10738 /* 10739 * Assume that there is always at least one conn for 10740 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10741 * where there is no conn. 
10742 */ 10743 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10744 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10745 /* 10746 * In the multicast case, applications may have 10747 * joined the group from different zones, so we 10748 * need to deliver the packet to each of them. 10749 * Loop through the multicast memberships 10750 * structures (ilm) on the receive ill and send 10751 * a copy of the packet up each matching one. 10752 * However, we don't do this for multicasts sent 10753 * on the loopback interface (PHYI_LOOPBACK flag 10754 * set) as they must stay in the sender's zone. 10755 */ 10756 ILM_WALKER_HOLD(ill); 10757 for (ilm = ill->ill_ilm; ilm != NULL; 10758 ilm = ilm->ilm_next) { 10759 if (ilm->ilm_flags & ILM_DELETED) 10760 continue; 10761 if (!IN6_ARE_ADDR_EQUAL( 10762 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10763 continue; 10764 if ((fanout_flags & 10765 IP_FF_NO_MCAST_LOOP) && 10766 ilm->ilm_zoneid == ire->ire_zoneid) 10767 continue; 10768 if (!ipif_lookup_zoneid(ill, 10769 ilm->ilm_zoneid, IPIF_UP, NULL)) 10770 continue; 10771 10772 first_mp1 = ip_copymsg(first_mp); 10773 if (first_mp1 == NULL) 10774 continue; 10775 icmp_inbound_v6(q, first_mp1, ill, 10776 hdr_length, mctl_present, 10777 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10778 NULL); 10779 } 10780 ILM_WALKER_RELE(ill); 10781 } else { 10782 first_mp1 = ip_copymsg(first_mp); 10783 if (first_mp1 != NULL) 10784 icmp_inbound_v6(q, first_mp1, ill, 10785 hdr_length, mctl_present, 10786 IP6_NO_IPPOLICY, ire->ire_zoneid, 10787 NULL); 10788 } 10789 } 10790 /* FALLTHRU */ 10791 default: { 10792 /* 10793 * Handle protocols with which IPv6 is less intimate. 10794 */ 10795 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10796 10797 /* 10798 * Enable sending ICMP for "Unknown" nexthdr 10799 * case. i.e. where we did not FALLTHRU from 10800 * IPPROTO_ICMPV6 processing case above. 
10801 */ 10802 if (nexthdr != IPPROTO_ICMPV6) 10803 fanout_flags |= IP_FF_SEND_ICMP; 10804 /* 10805 * Note: There can be more than one stream bound 10806 * to a particular protocol. When this is the case, 10807 * each one gets a copy of any incoming packets. 10808 */ 10809 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10810 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10811 mctl_present, ire->ire_zoneid); 10812 return; 10813 } 10814 } 10815 } 10816 10817 /* 10818 * Send packet using IRE. 10819 * Checksumming is controlled by cksum_request: 10820 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10821 * 1 => Skip TCP/UDP/SCTP checksum 10822 * Otherwise => checksum_request contains insert offset for checksum 10823 * 10824 * Assumes that the following set of headers appear in the first 10825 * mblk: 10826 * ip6_t 10827 * Any extension headers 10828 * TCP/UDP/SCTP header (if present) 10829 * The routine can handle an ICMPv6 header that is not in the first mblk. 10830 * 10831 * NOTE : This function does not ire_refrele the ire passed in as the 10832 * argument unlike ip_wput_ire where the REFRELE is done. 10833 * Refer to ip_wput_ire for more on this. 10834 */ 10835 static void 10836 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10837 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10838 zoneid_t zoneid) 10839 { 10840 ip6_t *ip6h; 10841 uint8_t nexthdr; 10842 uint16_t hdr_length; 10843 uint_t reachable = 0x0; 10844 ill_t *ill; 10845 mib2_ipIfStatsEntry_t *mibptr; 10846 mblk_t *first_mp; 10847 boolean_t mctl_present; 10848 ipsec_out_t *io; 10849 boolean_t conn_dontroute; /* conn value for multicast */ 10850 boolean_t conn_multicast_loop; /* conn value for multicast */ 10851 boolean_t multicast_forward; /* Should we forward ? 
*/ 10852 int max_frag; 10853 10854 ill = ire_to_ill(ire); 10855 first_mp = mp; 10856 multicast_forward = B_FALSE; 10857 10858 if (mp->b_datap->db_type != M_CTL) { 10859 ip6h = (ip6_t *)first_mp->b_rptr; 10860 } else { 10861 io = (ipsec_out_t *)first_mp->b_rptr; 10862 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10863 /* 10864 * Grab the zone id now because the M_CTL can be discarded by 10865 * ip_wput_ire_parse_ipsec_out() below. 10866 */ 10867 ASSERT(zoneid == io->ipsec_out_zoneid); 10868 ASSERT(zoneid != ALL_ZONES); 10869 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10870 /* 10871 * For the multicast case, ipsec_out carries conn_dontroute and 10872 * conn_multicast_loop as conn may not be available here. We 10873 * need this for multicast loopback and forwarding which is done 10874 * later in the code. 10875 */ 10876 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10877 conn_dontroute = io->ipsec_out_dontroute; 10878 conn_multicast_loop = io->ipsec_out_multicast_loop; 10879 /* 10880 * If conn_dontroute is not set or conn_multicast_loop 10881 * is set, we need to do forwarding/loopback. For 10882 * datagrams from ip_wput_multicast, conn_dontroute is 10883 * set to B_TRUE and conn_multicast_loop is set to 10884 * B_FALSE so that we neither do forwarding nor 10885 * loopback. 10886 */ 10887 if (!conn_dontroute || conn_multicast_loop) 10888 multicast_forward = B_TRUE; 10889 } 10890 } 10891 10892 /* 10893 * If the sender didn't supply the hop limit and there is a default 10894 * unicast hop limit associated with the output interface, we use 10895 * that if the packet is unicast. Interface specific unicast hop 10896 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10897 */ 10898 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10899 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10900 ip6h->ip6_hops = ill->ill_max_hops; 10901 } 10902 10903 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10904 ire->ire_zoneid != ALL_ZONES) { 10905 /* 10906 * When a zone sends a packet to another zone, we try to deliver 10907 * the packet under the same conditions as if the destination 10908 * was a real node on the network. To do so, we look for a 10909 * matching route in the forwarding table. 10910 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10911 * ip_newroute_v6() does. 10912 * Note that IRE_LOCAL are special, since they are used 10913 * when the zoneid doesn't match in some cases. This means that 10914 * we need to handle ipha_src differently since ire_src_addr 10915 * belongs to the receiving zone instead of the sending zone. 10916 * When ip_restrict_interzone_loopback is set, then 10917 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10918 * for loopback between zones when the logical "Ethernet" would 10919 * have looped them back. 
10920 */ 10921 ire_t *src_ire; 10922 10923 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10924 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10925 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE)); 10926 if (src_ire != NULL && 10927 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10928 (!ip_restrict_interzone_loopback || 10929 ire_local_same_ill_group(ire, src_ire))) { 10930 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10931 !unspec_src) { 10932 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10933 } 10934 ire_refrele(src_ire); 10935 } else { 10936 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10937 if (src_ire != NULL) { 10938 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10939 ire_refrele(src_ire); 10940 freemsg(first_mp); 10941 return; 10942 } 10943 ire_refrele(src_ire); 10944 } 10945 if (ip_hdr_complete_v6(ip6h, zoneid)) { 10946 /* Failed */ 10947 freemsg(first_mp); 10948 return; 10949 } 10950 icmp_unreachable_v6(q, first_mp, 10951 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10952 zoneid); 10953 return; 10954 } 10955 } 10956 10957 if (mp->b_datap->db_type == M_CTL || ipsec_outbound_v6_policy_present) { 10958 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10959 connp, unspec_src, zoneid); 10960 if (mp == NULL) { 10961 return; 10962 } 10963 } 10964 10965 first_mp = mp; 10966 if (mp->b_datap->db_type == M_CTL) { 10967 io = (ipsec_out_t *)mp->b_rptr; 10968 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10969 mp = mp->b_cont; 10970 mctl_present = B_TRUE; 10971 } else { 10972 mctl_present = B_FALSE; 10973 } 10974 10975 ip6h = (ip6_t *)mp->b_rptr; 10976 nexthdr = ip6h->ip6_nxt; 10977 mibptr = ill->ill_ip_mib; 10978 10979 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10980 ipif_t *ipif; 10981 10982 /* 10983 * Select the source address using ipif_select_source_v6. 
10984 */ 10985 if (attach_index != 0) { 10986 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10987 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10988 } else { 10989 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10990 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10991 } 10992 if (ipif == NULL) { 10993 if (ip_debug > 2) { 10994 /* ip1dbg */ 10995 pr_addr_dbg("ip_wput_ire_v6: no src for " 10996 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10997 printf("ip_wput_ire_v6: interface name %s\n", 10998 ill->ill_name); 10999 } 11000 freemsg(first_mp); 11001 return; 11002 } 11003 ip6h->ip6_src = ipif->ipif_v6src_addr; 11004 ipif_refrele(ipif); 11005 } 11006 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11007 if ((connp != NULL && connp->conn_multicast_loop) || 11008 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 11009 ilm_t *ilm; 11010 11011 ILM_WALKER_HOLD(ill); 11012 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 11013 ILM_WALKER_RELE(ill); 11014 if (ilm != NULL) { 11015 mblk_t *nmp; 11016 int fanout_flags = 0; 11017 11018 if (connp != NULL && 11019 !connp->conn_multicast_loop) { 11020 fanout_flags |= IP_FF_NO_MCAST_LOOP; 11021 } 11022 ip1dbg(("ip_wput_ire_v6: " 11023 "Loopback multicast\n")); 11024 nmp = ip_copymsg(first_mp); 11025 if (nmp != NULL) { 11026 ip6_t *nip6h; 11027 mblk_t *mp_ip6h; 11028 11029 if (mctl_present) { 11030 nip6h = (ip6_t *) 11031 nmp->b_cont->b_rptr; 11032 mp_ip6h = nmp->b_cont; 11033 } else { 11034 nip6h = (ip6_t *)nmp->b_rptr; 11035 mp_ip6h = nmp; 11036 } 11037 11038 DTRACE_PROBE4( 11039 ip6__loopback__out__start, 11040 ill_t *, NULL, 11041 ill_t *, ill, 11042 ip6_t *, nip6h, 11043 mblk_t *, nmp); 11044 11045 FW_HOOKS6(ip6_loopback_out_event, 11046 ipv6firewall_loopback_out, 11047 NULL, ill, nip6h, nmp, mp_ip6h); 11048 11049 DTRACE_PROBE1( 11050 ip6__loopback__out__end, 11051 mblk_t *, nmp); 11052 11053 if (nmp != NULL) { 11054 /* 11055 * Deliver locally and to 11056 * every local zone, except 11057 * the sending zone 
when 11058 * IPV6_MULTICAST_LOOP is 11059 * disabled. 11060 */ 11061 ip_wput_local_v6(RD(q), ill, 11062 nip6h, nmp, 11063 ire, fanout_flags); 11064 } 11065 } else { 11066 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11067 ip1dbg(("ip_wput_ire_v6: " 11068 "copymsg failed\n")); 11069 } 11070 } 11071 } 11072 if (ip6h->ip6_hops == 0 || 11073 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 11074 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 11075 /* 11076 * Local multicast or just loopback on loopback 11077 * interface. 11078 */ 11079 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 11080 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 11081 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11082 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11083 freemsg(first_mp); 11084 return; 11085 } 11086 } 11087 11088 if (ire->ire_stq != NULL) { 11089 uint32_t sum; 11090 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11091 ill_phyint->phyint_ifindex; 11092 queue_t *dev_q = ire->ire_stq->q_next; 11093 11094 /* 11095 * non-NULL send-to queue - packet is to be sent 11096 * out an interface. 11097 */ 11098 11099 /* Driver is flow-controlling? */ 11100 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11101 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 11102 /* 11103 * Queue packet if we have an conn to give back 11104 * pressure. We can't queue packets intended for 11105 * hardware acceleration since we've tossed that 11106 * state already. If the packet is being fed back 11107 * from ire_send_v6, we don't know the position in 11108 * the queue to enqueue the packet and we discard 11109 * the packet. 11110 */ 11111 if (ip_output_queue && connp != NULL && 11112 !mctl_present && caller != IRE_SEND) { 11113 if (caller == IP_WSRV) { 11114 connp->conn_did_putbq = 1; 11115 (void) putbq(connp->conn_wq, mp); 11116 conn_drain_insert(connp); 11117 /* 11118 * caller == IP_WSRV implies we are 11119 * the service thread, and the 11120 * queue is already noenabled. 
11121 * The check for canput and 11122 * the putbq is not atomic. 11123 * So we need to check again. 11124 */ 11125 if (canput(dev_q)) 11126 connp->conn_did_putbq = 0; 11127 } else { 11128 (void) putq(connp->conn_wq, mp); 11129 } 11130 return; 11131 } 11132 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11133 freemsg(first_mp); 11134 return; 11135 } 11136 11137 /* 11138 * Look for reachability confirmations from the transport. 11139 */ 11140 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11141 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11142 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11143 if (mctl_present) 11144 io->ipsec_out_reachable = B_TRUE; 11145 } 11146 /* Fastpath */ 11147 switch (nexthdr) { 11148 case IPPROTO_TCP: 11149 case IPPROTO_UDP: 11150 case IPPROTO_ICMPV6: 11151 case IPPROTO_SCTP: 11152 hdr_length = IPV6_HDR_LEN; 11153 break; 11154 default: { 11155 uint8_t *nexthdrp; 11156 11157 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11158 &hdr_length, &nexthdrp)) { 11159 /* Malformed packet */ 11160 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11161 freemsg(first_mp); 11162 return; 11163 } 11164 nexthdr = *nexthdrp; 11165 break; 11166 } 11167 } 11168 11169 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11170 uint16_t *up; 11171 uint16_t *insp; 11172 11173 /* 11174 * The packet header is processed once for all, even 11175 * in the multirouting case. We disable hardware 11176 * checksum if the packet is multirouted, as it will be 11177 * replicated via several interfaces, and not all of 11178 * them may have this capability. 11179 */ 11180 if (cksum_request == 1 && 11181 !(ire->ire_flags & RTF_MULTIRT)) { 11182 /* Skip the transport checksum */ 11183 goto cksum_done; 11184 } 11185 /* 11186 * Do user-configured raw checksum. 
11187 * Compute checksum and insert at offset "cksum_request" 11188 */ 11189 11190 /* check for enough headers for checksum */ 11191 cksum_request += hdr_length; /* offset from rptr */ 11192 if ((mp->b_wptr - mp->b_rptr) < 11193 (cksum_request + sizeof (int16_t))) { 11194 if (!pullupmsg(mp, 11195 cksum_request + sizeof (int16_t))) { 11196 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11197 " failed\n")); 11198 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11199 freemsg(first_mp); 11200 return; 11201 } 11202 ip6h = (ip6_t *)mp->b_rptr; 11203 } 11204 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11205 ASSERT(((uintptr_t)insp & 0x1) == 0); 11206 up = (uint16_t *)&ip6h->ip6_src; 11207 /* 11208 * icmp has placed length and routing 11209 * header adjustment in *insp. 11210 */ 11211 sum = htons(nexthdr) + 11212 up[0] + up[1] + up[2] + up[3] + 11213 up[4] + up[5] + up[6] + up[7] + 11214 up[8] + up[9] + up[10] + up[11] + 11215 up[12] + up[13] + up[14] + up[15]; 11216 sum = (sum & 0xffff) + (sum >> 16); 11217 *insp = IP_CSUM(mp, hdr_length, sum); 11218 if (*insp == 0) 11219 *insp = 0xFFFF; 11220 } else if (nexthdr == IPPROTO_TCP) { 11221 uint16_t *up; 11222 11223 /* 11224 * Check for full IPv6 header + enough TCP header 11225 * to get at the checksum field. 11226 */ 11227 if ((mp->b_wptr - mp->b_rptr) < 11228 (hdr_length + TCP_CHECKSUM_OFFSET + 11229 TCP_CHECKSUM_SIZE)) { 11230 if (!pullupmsg(mp, hdr_length + 11231 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11232 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11233 " failed\n")); 11234 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11235 freemsg(first_mp); 11236 return; 11237 } 11238 ip6h = (ip6_t *)mp->b_rptr; 11239 } 11240 11241 up = (uint16_t *)&ip6h->ip6_src; 11242 /* 11243 * Note: The TCP module has stored the length value 11244 * into the tcp checksum field, so we don't 11245 * need to explicitly sum it in here. 
11246 */ 11247 sum = up[0] + up[1] + up[2] + up[3] + 11248 up[4] + up[5] + up[6] + up[7] + 11249 up[8] + up[9] + up[10] + up[11] + 11250 up[12] + up[13] + up[14] + up[15]; 11251 11252 /* Fold the initial sum */ 11253 sum = (sum & 0xffff) + (sum >> 16); 11254 11255 up = (uint16_t *)(((uchar_t *)ip6h) + 11256 hdr_length + TCP_CHECKSUM_OFFSET); 11257 11258 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11259 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11260 ire->ire_max_frag, mctl_present, sum); 11261 11262 /* Software checksum? */ 11263 if (DB_CKSUMFLAGS(mp) == 0) { 11264 IP6_STAT(ip6_out_sw_cksum); 11265 IP6_STAT_UPDATE(ip6_tcp_out_sw_cksum_bytes, 11266 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11267 hdr_length); 11268 } 11269 } else if (nexthdr == IPPROTO_UDP) { 11270 uint16_t *up; 11271 11272 /* 11273 * check for full IPv6 header + enough UDP header 11274 * to get at the UDP checksum field 11275 */ 11276 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11277 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11278 if (!pullupmsg(mp, hdr_length + 11279 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11280 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11281 " failed\n")); 11282 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11283 freemsg(first_mp); 11284 return; 11285 } 11286 ip6h = (ip6_t *)mp->b_rptr; 11287 } 11288 up = (uint16_t *)&ip6h->ip6_src; 11289 /* 11290 * Note: The UDP module has stored the length value 11291 * into the udp checksum field, so we don't 11292 * need to explicitly sum it in here. 
11293 */ 11294 sum = up[0] + up[1] + up[2] + up[3] + 11295 up[4] + up[5] + up[6] + up[7] + 11296 up[8] + up[9] + up[10] + up[11] + 11297 up[12] + up[13] + up[14] + up[15]; 11298 11299 /* Fold the initial sum */ 11300 sum = (sum & 0xffff) + (sum >> 16); 11301 11302 up = (uint16_t *)(((uchar_t *)ip6h) + 11303 hdr_length + UDP_CHECKSUM_OFFSET); 11304 11305 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11306 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11307 ire->ire_max_frag, mctl_present, sum); 11308 11309 /* Software checksum? */ 11310 if (DB_CKSUMFLAGS(mp) == 0) { 11311 IP6_STAT(ip6_out_sw_cksum); 11312 IP6_STAT_UPDATE(ip6_udp_out_sw_cksum_bytes, 11313 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11314 hdr_length); 11315 } 11316 } else if (nexthdr == IPPROTO_ICMPV6) { 11317 uint16_t *up; 11318 icmp6_t *icmp6; 11319 11320 /* check for full IPv6+ICMPv6 header */ 11321 if ((mp->b_wptr - mp->b_rptr) < 11322 (hdr_length + ICMP6_MINLEN)) { 11323 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11324 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11325 " failed\n")); 11326 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11327 freemsg(first_mp); 11328 return; 11329 } 11330 ip6h = (ip6_t *)mp->b_rptr; 11331 } 11332 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11333 up = (uint16_t *)&ip6h->ip6_src; 11334 /* 11335 * icmp has placed length and routing 11336 * header adjustment in icmp6_cksum. 
11337 */ 11338 sum = htons(IPPROTO_ICMPV6) + 11339 up[0] + up[1] + up[2] + up[3] + 11340 up[4] + up[5] + up[6] + up[7] + 11341 up[8] + up[9] + up[10] + up[11] + 11342 up[12] + up[13] + up[14] + up[15]; 11343 sum = (sum & 0xffff) + (sum >> 16); 11344 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11345 if (icmp6->icmp6_cksum == 0) 11346 icmp6->icmp6_cksum = 0xFFFF; 11347 11348 /* Update output mib stats */ 11349 icmp_update_out_mib_v6(ill, icmp6); 11350 } else if (nexthdr == IPPROTO_SCTP) { 11351 sctp_hdr_t *sctph; 11352 11353 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11354 if (!pullupmsg(mp, hdr_length + 11355 sizeof (*sctph))) { 11356 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11357 " failed\n")); 11358 BUMP_MIB(ill->ill_ip_mib, 11359 ipIfStatsOutDiscards); 11360 freemsg(mp); 11361 return; 11362 } 11363 ip6h = (ip6_t *)mp->b_rptr; 11364 } 11365 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11366 sctph->sh_chksum = 0; 11367 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11368 } 11369 11370 cksum_done: 11371 /* 11372 * We force the insertion of a fragment header using the 11373 * IPH_FRAG_HDR flag in two cases: 11374 * - after reception of an ICMPv6 "packet too big" message 11375 * with a MTU < 1280 (cf. RFC 2460 section 5) 11376 * - for multirouted IPv6 packets, so that the receiver can 11377 * discard duplicates according to their fragment identifier 11378 * 11379 * Two flags modifed from the API can modify this behavior. 11380 * The first is IPV6_USE_MIN_MTU. With this API the user 11381 * can specify how to manage PMTUD for unicast and multicast. 11382 * 11383 * IPV6_DONTFRAG disallows fragmentation. 
11384 */ 11385 max_frag = ire->ire_max_frag; 11386 switch (IP6I_USE_MIN_MTU_API(flags)) { 11387 case IPV6_USE_MIN_MTU_DEFAULT: 11388 case IPV6_USE_MIN_MTU_UNICAST: 11389 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11390 max_frag = IPV6_MIN_MTU; 11391 } 11392 break; 11393 11394 case IPV6_USE_MIN_MTU_NEVER: 11395 max_frag = IPV6_MIN_MTU; 11396 break; 11397 } 11398 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11399 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11400 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11401 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11402 max_frag, B_FALSE, B_TRUE, zoneid); 11403 return; 11404 } 11405 11406 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11407 (mp->b_cont ? msgdsize(mp) : 11408 mp->b_wptr - (uchar_t *)ip6h)) { 11409 ip0dbg(("Packet length mismatch: %d, %ld\n", 11410 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11411 msgdsize(mp))); 11412 freemsg(first_mp); 11413 return; 11414 } 11415 /* Do IPSEC processing first */ 11416 if (mctl_present) { 11417 if (attach_index != 0) 11418 ipsec_out_attach_if(io, attach_index); 11419 ipsec_out_process(q, first_mp, ire, ill_index); 11420 return; 11421 } 11422 ASSERT(mp->b_prev == NULL); 11423 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11424 ntohs(ip6h->ip6_plen) + 11425 IPV6_HDR_LEN, max_frag)); 11426 ASSERT(mp == first_mp); 11427 /* Initiate IPPF processing */ 11428 if (IPP_ENABLED(IPP_LOCAL_OUT)) { 11429 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11430 if (mp == NULL) { 11431 return; 11432 } 11433 } 11434 ip_wput_frag_v6(mp, ire, reachable, connp, 11435 caller, max_frag); 11436 return; 11437 } 11438 /* Do IPSEC processing first */ 11439 if (mctl_present) { 11440 int extra_len = ipsec_out_extra_length(first_mp); 11441 11442 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11443 max_frag) { 11444 /* 11445 * IPsec headers will push the packet over the 11446 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11447 * message for this packet if the upper-layer 11448 * that issued this packet will be able to 11449 * react to the icmp_pkt2big_v6() that we'll 11450 * generate. 11451 */ 11452 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11453 max_frag, B_FALSE, B_TRUE, zoneid); 11454 return; 11455 } 11456 if (attach_index != 0) 11457 ipsec_out_attach_if(io, attach_index); 11458 ipsec_out_process(q, first_mp, ire, ill_index); 11459 return; 11460 } 11461 /* 11462 * XXX multicast: add ip_mforward_v6() here. 11463 * Check conn_dontroute 11464 */ 11465 #ifdef lint 11466 /* 11467 * XXX The only purpose of this statement is to avoid lint 11468 * errors. See the above "XXX multicast". When that gets 11469 * fixed, remove this whole #ifdef lint section. 11470 */ 11471 ip3dbg(("multicast forward is %s.\n", 11472 (multicast_forward ? "TRUE" : "FALSE"))); 11473 #endif 11474 11475 UPDATE_OB_PKT_COUNT(ire); 11476 ire->ire_last_used_time = lbolt; 11477 ASSERT(mp == first_mp); 11478 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11479 } else { 11480 DTRACE_PROBE4(ip6__loopback__out__start, 11481 ill_t *, NULL, ill_t *, ill, 11482 ip6_t *, ip6h, mblk_t *, first_mp); 11483 FW_HOOKS6(ip6_loopback_out_event, ipv6firewall_loopback_out, 11484 NULL, ill, ip6h, first_mp, mp); 11485 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11486 if (first_mp != NULL) 11487 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11488 } 11489 } 11490 11491 /* 11492 * Outbound IPv6 fragmentation routine using MDT. 
11493 */ 11494 static void 11495 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11496 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11497 { 11498 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11499 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11500 mblk_t *hdr_mp, *md_mp = NULL; 11501 int i1; 11502 multidata_t *mmd; 11503 unsigned char *hdr_ptr, *pld_ptr; 11504 ip_pdescinfo_t pdi; 11505 uint32_t ident; 11506 size_t len; 11507 uint16_t offset; 11508 queue_t *stq = ire->ire_stq; 11509 ill_t *ill = (ill_t *)stq->q_ptr; 11510 11511 ASSERT(DB_TYPE(mp) == M_DATA); 11512 ASSERT(MBLKL(mp) > unfragmentable_len); 11513 11514 /* 11515 * Move read ptr past unfragmentable portion, we don't want this part 11516 * of the data in our fragments. 11517 */ 11518 mp->b_rptr += unfragmentable_len; 11519 11520 /* Calculate how many packets we will send out */ 11521 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11522 pkts = (i1 + max_chunk - 1) / max_chunk; 11523 ASSERT(pkts > 1); 11524 11525 /* Allocate a message block which will hold all the IP Headers. */ 11526 wroff = ip_wroff_extra; 11527 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11528 11529 i1 = pkts * hdr_chunk_len; 11530 /* 11531 * Create the header buffer, Multidata and destination address 11532 * and SAP attribute that should be associated with it. 
11533 */ 11534 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11535 ((hdr_mp->b_wptr += i1), 11536 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11537 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11538 freemsg(mp); 11539 if (md_mp == NULL) { 11540 freemsg(hdr_mp); 11541 } else { 11542 free_mmd: IP6_STAT(ip6_frag_mdt_discarded); 11543 freemsg(md_mp); 11544 } 11545 IP6_STAT(ip6_frag_mdt_allocfail); 11546 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11547 return; 11548 } 11549 IP6_STAT(ip6_frag_mdt_allocd); 11550 11551 /* 11552 * Add a payload buffer to the Multidata; this operation must not 11553 * fail, or otherwise our logic in this routine is broken. There 11554 * is no memory allocation done by the routine, so any returned 11555 * failure simply tells us that we've done something wrong. 11556 * 11557 * A failure tells us that either we're adding the same payload 11558 * buffer more than once, or we're trying to add more buffers than 11559 * allowed. None of the above cases should happen, and we panic 11560 * because either there's horrible heap corruption, and/or 11561 * programming mistake. 11562 */ 11563 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11564 goto pbuf_panic; 11565 } 11566 11567 hdr_ptr = hdr_mp->b_rptr; 11568 pld_ptr = mp->b_rptr; 11569 11570 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11571 11572 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11573 11574 /* 11575 * len is the total length of the fragmentable data in this 11576 * datagram. For each fragment sent, we will decrement len 11577 * by the amount of fragmentable data sent in that fragment 11578 * until len reaches zero. 
11579 */ 11580 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11581 11582 offset = 0; 11583 prev_nexthdr_offset += wroff; 11584 11585 while (len != 0) { 11586 size_t mlen; 11587 ip6_t *fip6h; 11588 ip6_frag_t *fraghdr; 11589 int error; 11590 11591 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11592 mlen = MIN(len, max_chunk); 11593 len -= mlen; 11594 11595 fip6h = (ip6_t *)(hdr_ptr + wroff); 11596 ASSERT(OK_32PTR(fip6h)); 11597 bcopy(ip6h, fip6h, unfragmentable_len); 11598 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11599 11600 fip6h->ip6_plen = htons((uint16_t)(mlen + 11601 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11602 11603 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11604 unfragmentable_len); 11605 fraghdr->ip6f_nxt = nexthdr; 11606 fraghdr->ip6f_reserved = 0; 11607 fraghdr->ip6f_offlg = htons(offset) | 11608 ((len != 0) ? IP6F_MORE_FRAG : 0); 11609 fraghdr->ip6f_ident = ident; 11610 11611 /* 11612 * Record offset and size of header and data of the next packet 11613 * in the multidata message. 11614 */ 11615 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11616 unfragmentable_len + sizeof (ip6_frag_t), 0); 11617 PDESC_PLD_INIT(&pdi); 11618 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11619 ASSERT(i1 > 0); 11620 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11621 if (i1 == mlen) { 11622 pld_ptr += mlen; 11623 } else { 11624 i1 = mlen - i1; 11625 mp = mp->b_cont; 11626 ASSERT(mp != NULL); 11627 ASSERT(MBLKL(mp) >= i1); 11628 /* 11629 * Attach the next payload message block to the 11630 * multidata message. 
11631 */ 11632 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11633 goto pbuf_panic; 11634 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11635 pld_ptr = mp->b_rptr + i1; 11636 } 11637 11638 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11639 KM_NOSLEEP)) == NULL) { 11640 /* 11641 * Any failure other than ENOMEM indicates that we 11642 * have passed in invalid pdesc info or parameters 11643 * to mmd_addpdesc, which must not happen. 11644 * 11645 * EINVAL is a result of failure on boundary checks 11646 * against the pdesc info contents. It should not 11647 * happen, and we panic because either there's 11648 * horrible heap corruption, and/or programming 11649 * mistake. 11650 */ 11651 if (error != ENOMEM) { 11652 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11653 "pdesc logic error detected for " 11654 "mmd %p pinfo %p (%d)\n", 11655 (void *)mmd, (void *)&pdi, error); 11656 /* NOTREACHED */ 11657 } 11658 IP6_STAT(ip6_frag_mdt_addpdescfail); 11659 /* Free unattached payload message blocks as well */ 11660 md_mp->b_cont = mp->b_cont; 11661 goto free_mmd; 11662 } 11663 11664 /* Advance fragment offset. */ 11665 offset += mlen; 11666 11667 /* Advance to location for next header in the buffer. */ 11668 hdr_ptr += hdr_chunk_len; 11669 11670 /* Did we reach the next payload message block? */ 11671 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11672 mp = mp->b_cont; 11673 /* 11674 * Attach the next message block with payload 11675 * data to the multidata message. 
11676 */ 11677 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11678 goto pbuf_panic; 11679 pld_ptr = mp->b_rptr; 11680 } 11681 } 11682 11683 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11684 ASSERT(mp->b_wptr == pld_ptr); 11685 11686 /* Update IP statistics */ 11687 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11688 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11689 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11690 /* 11691 * The ipv6 header len is accounted for in unfragmentable_len so 11692 * when calculating the fragmentation overhead just add the frag 11693 * header len. 11694 */ 11695 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11696 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11697 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11698 IP6_STAT_UPDATE(ip6_frag_mdt_pkt_out, pkts); 11699 11700 ire->ire_ob_pkt_count += pkts; 11701 if (ire->ire_ipif != NULL) 11702 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11703 11704 ire->ire_last_used_time = lbolt; 11705 /* Send it down */ 11706 putnext(stq, md_mp); 11707 return; 11708 11709 pbuf_panic: 11710 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11711 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11712 pbuf_idx); 11713 /* NOTREACHED */ 11714 } 11715 11716 /* 11717 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11718 * We have not optimized this in terms of number of mblks 11719 * allocated. For instance, for each fragment sent we always allocate a 11720 * mblk to hold the IPv6 header and fragment header. 11721 * 11722 * Assumes that all the extension headers are contained in the first mblk. 11723 * 11724 * The fragment header is inserted after an hop-by-hop options header 11725 * and after [an optional destinations header followed by] a routing header. 11726 * 11727 * NOTE : This function does not ire_refrele the ire passed in as 11728 * the argument. 
11729 */ 11730 void 11731 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11732 int caller, int max_frag) 11733 { 11734 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11735 ip6_t *fip6h; 11736 mblk_t *hmp; 11737 mblk_t *hmp0; 11738 mblk_t *dmp; 11739 ip6_frag_t *fraghdr; 11740 size_t unfragmentable_len; 11741 size_t len; 11742 size_t mlen; 11743 size_t max_chunk; 11744 uint32_t ident; 11745 uint16_t off_flags; 11746 uint16_t offset = 0; 11747 ill_t *ill; 11748 uint8_t nexthdr; 11749 uint_t prev_nexthdr_offset; 11750 uint8_t *ptr; 11751 11752 ASSERT(ire->ire_type == IRE_CACHE); 11753 ill = (ill_t *)ire->ire_stq->q_ptr; 11754 11755 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11756 11757 /* 11758 * Determine the length of the unfragmentable portion of this 11759 * datagram. This consists of the IPv6 header, a potential 11760 * hop-by-hop options header, a potential pre-routing-header 11761 * destination options header, and a potential routing header. 11762 */ 11763 nexthdr = ip6h->ip6_nxt; 11764 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11765 ptr = (uint8_t *)&ip6h[1]; 11766 11767 if (nexthdr == IPPROTO_HOPOPTS) { 11768 ip6_hbh_t *hbh_hdr; 11769 uint_t hdr_len; 11770 11771 hbh_hdr = (ip6_hbh_t *)ptr; 11772 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11773 nexthdr = hbh_hdr->ip6h_nxt; 11774 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11775 - (uint8_t *)ip6h; 11776 ptr += hdr_len; 11777 } 11778 if (nexthdr == IPPROTO_DSTOPTS) { 11779 ip6_dest_t *dest_hdr; 11780 uint_t hdr_len; 11781 11782 dest_hdr = (ip6_dest_t *)ptr; 11783 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11784 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11785 nexthdr = dest_hdr->ip6d_nxt; 11786 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11787 - (uint8_t *)ip6h; 11788 ptr += hdr_len; 11789 } 11790 } 11791 if (nexthdr == IPPROTO_ROUTING) { 11792 ip6_rthdr_t *rthdr; 11793 uint_t hdr_len; 11794 11795 rthdr = (ip6_rthdr_t *)ptr; 11796 nexthdr = 
rthdr->ip6r_nxt; 11797 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11798 - (uint8_t *)ip6h; 11799 hdr_len = 8 * (rthdr->ip6r_len + 1); 11800 ptr += hdr_len; 11801 } 11802 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11803 11804 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11805 sizeof (ip6_frag_t)) & ~7; 11806 11807 /* Check if we can use MDT to send out the frags. */ 11808 ASSERT(!IRE_IS_LOCAL(ire)); 11809 if (ip_multidata_outbound && reachable == 0 && 11810 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11811 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11812 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11813 nexthdr, prev_nexthdr_offset); 11814 return; 11815 } 11816 11817 /* 11818 * Allocate an mblk with enough room for the link-layer 11819 * header, the unfragmentable part of the datagram, and the 11820 * fragment header. This (or a copy) will be used as the 11821 * first mblk for each fragment we send. 11822 */ 11823 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + ip_wroff_extra, 11824 BPRI_HI); 11825 if (hmp == NULL) { 11826 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11827 freemsg(mp); 11828 return; 11829 } 11830 hmp->b_rptr += ip_wroff_extra; 11831 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11832 11833 fip6h = (ip6_t *)hmp->b_rptr; 11834 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11835 11836 bcopy(ip6h, fip6h, unfragmentable_len); 11837 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11838 11839 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11840 11841 fraghdr->ip6f_nxt = nexthdr; 11842 fraghdr->ip6f_reserved = 0; 11843 fraghdr->ip6f_offlg = 0; 11844 fraghdr->ip6f_ident = htonl(ident); 11845 11846 /* 11847 * len is the total length of the fragmentable data in this 11848 * datagram. For each fragment sent, we will decrement len 11849 * by the amount of fragmentable data sent in that fragment 11850 * until len reaches zero. 
11851 */ 11852 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11853 11854 /* 11855 * Move read ptr past unfragmentable portion, we don't want this part 11856 * of the data in our fragments. 11857 */ 11858 mp->b_rptr += unfragmentable_len; 11859 11860 while (len != 0) { 11861 mlen = MIN(len, max_chunk); 11862 len -= mlen; 11863 if (len != 0) { 11864 /* Not last */ 11865 hmp0 = copyb(hmp); 11866 if (hmp0 == NULL) { 11867 freeb(hmp); 11868 freemsg(mp); 11869 BUMP_MIB(ill->ill_ip_mib, 11870 ipIfStatsOutFragFails); 11871 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11872 return; 11873 } 11874 off_flags = IP6F_MORE_FRAG; 11875 } else { 11876 /* Last fragment */ 11877 hmp0 = hmp; 11878 hmp = NULL; 11879 off_flags = 0; 11880 } 11881 fip6h = (ip6_t *)(hmp0->b_rptr); 11882 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11883 11884 fip6h->ip6_plen = htons((uint16_t)(mlen + 11885 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11886 /* 11887 * Note: Optimization alert. 11888 * In IPv6 (and IPv4) protocol header, Fragment Offset 11889 * ("offset") is 13 bits wide and in 8-octet units. 11890 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11891 * it occupies the most significant 13 bits. 11892 * (least significant 13 bits in IPv4). 11893 * We do not do any shifts here. Not shifting is same effect 11894 * as taking offset value in octet units, dividing by 8 and 11895 * then shifting 3 bits left to line it up in place in proper 11896 * place protocol header. 
11897 */ 11898 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11899 11900 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11901 /* mp has already been freed by ip_carve_mp() */ 11902 if (hmp != NULL) 11903 freeb(hmp); 11904 freeb(hmp0); 11905 ip1dbg(("ip_carve_mp: failed\n")); 11906 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11907 return; 11908 } 11909 hmp0->b_cont = dmp; 11910 /* Get the priority marking, if any */ 11911 hmp0->b_band = dmp->b_band; 11912 UPDATE_OB_PKT_COUNT(ire); 11913 ire->ire_last_used_time = lbolt; 11914 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11915 caller, NULL); 11916 reachable = 0; /* No need to redo state machine in loop */ 11917 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11918 offset += mlen; 11919 } 11920 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11921 } 11922 11923 /* 11924 * Determine if the ill and multicast aspects of that packets 11925 * "matches" the conn. 11926 */ 11927 boolean_t 11928 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11929 zoneid_t zoneid) 11930 { 11931 ill_t *in_ill; 11932 boolean_t wantpacket = B_TRUE; 11933 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11934 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11935 11936 /* 11937 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11938 * unicast and multicast reception to conn_incoming_ill. 11939 * conn_wantpacket_v6 is called both for unicast and 11940 * multicast. 11941 * 11942 * 1) The unicast copy of the packet can come anywhere in 11943 * the ill group if it is part of the group. Thus, we 11944 * need to check to see whether the ill group matches 11945 * if in_ill is part of a group. 11946 * 11947 * 2) ip_rput does not suppress duplicate multicast packets. 
11948 * If there are two interfaces in a ill group and we have 11949 * 2 applications (conns) joined a multicast group G on 11950 * both the interfaces, ilm_lookup_ill filter in ip_rput 11951 * will give us two packets because we join G on both the 11952 * interfaces rather than nominating just one interface 11953 * for receiving multicast like broadcast above. So, 11954 * we have to call ilg_lookup_ill to filter out duplicate 11955 * copies, if ill is part of a group, to supress duplicates. 11956 */ 11957 in_ill = connp->conn_incoming_ill; 11958 if (in_ill != NULL) { 11959 mutex_enter(&connp->conn_lock); 11960 in_ill = connp->conn_incoming_ill; 11961 mutex_enter(&ill->ill_lock); 11962 /* 11963 * No IPMP, and the packet did not arrive on conn_incoming_ill 11964 * OR, IPMP in use and the packet arrived on an IPMP group 11965 * different from the conn_incoming_ill's IPMP group. 11966 * Reject the packet. 11967 */ 11968 if ((in_ill->ill_group == NULL && in_ill != ill) || 11969 (in_ill->ill_group != NULL && 11970 in_ill->ill_group != ill->ill_group)) { 11971 wantpacket = B_FALSE; 11972 } 11973 mutex_exit(&ill->ill_lock); 11974 mutex_exit(&connp->conn_lock); 11975 if (!wantpacket) 11976 return (B_FALSE); 11977 } 11978 11979 if (connp->conn_multi_router) 11980 return (B_TRUE); 11981 11982 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11983 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11984 /* 11985 * Unicast case: we match the conn only if it's in the specified 11986 * zone. 11987 */ 11988 return (IPCL_ZONE_MATCH(connp, zoneid)); 11989 } 11990 11991 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11992 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11993 /* 11994 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11995 * disabled, therefore we don't dispatch the multicast packet to 11996 * the sending zone. 
11997 */ 11998 return (B_FALSE); 11999 } 12000 12001 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 12002 connp->conn_zoneid != zoneid && zoneid != ALL_ZONES) { 12003 /* 12004 * Multicast packet on the loopback interface: we only match 12005 * conns who joined the group in the specified zone. 12006 */ 12007 return (B_FALSE); 12008 } 12009 12010 mutex_enter(&connp->conn_lock); 12011 wantpacket = 12012 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 12013 mutex_exit(&connp->conn_lock); 12014 12015 return (wantpacket); 12016 } 12017 12018 12019 /* 12020 * Transmit a packet and update any NUD state based on the flags 12021 * XXX need to "recover" any ip6i_t when doing putq! 12022 * 12023 * NOTE : This function does not ire_refrele the ire passed in as the 12024 * argument. 12025 */ 12026 void 12027 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 12028 int caller, ipsec_out_t *io) 12029 { 12030 mblk_t *mp1; 12031 nce_t *nce = ire->ire_nce; 12032 ill_t *ill; 12033 ill_t *out_ill; 12034 uint64_t delta; 12035 ip6_t *ip6h; 12036 queue_t *stq = ire->ire_stq; 12037 ire_t *ire1 = NULL; 12038 ire_t *save_ire = ire; 12039 boolean_t multirt_send = B_FALSE; 12040 mblk_t *next_mp = NULL; 12041 12042 ip6h = (ip6_t *)mp->b_rptr; 12043 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 12044 ASSERT(ire->ire_ipversion == IPV6_VERSION); 12045 ASSERT(nce != NULL); 12046 ASSERT(mp->b_datap->db_type == M_DATA); 12047 ASSERT(stq != NULL); 12048 12049 ill = ire_to_ill(ire); 12050 if (!ill) { 12051 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 12052 freemsg(mp); 12053 return; 12054 } 12055 12056 /* 12057 * If a packet is to be sent out an interface that is a 6to4 12058 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 12059 * destination, must be checked to have a 6to4 prefix 12060 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 12061 * address configured on the sending interface. 
Otherwise, 12062 * the packet was delivered to this interface in error and the 12063 * packet must be dropped. 12064 */ 12065 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12066 ipif_t *ipif = ill->ill_ipif; 12067 12068 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12069 &ip6h->ip6_dst)) { 12070 if (ip_debug > 2) { 12071 /* ip1dbg */ 12072 pr_addr_dbg("ip_xmit_v6: attempting to " 12073 "send 6to4 addressed IPv6 " 12074 "destination (%s) out the wrong " 12075 "interface.\n", AF_INET6, 12076 &ip6h->ip6_dst); 12077 } 12078 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12079 freemsg(mp); 12080 return; 12081 } 12082 } 12083 12084 /* Flow-control check has been done in ip_wput_ire_v6 */ 12085 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12086 caller == IP_WSRV || canput(stq->q_next)) { 12087 uint32_t ill_index; 12088 12089 /* 12090 * In most cases, the emission loop below is entered only 12091 * once. Only in the case where the ire holds the 12092 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12093 * flagged ires in the bucket, and send the packet 12094 * through all crossed RTF_MULTIRT routes. 12095 */ 12096 if (ire->ire_flags & RTF_MULTIRT) { 12097 /* 12098 * Multirouting case. The bucket where ire is stored 12099 * probably holds other RTF_MULTIRT flagged ires 12100 * to the destination. In this call to ip_xmit_v6, 12101 * we attempt to send the packet through all 12102 * those ires. Thus, we first ensure that ire is the 12103 * first RTF_MULTIRT ire in the bucket, 12104 * before walking the ire list. 12105 */ 12106 ire_t *first_ire; 12107 irb_t *irb = ire->ire_bucket; 12108 ASSERT(irb != NULL); 12109 multirt_send = B_TRUE; 12110 12111 /* Make sure we do not omit any multiroute ire. 
*/ 12112 IRB_REFHOLD(irb); 12113 for (first_ire = irb->irb_ire; 12114 first_ire != NULL; 12115 first_ire = first_ire->ire_next) { 12116 if ((first_ire->ire_flags & RTF_MULTIRT) && 12117 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12118 &ire->ire_addr_v6)) && 12119 !(first_ire->ire_marks & 12120 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12121 break; 12122 } 12123 12124 if ((first_ire != NULL) && (first_ire != ire)) { 12125 IRE_REFHOLD(first_ire); 12126 /* ire will be released by the caller */ 12127 ire = first_ire; 12128 nce = ire->ire_nce; 12129 stq = ire->ire_stq; 12130 ill = ire_to_ill(ire); 12131 } 12132 IRB_REFRELE(irb); 12133 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12134 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12135 ILL_MDT_USABLE(ill)) { 12136 /* 12137 * This tcp connection was marked as MDT-capable, but 12138 * it has been turned off due changes in the interface. 12139 * Now that the interface support is back, turn it on 12140 * by notifying tcp. We don't directly modify tcp_mdt, 12141 * since we leave all the details to the tcp code that 12142 * knows better. 12143 */ 12144 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12145 12146 if (mdimp == NULL) { 12147 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12148 "connp %p (ENOMEM)\n", (void *)connp)); 12149 } else { 12150 CONN_INC_REF(connp); 12151 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12152 connp, SQTAG_TCP_INPUT_MCTL); 12153 } 12154 } 12155 12156 do { 12157 mblk_t *mp_ip6h; 12158 12159 if (multirt_send) { 12160 irb_t *irb; 12161 /* 12162 * We are in a multiple send case, need to get 12163 * the next ire and make a duplicate of the 12164 * packet. ire1 holds here the next ire to 12165 * process in the bucket. If multirouting is 12166 * expected, any non-RTF_MULTIRT ire that has 12167 * the right destination address is ignored. 
12168 */ 12169 irb = ire->ire_bucket; 12170 ASSERT(irb != NULL); 12171 12172 IRB_REFHOLD(irb); 12173 for (ire1 = ire->ire_next; 12174 ire1 != NULL; 12175 ire1 = ire1->ire_next) { 12176 if (!(ire1->ire_flags & RTF_MULTIRT)) 12177 continue; 12178 if (!IN6_ARE_ADDR_EQUAL( 12179 &ire1->ire_addr_v6, 12180 &ire->ire_addr_v6)) 12181 continue; 12182 if (ire1->ire_marks & 12183 (IRE_MARK_CONDEMNED| 12184 IRE_MARK_HIDDEN)) 12185 continue; 12186 12187 /* Got one */ 12188 if (ire1 != save_ire) { 12189 IRE_REFHOLD(ire1); 12190 } 12191 break; 12192 } 12193 IRB_REFRELE(irb); 12194 12195 if (ire1 != NULL) { 12196 next_mp = copyb(mp); 12197 if ((next_mp == NULL) || 12198 ((mp->b_cont != NULL) && 12199 ((next_mp->b_cont = 12200 dupmsg(mp->b_cont)) == 12201 NULL))) { 12202 freemsg(next_mp); 12203 next_mp = NULL; 12204 ire_refrele(ire1); 12205 ire1 = NULL; 12206 } 12207 } 12208 12209 /* Last multiroute ire; don't loop anymore. */ 12210 if (ire1 == NULL) { 12211 multirt_send = B_FALSE; 12212 } 12213 } 12214 12215 ill_index = 12216 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12217 12218 /* Initiate IPPF processing */ 12219 if (IP6_OUT_IPP(flags)) { 12220 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12221 if (mp == NULL) { 12222 BUMP_MIB(ill->ill_ip_mib, 12223 ipIfStatsOutDiscards); 12224 if (next_mp != NULL) 12225 freemsg(next_mp); 12226 if (ire != save_ire) { 12227 ire_refrele(ire); 12228 } 12229 return; 12230 } 12231 ip6h = (ip6_t *)mp->b_rptr; 12232 } 12233 mp_ip6h = mp; 12234 12235 /* 12236 * Check for fastpath, we need to hold nce_lock to 12237 * prevent fastpath update from chaining nce_fp_mp. 
12238 */ 12239 12240 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12241 mutex_enter(&nce->nce_lock); 12242 if ((mp1 = nce->nce_fp_mp) != NULL) { 12243 uint32_t hlen; 12244 uchar_t *rptr; 12245 12246 hlen = MBLKL(mp1); 12247 rptr = mp->b_rptr - hlen; 12248 /* 12249 * make sure there is room for the fastpath 12250 * datalink header 12251 */ 12252 if (rptr < mp->b_datap->db_base) { 12253 mp1 = copyb(mp1); 12254 mutex_exit(&nce->nce_lock); 12255 if (mp1 == NULL) { 12256 BUMP_MIB(ill->ill_ip_mib, 12257 ipIfStatsOutDiscards); 12258 freemsg(mp); 12259 if (next_mp != NULL) 12260 freemsg(next_mp); 12261 if (ire != save_ire) { 12262 ire_refrele(ire); 12263 } 12264 return; 12265 } 12266 mp1->b_cont = mp; 12267 12268 /* Get the priority marking, if any */ 12269 mp1->b_band = mp->b_band; 12270 mp = mp1; 12271 } else { 12272 mp->b_rptr = rptr; 12273 /* 12274 * fastpath - pre-pend datalink 12275 * header 12276 */ 12277 bcopy(mp1->b_rptr, rptr, hlen); 12278 mutex_exit(&nce->nce_lock); 12279 } 12280 } else { 12281 /* 12282 * Get the DL_UNITDATA_REQ. 12283 */ 12284 mp1 = nce->nce_res_mp; 12285 if (mp1 == NULL) { 12286 mutex_exit(&nce->nce_lock); 12287 ip1dbg(("ip_xmit_v6: No resolution " 12288 "block ire = %p\n", (void *)ire)); 12289 freemsg(mp); 12290 if (next_mp != NULL) 12291 freemsg(next_mp); 12292 if (ire != save_ire) { 12293 ire_refrele(ire); 12294 } 12295 return; 12296 } 12297 /* 12298 * Prepend the DL_UNITDATA_REQ. 
12299 */ 12300 mp1 = copyb(mp1); 12301 mutex_exit(&nce->nce_lock); 12302 if (mp1 == NULL) { 12303 BUMP_MIB(ill->ill_ip_mib, 12304 ipIfStatsOutDiscards); 12305 freemsg(mp); 12306 if (next_mp != NULL) 12307 freemsg(next_mp); 12308 if (ire != save_ire) { 12309 ire_refrele(ire); 12310 } 12311 return; 12312 } 12313 mp1->b_cont = mp; 12314 12315 /* Get the priority marking, if any */ 12316 mp1->b_band = mp->b_band; 12317 mp = mp1; 12318 } 12319 12320 out_ill = (ill_t *)stq->q_ptr; 12321 12322 DTRACE_PROBE4(ip6__physical__out__start, 12323 ill_t *, NULL, ill_t *, out_ill, 12324 ip6_t *, ip6h, mblk_t *, mp); 12325 12326 FW_HOOKS6(ip6_physical_out_event, 12327 ipv6firewall_physical_out, 12328 NULL, out_ill, ip6h, mp, mp_ip6h); 12329 12330 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12331 12332 if (mp == NULL) { 12333 if (multirt_send) { 12334 ASSERT(ire1 != NULL); 12335 if (ire != save_ire) { 12336 ire_refrele(ire); 12337 } 12338 /* 12339 * Proceed with the next RTF_MULTIRT 12340 * ire, also set up the send-to queue 12341 * accordingly. 12342 */ 12343 ire = ire1; 12344 ire1 = NULL; 12345 stq = ire->ire_stq; 12346 nce = ire->ire_nce; 12347 ill = ire_to_ill(ire); 12348 mp = next_mp; 12349 next_mp = NULL; 12350 continue; 12351 } else { 12352 ASSERT(next_mp == NULL); 12353 ASSERT(ire1 == NULL); 12354 break; 12355 } 12356 } 12357 12358 /* 12359 * Update ire and MIB counters; for save_ire, this has 12360 * been done by the caller. 12361 */ 12362 if (ire != save_ire) { 12363 UPDATE_OB_PKT_COUNT(ire); 12364 ire->ire_last_used_time = lbolt; 12365 12366 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12367 BUMP_MIB(ill->ill_ip_mib, 12368 ipIfStatsHCOutMcastPkts); 12369 UPDATE_MIB(ill->ill_ip_mib, 12370 ipIfStatsHCOutMcastOctets, 12371 ntohs(ip6h->ip6_plen) + 12372 IPV6_HDR_LEN); 12373 } 12374 } 12375 12376 /* 12377 * Send it down. XXX Do we want to flow control AH/ESP 12378 * packets that carry TCP payloads? 
We don't flow 12379 * control TCP packets, but we should also not 12380 * flow-control TCP packets that have been protected. 12381 * We don't have an easy way to find out if an AH/ESP 12382 * packet was originally TCP or not currently. 12383 */ 12384 if (io == NULL) { 12385 BUMP_MIB(ill->ill_ip_mib, 12386 ipIfStatsHCOutTransmits); 12387 UPDATE_MIB(ill->ill_ip_mib, 12388 ipIfStatsHCOutOctets, 12389 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12390 putnext(stq, mp); 12391 } else { 12392 /* 12393 * Safety Pup says: make sure this is 12394 * going to the right interface! 12395 */ 12396 if (io->ipsec_out_capab_ill_index != 12397 ill_index) { 12398 /* IPsec kstats: bump lose counter */ 12399 freemsg(mp1); 12400 } else { 12401 BUMP_MIB(ill->ill_ip_mib, 12402 ipIfStatsHCOutTransmits); 12403 UPDATE_MIB(ill->ill_ip_mib, 12404 ipIfStatsHCOutOctets, 12405 ntohs(ip6h->ip6_plen) + 12406 IPV6_HDR_LEN); 12407 ipsec_hw_putnext(stq, mp); 12408 } 12409 } 12410 12411 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12412 if (ire != save_ire) { 12413 ire_refrele(ire); 12414 } 12415 if (multirt_send) { 12416 ASSERT(ire1 != NULL); 12417 /* 12418 * Proceed with the next RTF_MULTIRT 12419 * ire, also set up the send-to queue 12420 * accordingly. 12421 */ 12422 ire = ire1; 12423 ire1 = NULL; 12424 stq = ire->ire_stq; 12425 nce = ire->ire_nce; 12426 ill = ire_to_ill(ire); 12427 mp = next_mp; 12428 next_mp = NULL; 12429 continue; 12430 } 12431 ASSERT(next_mp == NULL); 12432 ASSERT(ire1 == NULL); 12433 return; 12434 } 12435 12436 ASSERT(nce->nce_state != ND_INCOMPLETE); 12437 12438 /* 12439 * Check for upper layer advice 12440 */ 12441 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12442 /* 12443 * It should be o.k. to check the state without 12444 * a lock here, at most we lose an advice. 
12445 */ 12446 nce->nce_last = TICK_TO_MSEC(lbolt64); 12447 if (nce->nce_state != ND_REACHABLE) { 12448 12449 mutex_enter(&nce->nce_lock); 12450 nce->nce_state = ND_REACHABLE; 12451 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12452 mutex_exit(&nce->nce_lock); 12453 (void) untimeout(nce->nce_timeout_id); 12454 if (ip_debug > 2) { 12455 /* ip1dbg */ 12456 pr_addr_dbg("ip_xmit_v6: state" 12457 " for %s changed to" 12458 " REACHABLE\n", AF_INET6, 12459 &ire->ire_addr_v6); 12460 } 12461 } 12462 if (ire != save_ire) { 12463 ire_refrele(ire); 12464 } 12465 if (multirt_send) { 12466 ASSERT(ire1 != NULL); 12467 /* 12468 * Proceed with the next RTF_MULTIRT 12469 * ire, also set up the send-to queue 12470 * accordingly. 12471 */ 12472 ire = ire1; 12473 ire1 = NULL; 12474 stq = ire->ire_stq; 12475 nce = ire->ire_nce; 12476 ill = ire_to_ill(ire); 12477 mp = next_mp; 12478 next_mp = NULL; 12479 continue; 12480 } 12481 ASSERT(next_mp == NULL); 12482 ASSERT(ire1 == NULL); 12483 return; 12484 } 12485 12486 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12487 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12488 " ill_reachable_time = %d \n", delta, 12489 ill->ill_reachable_time)); 12490 if (delta > (uint64_t)ill->ill_reachable_time) { 12491 nce = ire->ire_nce; 12492 mutex_enter(&nce->nce_lock); 12493 switch (nce->nce_state) { 12494 case ND_REACHABLE: 12495 case ND_STALE: 12496 /* 12497 * ND_REACHABLE is identical to 12498 * ND_STALE in this specific case. If 12499 * reachable time has expired for this 12500 * neighbor (delta is greater than 12501 * reachable time), conceptually, the 12502 * neighbor cache is no longer in 12503 * REACHABLE state, but already in 12504 * STALE state. So the correct 12505 * transition here is to ND_DELAY. 
12506 */ 12507 nce->nce_state = ND_DELAY; 12508 mutex_exit(&nce->nce_lock); 12509 NDP_RESTART_TIMER(nce, 12510 delay_first_probe_time); 12511 if (ip_debug > 3) { 12512 /* ip2dbg */ 12513 pr_addr_dbg("ip_xmit_v6: state" 12514 " for %s changed to" 12515 " DELAY\n", AF_INET6, 12516 &ire->ire_addr_v6); 12517 } 12518 break; 12519 case ND_DELAY: 12520 case ND_PROBE: 12521 mutex_exit(&nce->nce_lock); 12522 /* Timers have already started */ 12523 break; 12524 case ND_UNREACHABLE: 12525 /* 12526 * ndp timer has detected that this nce 12527 * is unreachable and initiated deleting 12528 * this nce and all its associated IREs. 12529 * This is a race where we found the 12530 * ire before it was deleted and have 12531 * just sent out a packet using this 12532 * unreachable nce. 12533 */ 12534 mutex_exit(&nce->nce_lock); 12535 break; 12536 default: 12537 ASSERT(0); 12538 } 12539 } 12540 12541 if (multirt_send) { 12542 ASSERT(ire1 != NULL); 12543 /* 12544 * Proceed with the next RTF_MULTIRT ire, 12545 * Also set up the send-to queue accordingly. 12546 */ 12547 if (ire != save_ire) { 12548 ire_refrele(ire); 12549 } 12550 ire = ire1; 12551 ire1 = NULL; 12552 stq = ire->ire_stq; 12553 nce = ire->ire_nce; 12554 ill = ire_to_ill(ire); 12555 mp = next_mp; 12556 next_mp = NULL; 12557 } 12558 } while (multirt_send); 12559 /* 12560 * In the multirouting case, release the last ire used for 12561 * emission. save_ire will be released by the caller. 12562 */ 12563 if (ire != save_ire) { 12564 ire_refrele(ire); 12565 } 12566 } else { 12567 /* 12568 * Queue packet if we have an conn to give back pressure. 12569 * We can't queue packets intended for hardware acceleration 12570 * since we've tossed that state already. If the packet is 12571 * being fed back from ire_send_v6, we don't know the 12572 * position in the queue to enqueue the packet and we discard 12573 * the packet. 
12574 */ 12575 if (ip_output_queue && (connp != NULL) && (io == NULL) && 12576 (caller != IRE_SEND)) { 12577 if (caller == IP_WSRV) { 12578 connp->conn_did_putbq = 1; 12579 (void) putbq(connp->conn_wq, mp); 12580 conn_drain_insert(connp); 12581 /* 12582 * caller == IP_WSRV implies we are 12583 * the service thread, and the 12584 * queue is already noenabled. 12585 * The check for canput and 12586 * the putbq is not atomic. 12587 * So we need to check again. 12588 */ 12589 if (canput(stq->q_next)) 12590 connp->conn_did_putbq = 0; 12591 } else { 12592 (void) putq(connp->conn_wq, mp); 12593 } 12594 return; 12595 } 12596 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12597 freemsg(mp); 12598 return; 12599 } 12600 } 12601 12602 /* 12603 * pr_addr_dbg function provides the needed buffer space to call 12604 * inet_ntop() function's 3rd argument. This function should be 12605 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12606 * stack buffer space in it's own stack frame. This function uses 12607 * a buffer from it's own stack and prints the information. 12608 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12609 * 12610 * Note: This function can call inet_ntop() once. 12611 */ 12612 void 12613 pr_addr_dbg(char *fmt1, int af, const void *addr) 12614 { 12615 char buf[INET6_ADDRSTRLEN]; 12616 12617 if (fmt1 == NULL) { 12618 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12619 return; 12620 } 12621 12622 /* 12623 * This does not compare debug level and just prints 12624 * out. Thus it is the responsibility of the caller 12625 * to check the appropriate debug-level before calling 12626 * this function. 12627 */ 12628 if (ip_debug > 0) { 12629 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12630 } 12631 12632 12633 } 12634 12635 12636 /* 12637 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12638 * if needed and extension headers) that will be needed based on the 12639 * ip6_pkt_t structure passed by the caller. 
12640 * 12641 * The returned length does not include the length of the upper level 12642 * protocol (ULP) header. 12643 */ 12644 int 12645 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12646 { 12647 int len; 12648 12649 len = IPV6_HDR_LEN; 12650 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12651 len += sizeof (ip6i_t); 12652 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12653 ASSERT(ipp->ipp_hopoptslen != 0); 12654 len += ipp->ipp_hopoptslen; 12655 } 12656 if (ipp->ipp_fields & IPPF_RTHDR) { 12657 ASSERT(ipp->ipp_rthdrlen != 0); 12658 len += ipp->ipp_rthdrlen; 12659 } 12660 /* 12661 * En-route destination options 12662 * Only do them if there's a routing header as well 12663 */ 12664 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12665 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12666 ASSERT(ipp->ipp_rtdstoptslen != 0); 12667 len += ipp->ipp_rtdstoptslen; 12668 } 12669 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12670 ASSERT(ipp->ipp_dstoptslen != 0); 12671 len += ipp->ipp_dstoptslen; 12672 } 12673 return (len); 12674 } 12675 12676 /* 12677 * All-purpose routine to build a header chain of an IPv6 header 12678 * followed by any required extension headers and a proto header, 12679 * preceeded (where necessary) by an ip6i_t private header. 12680 * 12681 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12682 * will be filled in appropriately. 12683 * Thus the caller must fill in the rest of the IPv6 header, such as 12684 * traffic class/flowid, source address (if not set here), hoplimit (if not 12685 * set here) and destination address. 12686 * 12687 * The extension headers and ip6i_t header will all be fully filled in. 
12688 */ 12689 void 12690 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12691 ip6_pkt_t *ipp, uint8_t protocol) 12692 { 12693 uint8_t *nxthdr_ptr; 12694 uint8_t *cp; 12695 ip6i_t *ip6i; 12696 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12697 12698 /* 12699 * If sending private ip6i_t header down (checksum info, nexthop, 12700 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12701 * then fill it in. (The checksum info will be filled in by icmp). 12702 */ 12703 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12704 ip6i = (ip6i_t *)ip6h; 12705 ip6h = (ip6_t *)&ip6i[1]; 12706 12707 ip6i->ip6i_flags = 0; 12708 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12709 if (ipp->ipp_fields & IPPF_IFINDEX || 12710 ipp->ipp_fields & IPPF_SCOPE_ID) { 12711 ASSERT(ipp->ipp_ifindex != 0); 12712 ip6i->ip6i_flags |= IP6I_IFINDEX; 12713 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12714 } 12715 if (ipp->ipp_fields & IPPF_ADDR) { 12716 /* 12717 * Enable per-packet source address verification if 12718 * IPV6_PKTINFO specified the source address. 12719 * ip6_src is set in the transport's _wput function. 12720 */ 12721 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12722 &ipp->ipp_addr)); 12723 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12724 } 12725 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12726 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12727 /* 12728 * We need to set this flag so that IP doesn't 12729 * rewrite the IPv6 header's hoplimit with the 12730 * current default value. 
12731 */ 12732 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12733 } 12734 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12735 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12736 &ipp->ipp_nexthop)); 12737 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12738 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12739 } 12740 /* 12741 * tell IP this is an ip6i_t private header 12742 */ 12743 ip6i->ip6i_nxt = IPPROTO_RAW; 12744 } 12745 /* Initialize IPv6 header */ 12746 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12747 if (ipp->ipp_fields & IPPF_TCLASS) { 12748 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12749 (ipp->ipp_tclass << 20); 12750 } 12751 if (ipp->ipp_fields & IPPF_ADDR) 12752 ip6h->ip6_src = ipp->ipp_addr; 12753 12754 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12755 cp = (uint8_t *)&ip6h[1]; 12756 /* 12757 * Here's where we have to start stringing together 12758 * any extension headers in the right order: 12759 * Hop-by-hop, destination, routing, and final destination opts. 12760 */ 12761 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12762 /* Hop-by-hop options */ 12763 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12764 12765 *nxthdr_ptr = IPPROTO_HOPOPTS; 12766 nxthdr_ptr = &hbh->ip6h_nxt; 12767 12768 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12769 cp += ipp->ipp_hopoptslen; 12770 } 12771 /* 12772 * En-route destination options 12773 * Only do them if there's a routing header as well 12774 */ 12775 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12776 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12777 ip6_dest_t *dst = (ip6_dest_t *)cp; 12778 12779 *nxthdr_ptr = IPPROTO_DSTOPTS; 12780 nxthdr_ptr = &dst->ip6d_nxt; 12781 12782 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12783 cp += ipp->ipp_rtdstoptslen; 12784 } 12785 /* 12786 * Routing header next 12787 */ 12788 if (ipp->ipp_fields & IPPF_RTHDR) { 12789 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12790 12791 *nxthdr_ptr = IPPROTO_ROUTING; 12792 nxthdr_ptr = &rt->ip6r_nxt; 12793 12794 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12795 cp += ipp->ipp_rthdrlen; 12796 } 
12797 /* 12798 * Do ultimate destination options 12799 */ 12800 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12801 ip6_dest_t *dest = (ip6_dest_t *)cp; 12802 12803 *nxthdr_ptr = IPPROTO_DSTOPTS; 12804 nxthdr_ptr = &dest->ip6d_nxt; 12805 12806 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12807 cp += ipp->ipp_dstoptslen; 12808 } 12809 /* 12810 * Now set the last header pointer to the proto passed in 12811 */ 12812 *nxthdr_ptr = protocol; 12813 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12814 } 12815 12816 /* 12817 * Return a pointer to the routing header extension header 12818 * in the IPv6 header(s) chain passed in. 12819 * If none found, return NULL 12820 * Assumes that all extension headers are in same mblk as the v6 header 12821 */ 12822 ip6_rthdr_t * 12823 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12824 { 12825 ip6_dest_t *desthdr; 12826 ip6_frag_t *fraghdr; 12827 uint_t hdrlen; 12828 uint8_t nexthdr; 12829 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12830 12831 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12832 return ((ip6_rthdr_t *)ptr); 12833 12834 /* 12835 * The routing header will precede all extension headers 12836 * other than the hop-by-hop and destination options 12837 * extension headers, so if we see anything other than those, 12838 * we're done and didn't find it. 12839 * We could see a destination options header alone but no 12840 * routing header, in which case we'll return NULL as soon as 12841 * we see anything after that. 12842 * Hop-by-hop and destination option headers are identical, 12843 * so we can use either one we want as a template. 12844 */ 12845 nexthdr = ip6h->ip6_nxt; 12846 while (ptr < endptr) { 12847 /* Is there enough left for len + nexthdr? 
*/ 12848 if (ptr + MIN_EHDR_LEN > endptr) 12849 return (NULL); 12850 12851 switch (nexthdr) { 12852 case IPPROTO_HOPOPTS: 12853 case IPPROTO_DSTOPTS: 12854 /* Assumes the headers are identical for hbh and dst */ 12855 desthdr = (ip6_dest_t *)ptr; 12856 hdrlen = 8 * (desthdr->ip6d_len + 1); 12857 nexthdr = desthdr->ip6d_nxt; 12858 break; 12859 12860 case IPPROTO_ROUTING: 12861 return ((ip6_rthdr_t *)ptr); 12862 12863 case IPPROTO_FRAGMENT: 12864 fraghdr = (ip6_frag_t *)ptr; 12865 hdrlen = sizeof (ip6_frag_t); 12866 nexthdr = fraghdr->ip6f_nxt; 12867 break; 12868 12869 default: 12870 return (NULL); 12871 } 12872 ptr += hdrlen; 12873 } 12874 return (NULL); 12875 } 12876 12877 /* 12878 * Called for source-routed packets originating on this node. 12879 * Manipulates the original routing header by moving every entry up 12880 * one slot, placing the first entry in the v6 header's v6_dst field, 12881 * and placing the ultimate destination in the routing header's last 12882 * slot. 12883 * 12884 * Returns the checksum diference between the ultimate destination 12885 * (last hop in the routing header when the packet is sent) and 12886 * the first hop (ip6_dst when the packet is sent) 12887 */ 12888 uint32_t 12889 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth) 12890 { 12891 uint_t numaddr; 12892 uint_t i; 12893 in6_addr_t *addrptr; 12894 in6_addr_t tmp; 12895 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12896 uint32_t cksm; 12897 uint32_t addrsum = 0; 12898 uint16_t *ptr; 12899 12900 /* 12901 * Perform any processing needed for source routing. 12902 * We know that all extension headers will be in the same mblk 12903 * as the IPv6 header. 12904 */ 12905 12906 /* 12907 * If no segments left in header, or the header length field is zero, 12908 * don't move hop addresses around; 12909 * Checksum difference is zero. 
12910 */ 12911 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12912 return (0); 12913 12914 ptr = (uint16_t *)&ip6h->ip6_dst; 12915 cksm = 0; 12916 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12917 cksm += ptr[i]; 12918 } 12919 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12920 12921 /* 12922 * Here's where the fun begins - we have to 12923 * move all addresses up one spot, take the 12924 * first hop and make it our first ip6_dst, 12925 * and place the ultimate destination in the 12926 * newly-opened last slot. 12927 */ 12928 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12929 numaddr = rthdr->ip6r0_len / 2; 12930 tmp = *addrptr; 12931 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12932 *addrptr = addrptr[1]; 12933 } 12934 *addrptr = ip6h->ip6_dst; 12935 ip6h->ip6_dst = tmp; 12936 12937 /* 12938 * From the checksummed ultimate destination subtract the checksummed 12939 * current ip6_dst (the first hop address). Return that number. 12940 * (In the v4 case, the second part of this is done in each routine 12941 * that calls ip_massage_options(). We do it all in this one place 12942 * for v6). 12943 */ 12944 ptr = (uint16_t *)&ip6h->ip6_dst; 12945 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12946 addrsum += ptr[i]; 12947 } 12948 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12949 if ((int)cksm < 0) 12950 cksm--; 12951 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12952 12953 return (cksm); 12954 } 12955 12956 /* 12957 * Propagate a multicast group membership operation (join/leave) (*fn) on 12958 * all interfaces crossed by the related multirt routes. 12959 * The call is considered successful if the operation succeeds 12960 * on at least one interface. 12961 * The function is called if the destination address in the packet to send 12962 * is multirouted. 
12963 */ 12964 int 12965 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12966 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12967 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12968 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12969 { 12970 ire_t *ire_gw; 12971 irb_t *irb; 12972 int index, error = 0; 12973 opt_restart_t *or; 12974 12975 irb = ire->ire_bucket; 12976 ASSERT(irb != NULL); 12977 12978 ASSERT(DB_TYPE(first_mp) == M_CTL); 12979 or = (opt_restart_t *)first_mp->b_rptr; 12980 12981 IRB_REFHOLD(irb); 12982 for (; ire != NULL; ire = ire->ire_next) { 12983 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12984 continue; 12985 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12986 continue; 12987 12988 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12989 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12990 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE); 12991 /* No resolver exists for the gateway; skip this ire. */ 12992 if (ire_gw == NULL) 12993 continue; 12994 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12995 /* 12996 * A resolver exists: we can get the interface on which we have 12997 * to apply the operation. 12998 */ 12999 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 13000 first_mp); 13001 if (error == 0) 13002 or->or_private = CGTP_MCAST_SUCCESS; 13003 13004 if (ip_debug > 0) { 13005 ulong_t off; 13006 char *ksym; 13007 13008 ksym = kobj_getsymname((uintptr_t)fn, &off); 13009 ip2dbg(("ip_multirt_apply_membership_v6: " 13010 "called %s, multirt group 0x%08x via itf 0x%08x, " 13011 "error %d [success %u]\n", 13012 ksym ? 
ksym : "?", 13013 ntohl(V4_PART_OF_V6((*v6grp))), 13014 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 13015 error, or->or_private)); 13016 } 13017 13018 ire_refrele(ire_gw); 13019 if (error == EINPROGRESS) { 13020 IRB_REFRELE(irb); 13021 return (error); 13022 } 13023 } 13024 IRB_REFRELE(irb); 13025 /* 13026 * Consider the call as successful if we succeeded on at least 13027 * one interface. Otherwise, return the last encountered error. 13028 */ 13029 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 13030 } 13031 13032 void 13033 ip6_kstat_init(void) 13034 { 13035 if ((ip6_kstat = kstat_create("ip", 0, "ip6stat", 13036 "net", KSTAT_TYPE_NAMED, 13037 sizeof (ip6_statistics) / sizeof (kstat_named_t), 13038 KSTAT_FLAG_VIRTUAL)) != NULL) { 13039 ip6_kstat->ks_data = &ip6_statistics; 13040 kstat_install(ip6_kstat); 13041 } 13042 } 13043 13044 /* 13045 * The following two functions set and get the value for the 13046 * IPV6_SRC_PREFERENCES socket option. 13047 */ 13048 int 13049 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 13050 { 13051 /* 13052 * We only support preferences that are covered by 13053 * IPV6_PREFER_SRC_MASK. 13054 */ 13055 if (prefs & ~IPV6_PREFER_SRC_MASK) 13056 return (EINVAL); 13057 13058 /* 13059 * Look for conflicting preferences or default preferences. If 13060 * both bits of a related pair are clear, the application wants the 13061 * system's default value for that pair. Both bits in a pair can't 13062 * be set. 
13063 */ 13064 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13065 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13066 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13067 IPV6_PREFER_SRC_MIPMASK) { 13068 return (EINVAL); 13069 } 13070 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13071 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13072 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13073 IPV6_PREFER_SRC_TMPMASK) { 13074 return (EINVAL); 13075 } 13076 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13077 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13078 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13079 IPV6_PREFER_SRC_CGAMASK) { 13080 return (EINVAL); 13081 } 13082 13083 connp->conn_src_preferences = prefs; 13084 return (0); 13085 } 13086 13087 size_t 13088 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13089 { 13090 *val = connp->conn_src_preferences; 13091 return (sizeof (connp->conn_src_preferences)); 13092 } 13093 13094 int 13095 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13096 { 13097 ill_t *ill; 13098 ire_t *ire; 13099 int error; 13100 13101 /* 13102 * Verify the source address and ifindex. Privileged users can use 13103 * any source address. For ancillary data the source address is 13104 * checked in ip_wput_v6. 13105 */ 13106 if (pkti->ipi6_ifindex != 0) { 13107 ASSERT(connp != NULL); 13108 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13109 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error); 13110 if (ill == NULL) { 13111 /* 13112 * We just want to know if the interface exists, we 13113 * don't really care about the ill pointer itself. 
13114 */ 13115 if (error != EINPROGRESS) 13116 return (error); 13117 error = 0; /* Ensure we don't use it below */ 13118 } else { 13119 ill_refrele(ill); 13120 } 13121 } 13122 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13123 secpolicy_net_rawaccess(cr) != 0) { 13124 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13125 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13126 connp->conn_zoneid, NULL, MATCH_IRE_TYPE); 13127 if (ire != NULL) 13128 ire_refrele(ire); 13129 else 13130 return (ENXIO); 13131 } 13132 return (0); 13133 } 13134 13135 /* 13136 * Get the size of the IP options (including the IP headers size) 13137 * without including the AH header's size. If till_ah is B_FALSE, 13138 * and if AH header is present, dest options beyond AH header will 13139 * also be included in the returned size. 13140 */ 13141 int 13142 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13143 { 13144 ip6_t *ip6h; 13145 uint8_t nexthdr; 13146 uint8_t *whereptr; 13147 ip6_hbh_t *hbhhdr; 13148 ip6_dest_t *dsthdr; 13149 ip6_rthdr_t *rthdr; 13150 int ehdrlen; 13151 int size; 13152 ah_t *ah; 13153 13154 ip6h = (ip6_t *)mp->b_rptr; 13155 size = IPV6_HDR_LEN; 13156 nexthdr = ip6h->ip6_nxt; 13157 whereptr = (uint8_t *)&ip6h[1]; 13158 for (;;) { 13159 /* Assume IP has already stripped it */ 13160 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13161 switch (nexthdr) { 13162 case IPPROTO_HOPOPTS: 13163 hbhhdr = (ip6_hbh_t *)whereptr; 13164 nexthdr = hbhhdr->ip6h_nxt; 13165 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13166 break; 13167 case IPPROTO_DSTOPTS: 13168 dsthdr = (ip6_dest_t *)whereptr; 13169 nexthdr = dsthdr->ip6d_nxt; 13170 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13171 break; 13172 case IPPROTO_ROUTING: 13173 rthdr = (ip6_rthdr_t *)whereptr; 13174 nexthdr = rthdr->ip6r_nxt; 13175 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13176 break; 13177 default : 13178 if (till_ah) { 13179 ASSERT(nexthdr == IPPROTO_AH); 13180 return (size); 13181 } 13182 /* 13183 * If we don't have a AH 
header to traverse, 13184 * return now. This happens normally for 13185 * outbound datagrams where we have not inserted 13186 * the AH header. 13187 */ 13188 if (nexthdr != IPPROTO_AH) { 13189 return (size); 13190 } 13191 13192 /* 13193 * We don't include the AH header's size 13194 * to be symmetrical with other cases where 13195 * we either don't have a AH header (outbound) 13196 * or peek into the AH header yet (inbound and 13197 * not pulled up yet). 13198 */ 13199 ah = (ah_t *)whereptr; 13200 nexthdr = ah->ah_nexthdr; 13201 ehdrlen = (ah->ah_length << 2) + 8; 13202 13203 if (nexthdr == IPPROTO_DSTOPTS) { 13204 if (whereptr + ehdrlen >= mp->b_wptr) { 13205 /* 13206 * The destination options header 13207 * is not part of the first mblk. 13208 */ 13209 whereptr = mp->b_cont->b_rptr; 13210 } else { 13211 whereptr += ehdrlen; 13212 } 13213 13214 dsthdr = (ip6_dest_t *)whereptr; 13215 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13216 size += ehdrlen; 13217 } 13218 return (size); 13219 } 13220 whereptr += ehdrlen; 13221 size += ehdrlen; 13222 } 13223 } 13224