1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 #include <inet/sadb.h> 92 #include <inet/ipsec_impl.h> 93 #include <inet/tun.h> 94 #include <inet/sctp_ip.h> 95 #include <sys/pattr.h> 96 #include <inet/ipclassifier.h> 97 #include <inet/ipsecah.h> 98 #include <inet/udp_impl.h> 99 #include <inet/rawip_impl.h> 100 #include <inet/rts_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include 
<rpc/pmap_prot.h> 107 108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern squeue_func_t ip_input_proc; 112 113 /* 114 * Naming conventions: 115 * These rules should be judiciously applied 116 * if there is a need to identify something as IPv6 versus IPv4 117 * IPv6 functions will end with _v6 in the ip module. 118 * IPv6 functions will end with _ipv6 in the transport modules. 119 * IPv6 macros: 120 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 121 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 122 * And then there are ..V4_PART_OF_V6. 123 * The intent is that macros in the ip module end with _V6. 124 * IPv6 global variables will start with ipv6_ 125 * IPv6 structures will start with ipv6 126 * IPv6 defined constants should start with IPV6_ 127 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 128 */ 129 130 /* 131 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 132 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 133 * from IANA. This mechanism will remain in effect until an official 134 * number is obtained. 
135 */ 136 uchar_t ip6opt_ls; 137 138 const in6_addr_t ipv6_all_ones = 139 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 140 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 141 142 #ifdef _BIG_ENDIAN 143 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 144 #else /* _BIG_ENDIAN */ 145 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 146 #endif /* _BIG_ENDIAN */ 147 148 #ifdef _BIG_ENDIAN 149 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 150 #else /* _BIG_ENDIAN */ 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 152 #endif /* _BIG_ENDIAN */ 153 154 #ifdef _BIG_ENDIAN 155 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 156 #else /* _BIG_ENDIAN */ 157 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 158 #endif /* _BIG_ENDIAN */ 159 160 #ifdef _BIG_ENDIAN 161 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 162 #else /* _BIG_ENDIAN */ 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 164 #endif /* _BIG_ENDIAN */ 165 166 #ifdef _BIG_ENDIAN 167 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 168 #else /* _BIG_ENDIAN */ 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 170 #endif /* _BIG_ENDIAN */ 171 172 #ifdef _BIG_ENDIAN 173 const in6_addr_t ipv6_solicited_node_mcast = 174 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 175 #else /* _BIG_ENDIAN */ 176 const in6_addr_t ipv6_solicited_node_mcast = 177 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 178 #endif /* _BIG_ENDIAN */ 179 180 /* Leave room for ip_newroute to tack on the src and target addresses */ 181 #define OK_RESOLVER_MP_V6(mp) \ 182 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 183 184 #define IP6_MBLK_OK 0 185 #define IP6_MBLK_HDR_ERR 1 186 #define IP6_MBLK_LEN_ERR 2 187 188 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 189 
boolean_t, zoneid_t); 190 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 191 const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 192 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 193 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 194 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 195 boolean_t, boolean_t, boolean_t, boolean_t); 196 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 197 iulp_t *, ip_stack_t *); 198 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 199 uint16_t, boolean_t, boolean_t, boolean_t); 200 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 201 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 202 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 203 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 204 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 205 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 206 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 207 uint8_t *, uint_t, uint8_t, ip_stack_t *); 208 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 209 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 210 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 211 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 212 conn_t *, int, int, int, zoneid_t); 213 214 /* 215 * A template for an IPv6 AR_ENTRY_QUERY 216 */ 217 static areq_t ipv6_areq_template = { 218 AR_ENTRY_QUERY, /* cmd */ 219 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 220 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 221 IP6_DL_SAP, /* protocol, from arps perspective */ 222 sizeof (areq_t), /* target addr offset */ 223 IPV6_ADDR_LEN, /* target addr_length */ 224 0, /* flags */ 225 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 226 IPV6_ADDR_LEN, /* sender addr length */ 227 6, /* xmit_count */ 228 1000, /* 
(re)xmit_interval in milliseconds */ 229 4 /* max # of requests to buffer */ 230 /* anything else filled in by the code */ 231 }; 232 233 /* 234 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 235 * The message has already been checksummed and if needed, 236 * a copy has been made to be sent any interested ICMP client (conn) 237 * Note that this is different than icmp_inbound() which does the fanout 238 * to conn's as well as local processing of the ICMP packets. 239 * 240 * All error messages are passed to the matching transport stream. 241 * 242 * Zones notes: 243 * The packet is only processed in the context of the specified zone: typically 244 * only this zone will reply to an echo request. This means that the caller must 245 * call icmp_inbound_v6() for each relevant zone. 246 */ 247 static void 248 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 249 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 250 { 251 icmp6_t *icmp6; 252 ip6_t *ip6h; 253 boolean_t interested; 254 ip6i_t *ip6i; 255 in6_addr_t origsrc; 256 ire_t *ire; 257 mblk_t *first_mp; 258 ipsec_in_t *ii; 259 ip_stack_t *ipst = ill->ill_ipst; 260 261 ASSERT(ill != NULL); 262 first_mp = mp; 263 if (mctl_present) { 264 mp = first_mp->b_cont; 265 ASSERT(mp != NULL); 266 267 ii = (ipsec_in_t *)first_mp->b_rptr; 268 ASSERT(ii->ipsec_in_type == IPSEC_IN); 269 } 270 271 ip6h = (ip6_t *)mp->b_rptr; 272 273 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 274 275 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 276 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 277 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 278 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 279 freemsg(first_mp); 280 return; 281 } 282 ip6h = (ip6_t *)mp->b_rptr; 283 } 284 if (ipst->ips_icmp_accept_clear_messages == 0) { 285 first_mp = ipsec_check_global_policy(first_mp, NULL, 286 NULL, ip6h, mctl_present, ipst->ips_netstack); 287 if (first_mp == NULL) 288 
return; 289 } 290 291 /* 292 * On a labeled system, we have to check whether the zone itself is 293 * permitted to receive raw traffic. 294 */ 295 if (is_system_labeled()) { 296 if (zoneid == ALL_ZONES) 297 zoneid = tsol_packet_to_zoneid(mp); 298 if (!tsol_can_accept_raw(mp, B_FALSE)) { 299 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 300 zoneid)); 301 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 302 freemsg(first_mp); 303 return; 304 } 305 } 306 307 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 308 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 309 icmp6->icmp6_code)); 310 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 311 312 /* Initiate IPPF processing here */ 313 if (IP6_IN_IPP(flags, ipst)) { 314 315 /* 316 * If the ifindex changes due to SIOCSLIFINDEX 317 * packet may return to IP on the wrong ill. 318 */ 319 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 320 if (mp == NULL) { 321 if (mctl_present) { 322 freeb(first_mp); 323 } 324 return; 325 } 326 } 327 328 switch (icmp6->icmp6_type) { 329 case ICMP6_DST_UNREACH: 330 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 331 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 332 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 333 break; 334 335 case ICMP6_TIME_EXCEEDED: 336 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 337 break; 338 339 case ICMP6_PARAM_PROB: 340 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 341 break; 342 343 case ICMP6_PACKET_TOO_BIG: 344 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 345 zoneid); 346 return; 347 case ICMP6_ECHO_REQUEST: 348 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 349 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 350 !ipst->ips_ipv6_resp_echo_mcast) 351 break; 352 353 /* 354 * We must have exclusive use of the mblk to convert it to 355 * a response. 356 * If not, we copy it. 
357 */ 358 if (mp->b_datap->db_ref > 1) { 359 mblk_t *mp1; 360 361 mp1 = copymsg(mp); 362 freemsg(mp); 363 if (mp1 == NULL) { 364 BUMP_MIB(ill->ill_icmp6_mib, 365 ipv6IfIcmpInErrors); 366 if (mctl_present) 367 freeb(first_mp); 368 return; 369 } 370 mp = mp1; 371 ip6h = (ip6_t *)mp->b_rptr; 372 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 373 if (mctl_present) 374 first_mp->b_cont = mp; 375 else 376 first_mp = mp; 377 } 378 379 /* 380 * Turn the echo into an echo reply. 381 * Remove any extension headers (do not reverse a source route) 382 * and clear the flow id (keep traffic class for now). 383 */ 384 if (hdr_length != IPV6_HDR_LEN) { 385 int i; 386 387 for (i = 0; i < IPV6_HDR_LEN; i++) 388 mp->b_rptr[hdr_length - i - 1] = 389 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 390 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 391 ip6h = (ip6_t *)mp->b_rptr; 392 ip6h->ip6_nxt = IPPROTO_ICMPV6; 393 hdr_length = IPV6_HDR_LEN; 394 } 395 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 396 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 397 398 ip6h->ip6_plen = 399 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 400 origsrc = ip6h->ip6_src; 401 /* 402 * Reverse the source and destination addresses. 403 * If the return address is a multicast, zero out the source 404 * (ip_wput_v6 will set an address). 405 */ 406 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 407 ip6h->ip6_src = ipv6_all_zeros; 408 ip6h->ip6_dst = origsrc; 409 } else { 410 ip6h->ip6_src = ip6h->ip6_dst; 411 ip6h->ip6_dst = origsrc; 412 } 413 414 /* set the hop limit */ 415 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 416 417 /* 418 * Prepare for checksum by putting icmp length in the icmp 419 * checksum field. The checksum is calculated in ip_wput_v6. 420 */ 421 icmp6->icmp6_cksum = ip6h->ip6_plen; 422 /* 423 * ICMP echo replies should go out on the same interface 424 * the request came on as probes used by in.mpathd for 425 * detecting NIC failures are ECHO packets. 
We turn-off load 426 * spreading by allocating a ip6i and setting ip6i_attach_if 427 * to B_TRUE which is handled both by ip_wput_v6 and 428 * ip_newroute_v6. If we don't turnoff load spreading, 429 * the packets might get dropped if there are no 430 * non-FAILED/INACTIVE interfaces for it to go out on and 431 * in.mpathd would wrongly detect a failure or mis-detect 432 * a NIC failure as a link failure. As load spreading can 433 * happen only if ill_group is not NULL, we do only for 434 * that case and this does not affect the normal case. 435 * 436 * We force this only on echo packets that came from on-link 437 * hosts. We restrict this to link-local addresses which 438 * is used by in.mpathd for probing. In the IPv6 case, 439 * default routes typically have an ire_ipif pointer and 440 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 441 * might work. As a default route out of this interface 442 * may not be present, enforcing this packet to go out in 443 * this case may not work. 444 */ 445 if (ill->ill_group != NULL && 446 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 447 /* 448 * If we are sending replies to ourselves, don't 449 * set ATTACH_IF as we may not be able to find 450 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 451 * causes ip_wput_v6 to look for an IRE_LOCAL on 452 * "ill" which it may not find and will try to 453 * create an IRE_CACHE for our local address. Once 454 * we do this, we will try to forward all packets 455 * meant to our LOCAL address. 
456 */ 457 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 458 NULL, ipst); 459 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 460 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 461 if (mp == NULL) { 462 BUMP_MIB(ill->ill_icmp6_mib, 463 ipv6IfIcmpInErrors); 464 if (ire != NULL) 465 ire_refrele(ire); 466 if (mctl_present) 467 freeb(first_mp); 468 return; 469 } else if (mctl_present) { 470 first_mp->b_cont = mp; 471 } else { 472 first_mp = mp; 473 } 474 ip6i = (ip6i_t *)mp->b_rptr; 475 ip6i->ip6i_flags = IP6I_ATTACH_IF; 476 ip6i->ip6i_ifindex = 477 ill->ill_phyint->phyint_ifindex; 478 } 479 if (ire != NULL) 480 ire_refrele(ire); 481 } 482 483 if (!mctl_present) { 484 /* 485 * This packet should go out the same way as it 486 * came in i.e in clear. To make sure that global 487 * policy will not be applied to this in ip_wput, 488 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 489 */ 490 ASSERT(first_mp == mp); 491 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 492 if (first_mp == NULL) { 493 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 494 freemsg(mp); 495 return; 496 } 497 ii = (ipsec_in_t *)first_mp->b_rptr; 498 499 /* This is not a secure packet */ 500 ii->ipsec_in_secure = B_FALSE; 501 first_mp->b_cont = mp; 502 } 503 ii->ipsec_in_zoneid = zoneid; 504 ASSERT(zoneid != ALL_ZONES); 505 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 506 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 507 return; 508 } 509 put(WR(q), first_mp); 510 return; 511 512 case ICMP6_ECHO_REPLY: 513 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 514 break; 515 516 case ND_ROUTER_SOLICIT: 517 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 518 break; 519 520 case ND_ROUTER_ADVERT: 521 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 522 break; 523 524 case ND_NEIGHBOR_SOLICIT: 525 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 526 if (mctl_present) 527 freeb(first_mp); 528 /* XXX may wish to pass first_mp up to ndp_input 
someday. */ 529 ndp_input(ill, mp, dl_mp); 530 return; 531 532 case ND_NEIGHBOR_ADVERT: 533 BUMP_MIB(ill->ill_icmp6_mib, 534 ipv6IfIcmpInNeighborAdvertisements); 535 if (mctl_present) 536 freeb(first_mp); 537 /* XXX may wish to pass first_mp up to ndp_input someday. */ 538 ndp_input(ill, mp, dl_mp); 539 return; 540 541 case ND_REDIRECT: { 542 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 543 544 if (ipst->ips_ipv6_ignore_redirect) 545 break; 546 547 /* 548 * As there is no upper client to deliver, we don't 549 * need the first_mp any more. 550 */ 551 if (mctl_present) 552 freeb(first_mp); 553 if (!pullupmsg(mp, -1)) { 554 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 555 break; 556 } 557 icmp_redirect_v6(q, mp, ill); 558 return; 559 } 560 561 /* 562 * The next three icmp messages will be handled by MLD. 563 * Pass all valid MLD packets up to any process(es) 564 * listening on a raw ICMP socket. MLD messages are 565 * freed by mld_input function. 566 */ 567 case MLD_LISTENER_QUERY: 568 case MLD_LISTENER_REPORT: 569 case MLD_LISTENER_REDUCTION: 570 if (mctl_present) 571 freeb(first_mp); 572 mld_input(q, mp, ill); 573 return; 574 default: 575 break; 576 } 577 if (interested) { 578 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 579 mctl_present, zoneid); 580 } else { 581 freemsg(first_mp); 582 } 583 } 584 585 /* 586 * Process received IPv6 ICMP Packet too big. 587 * After updating any IRE it does the fanout to any matching transport streams. 588 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
589 */ 590 /* ARGSUSED */ 591 static void 592 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 593 boolean_t mctl_present, zoneid_t zoneid) 594 { 595 ip6_t *ip6h; 596 ip6_t *inner_ip6h; 597 icmp6_t *icmp6; 598 uint16_t hdr_length; 599 uint32_t mtu; 600 ire_t *ire, *first_ire; 601 mblk_t *first_mp; 602 ip_stack_t *ipst = ill->ill_ipst; 603 604 first_mp = mp; 605 if (mctl_present) 606 mp = first_mp->b_cont; 607 /* 608 * We must have exclusive use of the mblk to update the MTU 609 * in the packet. 610 * If not, we copy it. 611 * 612 * If there's an M_CTL present, we know that allocated first_mp 613 * earlier in this function, so we know first_mp has refcnt of one. 614 */ 615 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 616 if (mp->b_datap->db_ref > 1) { 617 mblk_t *mp1; 618 619 mp1 = copymsg(mp); 620 freemsg(mp); 621 if (mp1 == NULL) { 622 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 623 if (mctl_present) 624 freeb(first_mp); 625 return; 626 } 627 mp = mp1; 628 if (mctl_present) 629 first_mp->b_cont = mp; 630 else 631 first_mp = mp; 632 } 633 ip6h = (ip6_t *)mp->b_rptr; 634 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 635 hdr_length = ip_hdr_length_v6(mp, ip6h); 636 else 637 hdr_length = IPV6_HDR_LEN; 638 639 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 640 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 641 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 642 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 643 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 644 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 645 freemsg(first_mp); 646 return; 647 } 648 ip6h = (ip6_t *)mp->b_rptr; 649 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 650 inner_ip6h = (ip6_t *)&icmp6[1]; 651 } 652 653 /* 654 * For link local destinations matching simply on IRE type is not 655 * sufficient. Same link local addresses for different ILL's is 656 * possible. 
657 */ 658 659 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 660 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 661 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 662 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 663 664 if (first_ire == NULL) { 665 if (ip_debug > 2) { 666 /* ip1dbg */ 667 pr_addr_dbg("icmp_inbound_too_big_v6:" 668 "no ire for dst %s\n", AF_INET6, 669 &inner_ip6h->ip6_dst); 670 } 671 freemsg(first_mp); 672 return; 673 } 674 675 mtu = ntohl(icmp6->icmp6_mtu); 676 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 677 for (ire = first_ire; ire != NULL && 678 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 679 ire = ire->ire_next) { 680 mutex_enter(&ire->ire_lock); 681 if (mtu < IPV6_MIN_MTU) { 682 ip1dbg(("Received mtu less than IPv6 " 683 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 684 mtu = IPV6_MIN_MTU; 685 /* 686 * If an mtu less than IPv6 min mtu is received, 687 * we must include a fragment header in 688 * subsequent packets. 689 */ 690 ire->ire_frag_flag |= IPH_FRAG_HDR; 691 } 692 ip1dbg(("Received mtu from router: %d\n", mtu)); 693 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 694 /* Record the new max frag size for the ULP. */ 695 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 696 /* 697 * If we need a fragment header in every packet 698 * (above case or multirouting), make sure the 699 * ULP takes it into account when computing the 700 * payload size. 
701 */ 702 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 703 sizeof (ip6_frag_t)); 704 } else { 705 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 706 } 707 mutex_exit(&ire->ire_lock); 708 } 709 rw_exit(&first_ire->ire_bucket->irb_lock); 710 ire_refrele(first_ire); 711 } else { 712 irb_t *irb = NULL; 713 /* 714 * for non-link local destinations we match only on the IRE type 715 */ 716 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 717 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 718 ipst); 719 if (ire == NULL) { 720 if (ip_debug > 2) { 721 /* ip1dbg */ 722 pr_addr_dbg("icmp_inbound_too_big_v6:" 723 "no ire for dst %s\n", 724 AF_INET6, &inner_ip6h->ip6_dst); 725 } 726 freemsg(first_mp); 727 return; 728 } 729 irb = ire->ire_bucket; 730 ire_refrele(ire); 731 rw_enter(&irb->irb_lock, RW_READER); 732 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 733 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 734 &inner_ip6h->ip6_dst)) { 735 mtu = ntohl(icmp6->icmp6_mtu); 736 mutex_enter(&ire->ire_lock); 737 if (mtu < IPV6_MIN_MTU) { 738 ip1dbg(("Received mtu less than IPv6" 739 "min mtu %d: %d\n", 740 IPV6_MIN_MTU, mtu)); 741 mtu = IPV6_MIN_MTU; 742 /* 743 * If an mtu less than IPv6 min mtu is 744 * received, we must include a fragment 745 * header in subsequent packets. 746 */ 747 ire->ire_frag_flag |= IPH_FRAG_HDR; 748 } 749 750 ip1dbg(("Received mtu from router: %d\n", mtu)); 751 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 752 /* Record the new max frag size for the ULP. */ 753 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 754 /* 755 * If we need a fragment header in 756 * every packet (above case or 757 * multirouting), make sure the ULP 758 * takes it into account when computing 759 * the payload size. 
760 */ 761 icmp6->icmp6_mtu = 762 htonl(ire->ire_max_frag - 763 sizeof (ip6_frag_t)); 764 } else { 765 icmp6->icmp6_mtu = 766 htonl(ire->ire_max_frag); 767 } 768 mutex_exit(&ire->ire_lock); 769 } 770 } 771 rw_exit(&irb->irb_lock); 772 } 773 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 774 mctl_present, zoneid); 775 } 776 777 /* 778 * Fanout received ICMPv6 error packets to the transports. 779 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 780 */ 781 void 782 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 783 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 784 { 785 uint16_t *up; /* Pointer to ports in ULP header */ 786 uint32_t ports; /* reversed ports for fanout */ 787 ip6_t rip6h; /* With reversed addresses */ 788 uint16_t hdr_length; 789 uint8_t *nexthdrp; 790 uint8_t nexthdr; 791 mblk_t *first_mp; 792 ipsec_in_t *ii; 793 tcpha_t *tcpha; 794 conn_t *connp; 795 ip_stack_t *ipst = ill->ill_ipst; 796 797 first_mp = mp; 798 if (mctl_present) { 799 mp = first_mp->b_cont; 800 ASSERT(mp != NULL); 801 802 ii = (ipsec_in_t *)first_mp->b_rptr; 803 ASSERT(ii->ipsec_in_type == IPSEC_IN); 804 } else { 805 ii = NULL; 806 } 807 808 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 809 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 810 811 /* 812 * Need to pullup everything in order to use 813 * ip_hdr_length_nexthdr_v6() 814 */ 815 if (mp->b_cont != NULL) { 816 if (!pullupmsg(mp, -1)) { 817 ip1dbg(("icmp_inbound_error_fanout_v6: " 818 "pullupmsg failed\n")); 819 goto drop_pkt; 820 } 821 ip6h = (ip6_t *)mp->b_rptr; 822 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 823 } 824 825 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 826 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 827 goto drop_pkt; 828 829 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 830 goto drop_pkt; 831 nexthdr = *nexthdrp; 832 833 /* Set message type, must be done after pullups */ 834 
mp->b_datap->db_type = M_CTL; 835 836 /* Try to pass the ICMP message to clients who need it */ 837 switch (nexthdr) { 838 case IPPROTO_UDP: { 839 /* 840 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 841 * UDP header to get the port information. 842 */ 843 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 844 mp->b_wptr) { 845 break; 846 } 847 /* 848 * Attempt to find a client stream based on port. 849 * Note that we do a reverse lookup since the header is 850 * in the form we sent it out. 851 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 852 * and we only set the src and dst addresses and nexthdr. 853 */ 854 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 855 rip6h.ip6_src = ip6h->ip6_dst; 856 rip6h.ip6_dst = ip6h->ip6_src; 857 rip6h.ip6_nxt = nexthdr; 858 ((uint16_t *)&ports)[0] = up[1]; 859 ((uint16_t *)&ports)[1] = up[0]; 860 861 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 862 IP6_NO_IPPOLICY, mctl_present, zoneid); 863 return; 864 } 865 case IPPROTO_TCP: { 866 /* 867 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 868 * the TCP header to get the port information. 869 */ 870 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 871 mp->b_wptr) { 872 break; 873 } 874 875 /* 876 * Attempt to find a client stream based on port. 877 * Note that we do a reverse lookup since the header is 878 * in the form we sent it out. 879 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 880 * we only set the src and dst addresses and nexthdr. 881 */ 882 883 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 884 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 885 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 886 if (connp == NULL) { 887 goto drop_pkt; 888 } 889 890 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 891 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 892 return; 893 894 } 895 case IPPROTO_SCTP: 896 /* 897 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 898 * the SCTP header to get the port information. 
899 */ 900 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 901 mp->b_wptr) { 902 break; 903 } 904 905 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 906 ((uint16_t *)&ports)[0] = up[1]; 907 ((uint16_t *)&ports)[1] = up[0]; 908 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 0, 909 mctl_present, IP6_NO_IPPOLICY, zoneid); 910 return; 911 case IPPROTO_ESP: 912 case IPPROTO_AH: { 913 int ipsec_rc; 914 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 915 916 /* 917 * We need a IPSEC_IN in the front to fanout to AH/ESP. 918 * We will re-use the IPSEC_IN if it is already present as 919 * AH/ESP will not affect any fields in the IPSEC_IN for 920 * ICMP errors. If there is no IPSEC_IN, allocate a new 921 * one and attach it in the front. 922 */ 923 if (ii != NULL) { 924 /* 925 * ip_fanout_proto_again converts the ICMP errors 926 * that come back from AH/ESP to M_DATA so that 927 * if it is non-AH/ESP and we do a pullupmsg in 928 * this function, it would work. Convert it back 929 * to M_CTL before we send up as this is a ICMP 930 * error. This could have been generated locally or 931 * by some router. Validate the inner IPSEC 932 * headers. 933 * 934 * NOTE : ill_index is used by ip_fanout_proto_again 935 * to locate the ill. 936 */ 937 ASSERT(ill != NULL); 938 ii->ipsec_in_ill_index = 939 ill->ill_phyint->phyint_ifindex; 940 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 941 first_mp->b_cont->b_datap->db_type = M_CTL; 942 } else { 943 /* 944 * IPSEC_IN is not present. We attach a ipsec_in 945 * message and send up to IPSEC for validating 946 * and removing the IPSEC headers. Clear 947 * ipsec_in_secure so that when we return 948 * from IPSEC, we don't mistakenly think that this 949 * is a secure packet came from the network. 950 * 951 * NOTE : ill_index is used by ip_fanout_proto_again 952 * to locate the ill. 
953 */ 954 ASSERT(first_mp == mp); 955 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 956 ASSERT(ill != NULL); 957 if (first_mp == NULL) { 958 freemsg(mp); 959 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 960 return; 961 } 962 ii = (ipsec_in_t *)first_mp->b_rptr; 963 964 /* This is not a secure packet */ 965 ii->ipsec_in_secure = B_FALSE; 966 first_mp->b_cont = mp; 967 mp->b_datap->db_type = M_CTL; 968 ii->ipsec_in_ill_index = 969 ill->ill_phyint->phyint_ifindex; 970 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 971 } 972 973 if (!ipsec_loaded(ipss)) { 974 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 975 return; 976 } 977 978 if (nexthdr == IPPROTO_ESP) 979 ipsec_rc = ipsecesp_icmp_error(first_mp); 980 else 981 ipsec_rc = ipsecah_icmp_error(first_mp); 982 if (ipsec_rc == IPSEC_STATUS_FAILED) 983 return; 984 985 ip_fanout_proto_again(first_mp, ill, ill, NULL); 986 return; 987 } 988 case IPPROTO_ENCAP: 989 case IPPROTO_IPV6: 990 if ((uint8_t *)ip6h + hdr_length + 991 (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) : 992 sizeof (ip6_t)) > mp->b_wptr) { 993 goto drop_pkt; 994 } 995 996 if (nexthdr == IPPROTO_ENCAP || 997 !IN6_ARE_ADDR_EQUAL( 998 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 999 &ip6h->ip6_src) || 1000 !IN6_ARE_ADDR_EQUAL( 1001 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1002 &ip6h->ip6_dst)) { 1003 /* 1004 * For tunnels that have used IPsec protection, 1005 * we need to adjust the MTU to take into account 1006 * the IPsec overhead. 1007 */ 1008 if (ii != NULL) 1009 icmp6->icmp6_mtu = htonl( 1010 ntohl(icmp6->icmp6_mtu) - 1011 ipsec_in_extra_length(first_mp)); 1012 } else { 1013 /* 1014 * Self-encapsulated case. As in the ipv4 case, 1015 * we need to strip the 2nd IP header. Since mp 1016 * is already pulled-up, we can simply bcopy 1017 * the 3rd header + data over the 2nd header. 
1018 */ 1019 uint16_t unused_len; 1020 ip6_t *inner_ip6h = (ip6_t *) 1021 ((uchar_t *)ip6h + hdr_length); 1022 1023 /* 1024 * Make sure we don't do recursion more than once. 1025 */ 1026 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1027 &unused_len, &nexthdrp) || 1028 *nexthdrp == IPPROTO_IPV6) { 1029 goto drop_pkt; 1030 } 1031 1032 /* 1033 * We are about to modify the packet. Make a copy if 1034 * someone else has a reference to it. 1035 */ 1036 if (DB_REF(mp) > 1) { 1037 mblk_t *mp1; 1038 uint16_t icmp6_offset; 1039 1040 mp1 = copymsg(mp); 1041 if (mp1 == NULL) { 1042 goto drop_pkt; 1043 } 1044 icmp6_offset = (uint16_t) 1045 ((uchar_t *)icmp6 - mp->b_rptr); 1046 freemsg(mp); 1047 mp = mp1; 1048 1049 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1050 ip6h = (ip6_t *)&icmp6[1]; 1051 inner_ip6h = (ip6_t *) 1052 ((uchar_t *)ip6h + hdr_length); 1053 1054 if (mctl_present) 1055 first_mp->b_cont = mp; 1056 else 1057 first_mp = mp; 1058 } 1059 1060 /* 1061 * Need to set db_type back to M_DATA before 1062 * refeeding mp into this function. 1063 */ 1064 DB_TYPE(mp) = M_DATA; 1065 1066 /* 1067 * Copy the 3rd header + remaining data on top 1068 * of the 2nd header. 1069 */ 1070 bcopy(inner_ip6h, ip6h, 1071 mp->b_wptr - (uchar_t *)inner_ip6h); 1072 1073 /* 1074 * Subtract length of the 2nd header. 1075 */ 1076 mp->b_wptr -= hdr_length; 1077 1078 /* 1079 * Now recurse, and see what I _really_ should be 1080 * doing here. 1081 */ 1082 icmp_inbound_error_fanout_v6(q, first_mp, 1083 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1084 zoneid); 1085 return; 1086 } 1087 /* FALLTHRU */ 1088 default: 1089 /* 1090 * The rip6h header is only used for the lookup and we 1091 * only set the src and dst addresses and nexthdr. 
1092 */ 1093 rip6h.ip6_src = ip6h->ip6_dst; 1094 rip6h.ip6_dst = ip6h->ip6_src; 1095 rip6h.ip6_nxt = nexthdr; 1096 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1097 IP6_NO_IPPOLICY, mctl_present, zoneid); 1098 return; 1099 } 1100 /* NOTREACHED */ 1101 drop_pkt: 1102 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1103 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1104 freemsg(first_mp); 1105 } 1106 1107 /* 1108 * Process received IPv6 ICMP Redirect messages. 1109 */ 1110 /* ARGSUSED */ 1111 static void 1112 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1113 { 1114 ip6_t *ip6h; 1115 uint16_t hdr_length; 1116 nd_redirect_t *rd; 1117 ire_t *ire; 1118 ire_t *prev_ire; 1119 ire_t *redir_ire; 1120 in6_addr_t *src, *dst, *gateway; 1121 nd_opt_hdr_t *opt; 1122 nce_t *nce; 1123 int nce_flags = 0; 1124 int err = 0; 1125 boolean_t redirect_to_router = B_FALSE; 1126 int len; 1127 int optlen; 1128 iulp_t ulp_info = { 0 }; 1129 ill_t *prev_ire_ill; 1130 ipif_t *ipif; 1131 ip_stack_t *ipst = ill->ill_ipst; 1132 1133 ip6h = (ip6_t *)mp->b_rptr; 1134 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1135 hdr_length = ip_hdr_length_v6(mp, ip6h); 1136 else 1137 hdr_length = IPV6_HDR_LEN; 1138 1139 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1140 len = mp->b_wptr - mp->b_rptr - hdr_length; 1141 src = &ip6h->ip6_src; 1142 dst = &rd->nd_rd_dst; 1143 gateway = &rd->nd_rd_target; 1144 1145 /* Verify if it is a valid redirect */ 1146 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1147 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1148 (rd->nd_rd_code != 0) || 1149 (len < sizeof (nd_redirect_t)) || 1150 (IN6_IS_ADDR_V4MAPPED(dst)) || 1151 (IN6_IS_ADDR_MULTICAST(dst))) { 1152 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1153 freemsg(mp); 1154 return; 1155 } 1156 1157 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1158 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1159 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1160 freemsg(mp); 1161 return; 1162 } 1163 1164 if (len > sizeof 
(nd_redirect_t)) { 1165 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1166 len - sizeof (nd_redirect_t))) { 1167 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1168 freemsg(mp); 1169 return; 1170 } 1171 } 1172 1173 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1174 redirect_to_router = B_TRUE; 1175 nce_flags |= NCE_F_ISROUTER; 1176 } 1177 1178 /* ipif will be refreleased afterwards */ 1179 ipif = ipif_get_next_ipif(NULL, ill); 1180 if (ipif == NULL) { 1181 freemsg(mp); 1182 return; 1183 } 1184 1185 /* 1186 * Verify that the IP source address of the redirect is 1187 * the same as the current first-hop router for the specified 1188 * ICMP destination address. 1189 * Also, Make sure we had a route for the dest in question and 1190 * that route was pointing to the old gateway (the source of the 1191 * redirect packet.) 1192 */ 1193 1194 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1195 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1196 MATCH_IRE_DEFAULT, ipst); 1197 1198 /* 1199 * Check that 1200 * the redirect was not from ourselves 1201 * old gateway is still directly reachable 1202 */ 1203 if (prev_ire == NULL || 1204 prev_ire->ire_type == IRE_LOCAL) { 1205 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1206 ipif_refrele(ipif); 1207 goto fail_redirect; 1208 } 1209 prev_ire_ill = ire_to_ill(prev_ire); 1210 ASSERT(prev_ire_ill != NULL); 1211 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1212 nce_flags |= NCE_F_NONUD; 1213 1214 /* 1215 * Should we use the old ULP info to create the new gateway? From 1216 * a user's perspective, we should inherit the info so that it 1217 * is a "smooth" transition. If we do not do that, then new 1218 * connections going thru the new gateway will have no route metrics, 1219 * which is counter-intuitive to user. From a network point of 1220 * view, this may or may not make sense even though the new gateway 1221 * is still directly connected to us so the route metrics should not 1222 * change much. 
1223 * 1224 * But if the old ire_uinfo is not initialized, we do another 1225 * recursive lookup on the dest using the new gateway. There may 1226 * be a route to that. If so, use it to initialize the redirect 1227 * route. 1228 */ 1229 if (prev_ire->ire_uinfo.iulp_set) { 1230 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1231 } else if (redirect_to_router) { 1232 /* 1233 * Only do the following if the redirection is really to 1234 * a router. 1235 */ 1236 ire_t *tmp_ire; 1237 ire_t *sire; 1238 1239 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1240 ALL_ZONES, 0, NULL, 1241 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1242 ipst); 1243 if (sire != NULL) { 1244 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1245 ASSERT(tmp_ire != NULL); 1246 ire_refrele(tmp_ire); 1247 ire_refrele(sire); 1248 } else if (tmp_ire != NULL) { 1249 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1250 sizeof (iulp_t)); 1251 ire_refrele(tmp_ire); 1252 } 1253 } 1254 1255 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1256 opt = (nd_opt_hdr_t *)&rd[1]; 1257 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1258 if (opt != NULL) { 1259 err = ndp_lookup_then_add_v6(ill, 1260 (uchar_t *)&opt[1], /* Link layer address */ 1261 gateway, 1262 &ipv6_all_ones, /* prefix mask */ 1263 &ipv6_all_zeros, /* Mapping mask */ 1264 0, 1265 nce_flags, 1266 ND_STALE, 1267 &nce); 1268 switch (err) { 1269 case 0: 1270 NCE_REFRELE(nce); 1271 break; 1272 case EEXIST: 1273 /* 1274 * Check to see if link layer address has changed and 1275 * process the nce_state accordingly. 
1276 */ 1277 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1278 NCE_REFRELE(nce); 1279 break; 1280 default: 1281 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1282 err)); 1283 ipif_refrele(ipif); 1284 goto fail_redirect; 1285 } 1286 } 1287 if (redirect_to_router) { 1288 /* icmp_redirect_ok_v6() must have already verified this */ 1289 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1290 1291 /* 1292 * Create a Route Association. This will allow us to remember 1293 * a router told us to use the particular gateway. 1294 */ 1295 ire = ire_create_v6( 1296 dst, 1297 &ipv6_all_ones, /* mask */ 1298 &prev_ire->ire_src_addr_v6, /* source addr */ 1299 gateway, /* gateway addr */ 1300 &prev_ire->ire_max_frag, /* max frag */ 1301 NULL, /* no src nce */ 1302 NULL, /* no rfq */ 1303 NULL, /* no stq */ 1304 IRE_HOST, 1305 prev_ire->ire_ipif, 1306 NULL, 1307 0, 1308 0, 1309 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1310 &ulp_info, 1311 NULL, 1312 NULL, 1313 ipst); 1314 } else { 1315 queue_t *stq; 1316 1317 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1318 ? ipif->ipif_rq : ipif->ipif_wq; 1319 1320 /* 1321 * Just create an on link entry, i.e. interface route. 
1322 */ 1323 ire = ire_create_v6( 1324 dst, /* gateway == dst */ 1325 &ipv6_all_ones, /* mask */ 1326 &prev_ire->ire_src_addr_v6, /* source addr */ 1327 &ipv6_all_zeros, /* gateway addr */ 1328 &prev_ire->ire_max_frag, /* max frag */ 1329 NULL, /* no src nce */ 1330 NULL, /* ire rfq */ 1331 stq, /* ire stq */ 1332 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1333 prev_ire->ire_ipif, 1334 &ipv6_all_ones, 1335 0, 1336 0, 1337 (RTF_DYNAMIC | RTF_HOST), 1338 &ulp_info, 1339 NULL, 1340 NULL, 1341 ipst); 1342 } 1343 1344 /* Release reference from earlier ipif_get_next_ipif() */ 1345 ipif_refrele(ipif); 1346 1347 if (ire == NULL) 1348 goto fail_redirect; 1349 1350 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1351 1352 /* tell routing sockets that we received a redirect */ 1353 ip_rts_change_v6(RTM_REDIRECT, 1354 &rd->nd_rd_dst, 1355 &rd->nd_rd_target, 1356 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1357 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1358 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1359 1360 /* 1361 * Delete any existing IRE_HOST type ires for this destination. 1362 * This together with the added IRE has the effect of 1363 * modifying an existing redirect. 
1364 */ 1365 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1366 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1367 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), 1368 ipst); 1369 1370 ire_refrele(ire); /* Held in ire_add_v6 */ 1371 1372 if (redir_ire != NULL) { 1373 if (redir_ire->ire_flags & RTF_DYNAMIC) 1374 ire_delete(redir_ire); 1375 ire_refrele(redir_ire); 1376 } 1377 } 1378 1379 if (prev_ire->ire_type == IRE_CACHE) 1380 ire_delete(prev_ire); 1381 ire_refrele(prev_ire); 1382 prev_ire = NULL; 1383 1384 fail_redirect: 1385 if (prev_ire != NULL) 1386 ire_refrele(prev_ire); 1387 freemsg(mp); 1388 } 1389 1390 static ill_t * 1391 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1392 { 1393 ill_t *ill; 1394 1395 ASSERT(WR(q) == q); 1396 1397 if (q->q_next != NULL) { 1398 ill = (ill_t *)q->q_ptr; 1399 if (ILL_CAN_LOOKUP(ill)) 1400 ill_refhold(ill); 1401 else 1402 ill = NULL; 1403 } else { 1404 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1405 NULL, NULL, NULL, NULL, NULL, ipst); 1406 } 1407 if (ill == NULL) 1408 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1409 return (ill); 1410 } 1411 1412 /* 1413 * Assigns an appropriate source address to the packet. 1414 * If origdst is one of our IP addresses that use it as the source. 1415 * If the queue is an ill queue then select a source from that ill. 1416 * Otherwise pick a source based on a route lookup back to the origsrc. 1417 * 1418 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1419 */ 1420 static in6_addr_t * 1421 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1422 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1423 { 1424 ill_t *ill; 1425 ire_t *ire; 1426 ipif_t *ipif; 1427 1428 ASSERT(!(wq->q_flag & QREADR)); 1429 if (wq->q_next != NULL) { 1430 ill = (ill_t *)wq->q_ptr; 1431 } else { 1432 ill = NULL; 1433 } 1434 1435 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1436 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1437 ipst); 1438 if (ire != NULL) { 1439 /* Destined to one of our addresses */ 1440 *src = *origdst; 1441 ire_refrele(ire); 1442 return (src); 1443 } 1444 if (ire != NULL) { 1445 ire_refrele(ire); 1446 ire = NULL; 1447 } 1448 if (ill == NULL) { 1449 /* What is the route back to the original source? */ 1450 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1451 NULL, NULL, zoneid, NULL, 1452 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1453 if (ire == NULL) { 1454 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1455 return (NULL); 1456 } 1457 /* 1458 * Does not matter whether we use ire_stq or ire_ipif here. 1459 * Just pick an ill for ICMP replies. 1460 */ 1461 ASSERT(ire->ire_ipif != NULL); 1462 ill = ire->ire_ipif->ipif_ill; 1463 ire_refrele(ire); 1464 } 1465 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1466 IPV6_PREFER_SRC_DEFAULT, zoneid); 1467 if (ipif != NULL) { 1468 *src = ipif->ipif_v6src_addr; 1469 ipif_refrele(ipif); 1470 return (src); 1471 } 1472 /* 1473 * Unusual case - can't find a usable source address to reach the 1474 * original source. Use what in the route to the source. 
1475 */ 1476 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1477 NULL, NULL, zoneid, NULL, 1478 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1479 if (ire == NULL) { 1480 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1481 return (NULL); 1482 } 1483 ASSERT(ire != NULL); 1484 *src = ire->ire_src_addr_v6; 1485 ire_refrele(ire); 1486 return (src); 1487 } 1488 1489 /* 1490 * Build and ship an IPv6 ICMP message using the packet data in mp, 1491 * and the ICMP header pointed to by "stuff". (May be called as 1492 * writer.) 1493 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1494 * verify that an icmp error packet can be sent. 1495 * 1496 * If q is an ill write side queue (which is the case when packets 1497 * arrive from ip_rput) then ip_wput code will ensure that packets to 1498 * link-local destinations are sent out that ill. 1499 * 1500 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1501 * source address (see above function). 1502 */ 1503 static void 1504 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1505 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1506 ip_stack_t *ipst) 1507 { 1508 ip6_t *ip6h; 1509 in6_addr_t v6dst; 1510 size_t len_needed; 1511 size_t msg_len; 1512 mblk_t *mp1; 1513 icmp6_t *icmp6; 1514 ill_t *ill; 1515 in6_addr_t v6src; 1516 mblk_t *ipsec_mp; 1517 ipsec_out_t *io; 1518 1519 ill = ip_queue_to_ill_v6(q, ipst); 1520 if (ill == NULL) { 1521 freemsg(mp); 1522 return; 1523 } 1524 1525 if (mctl_present) { 1526 /* 1527 * If it is : 1528 * 1529 * 1) a IPSEC_OUT, then this is caused by outbound 1530 * datagram originating on this host. IPSEC processing 1531 * may or may not have been done. Refer to comments above 1532 * icmp_inbound_error_fanout for details. 1533 * 1534 * 2) a IPSEC_IN if we are generating a icmp_message 1535 * for an incoming datagram destined for us i.e called 1536 * from ip_fanout_send_icmp. 
1537 */ 1538 ipsec_info_t *in; 1539 1540 ipsec_mp = mp; 1541 mp = ipsec_mp->b_cont; 1542 1543 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1544 ip6h = (ip6_t *)mp->b_rptr; 1545 1546 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1547 in->ipsec_info_type == IPSEC_IN); 1548 1549 if (in->ipsec_info_type == IPSEC_IN) { 1550 /* 1551 * Convert the IPSEC_IN to IPSEC_OUT. 1552 */ 1553 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1554 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1555 ill_refrele(ill); 1556 return; 1557 } 1558 } else { 1559 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1560 io = (ipsec_out_t *)in; 1561 /* 1562 * Clear out ipsec_out_proc_begin, so we do a fresh 1563 * ire lookup. 1564 */ 1565 io->ipsec_out_proc_begin = B_FALSE; 1566 } 1567 } else { 1568 /* 1569 * This is in clear. The icmp message we are building 1570 * here should go out in clear. 1571 */ 1572 ipsec_in_t *ii; 1573 ASSERT(mp->b_datap->db_type == M_DATA); 1574 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1575 if (ipsec_mp == NULL) { 1576 freemsg(mp); 1577 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1578 ill_refrele(ill); 1579 return; 1580 } 1581 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1582 1583 /* This is not a secure packet */ 1584 ii->ipsec_in_secure = B_FALSE; 1585 /* 1586 * For trusted extensions using a shared IP address we can 1587 * send using any zoneid. 1588 */ 1589 if (zoneid == ALL_ZONES) 1590 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1591 else 1592 ii->ipsec_in_zoneid = zoneid; 1593 ipsec_mp->b_cont = mp; 1594 ip6h = (ip6_t *)mp->b_rptr; 1595 /* 1596 * Convert the IPSEC_IN to IPSEC_OUT. 
1597 */ 1598 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1599 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1600 ill_refrele(ill); 1601 return; 1602 } 1603 } 1604 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1605 1606 if (v6src_ptr != NULL) { 1607 v6src = *v6src_ptr; 1608 } else { 1609 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1610 &v6src, zoneid, ipst) == NULL) { 1611 freemsg(ipsec_mp); 1612 ill_refrele(ill); 1613 return; 1614 } 1615 } 1616 v6dst = ip6h->ip6_src; 1617 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1618 msg_len = msgdsize(mp); 1619 if (msg_len > len_needed) { 1620 if (!adjmsg(mp, len_needed - msg_len)) { 1621 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1622 freemsg(ipsec_mp); 1623 ill_refrele(ill); 1624 return; 1625 } 1626 msg_len = len_needed; 1627 } 1628 mp1 = allocb_cred(IPV6_HDR_LEN + len, DB_CRED(mp)); 1629 if (mp1 == NULL) { 1630 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1631 freemsg(ipsec_mp); 1632 ill_refrele(ill); 1633 return; 1634 } 1635 ill_refrele(ill); 1636 mp1->b_cont = mp; 1637 mp = mp1; 1638 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1639 io->ipsec_out_type == IPSEC_OUT); 1640 ipsec_mp->b_cont = mp; 1641 1642 /* 1643 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1644 * node generates be accepted in peace by all on-host destinations. 1645 * If we do NOT assume that all on-host destinations trust 1646 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1647 * (Look for ipsec_out_icmp_loopback). 
1648 */ 1649 io->ipsec_out_icmp_loopback = B_TRUE; 1650 1651 ip6h = (ip6_t *)mp->b_rptr; 1652 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1653 1654 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1655 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1656 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1657 ip6h->ip6_dst = v6dst; 1658 ip6h->ip6_src = v6src; 1659 msg_len += IPV6_HDR_LEN + len; 1660 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1661 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1662 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1663 } 1664 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1665 icmp6 = (icmp6_t *)&ip6h[1]; 1666 bcopy(stuff, (char *)icmp6, len); 1667 /* 1668 * Prepare for checksum by putting icmp length in the icmp 1669 * checksum field. The checksum is calculated in ip_wput_v6. 1670 */ 1671 icmp6->icmp6_cksum = ip6h->ip6_plen; 1672 if (icmp6->icmp6_type == ND_REDIRECT) { 1673 ip6h->ip6_hops = IPV6_MAX_HOPS; 1674 } 1675 /* Send to V6 writeside put routine */ 1676 put(q, ipsec_mp); 1677 } 1678 1679 /* 1680 * Update the output mib when ICMPv6 packets are sent. 
1681 */ 1682 static void 1683 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1684 { 1685 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1686 1687 switch (icmp6->icmp6_type) { 1688 case ICMP6_DST_UNREACH: 1689 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1690 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1691 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1692 break; 1693 1694 case ICMP6_TIME_EXCEEDED: 1695 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1696 break; 1697 1698 case ICMP6_PARAM_PROB: 1699 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1700 break; 1701 1702 case ICMP6_PACKET_TOO_BIG: 1703 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1704 break; 1705 1706 case ICMP6_ECHO_REQUEST: 1707 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1708 break; 1709 1710 case ICMP6_ECHO_REPLY: 1711 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1712 break; 1713 1714 case ND_ROUTER_SOLICIT: 1715 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1716 break; 1717 1718 case ND_ROUTER_ADVERT: 1719 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1720 break; 1721 1722 case ND_NEIGHBOR_SOLICIT: 1723 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1724 break; 1725 1726 case ND_NEIGHBOR_ADVERT: 1727 BUMP_MIB(ill->ill_icmp6_mib, 1728 ipv6IfIcmpOutNeighborAdvertisements); 1729 break; 1730 1731 case ND_REDIRECT: 1732 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1733 break; 1734 1735 case MLD_LISTENER_QUERY: 1736 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1737 break; 1738 1739 case MLD_LISTENER_REPORT: 1740 case MLD_V2_LISTENER_REPORT: 1741 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1742 break; 1743 1744 case MLD_LISTENER_REDUCTION: 1745 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1746 break; 1747 } 1748 } 1749 1750 /* 1751 * Check if it is ok to send an ICMPv6 error packet in 1752 * response to the IP packet in 
mp. 1753 * Free the message and return null if no 1754 * ICMP error packet should be sent. 1755 */ 1756 static mblk_t * 1757 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1758 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1759 { 1760 ip6_t *ip6h; 1761 1762 if (!mp) 1763 return (NULL); 1764 1765 ip6h = (ip6_t *)mp->b_rptr; 1766 1767 /* Check if source address uniquely identifies the host */ 1768 1769 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1770 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1771 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1772 freemsg(mp); 1773 return (NULL); 1774 } 1775 1776 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1777 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1778 icmp6_t *icmp6; 1779 1780 if (mp->b_wptr - mp->b_rptr < len_needed) { 1781 if (!pullupmsg(mp, len_needed)) { 1782 ill_t *ill; 1783 1784 ill = ip_queue_to_ill_v6(q, ipst); 1785 if (ill == NULL) { 1786 BUMP_MIB(&ipst->ips_icmp6_mib, 1787 ipv6IfIcmpInErrors); 1788 } else { 1789 BUMP_MIB(ill->ill_icmp6_mib, 1790 ipv6IfIcmpInErrors); 1791 ill_refrele(ill); 1792 } 1793 freemsg(mp); 1794 return (NULL); 1795 } 1796 ip6h = (ip6_t *)mp->b_rptr; 1797 } 1798 icmp6 = (icmp6_t *)&ip6h[1]; 1799 /* Explicitly do not generate errors in response to redirects */ 1800 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1801 icmp6->icmp6_type == ND_REDIRECT) { 1802 freemsg(mp); 1803 return (NULL); 1804 } 1805 } 1806 /* 1807 * Check that the destination is not multicast and that the packet 1808 * was not sent on link layer broadcast or multicast. (Exception 1809 * is Packet too big message as per the draft - when mcast_ok is set.) 1810 */ 1811 if (!mcast_ok && 1812 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1813 freemsg(mp); 1814 return (NULL); 1815 } 1816 if (icmp_err_rate_limit(ipst)) { 1817 /* 1818 * Only send ICMP error packets every so often. 1819 * This should be done on a per port/source basis, 1820 * but for now this will suffice. 
1821 */ 1822 freemsg(mp); 1823 return (NULL); 1824 } 1825 return (mp); 1826 } 1827 1828 /* 1829 * Generate an ICMPv6 redirect message. 1830 * Include target link layer address option if it exits. 1831 * Always include redirect header. 1832 */ 1833 static void 1834 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1835 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1836 { 1837 nd_redirect_t *rd; 1838 nd_opt_rd_hdr_t *rdh; 1839 uchar_t *buf; 1840 nce_t *nce = NULL; 1841 nd_opt_hdr_t *opt; 1842 int len; 1843 int ll_opt_len = 0; 1844 int max_redir_hdr_data_len; 1845 int pkt_len; 1846 in6_addr_t *srcp; 1847 ip_stack_t *ipst = ill->ill_ipst; 1848 1849 /* 1850 * We are called from ip_rput where we could 1851 * not have attached an IPSEC_IN. 1852 */ 1853 ASSERT(mp->b_datap->db_type == M_DATA); 1854 1855 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1856 if (mp == NULL) 1857 return; 1858 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1859 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1860 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1861 ill->ill_phys_addr_length + 7)/8 * 8; 1862 } 1863 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1864 ASSERT(len % 4 == 0); 1865 buf = kmem_alloc(len, KM_NOSLEEP); 1866 if (buf == NULL) { 1867 if (nce != NULL) 1868 NCE_REFRELE(nce); 1869 freemsg(mp); 1870 return; 1871 } 1872 1873 rd = (nd_redirect_t *)buf; 1874 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1875 rd->nd_rd_code = 0; 1876 rd->nd_rd_reserved = 0; 1877 rd->nd_rd_target = *targetp; 1878 rd->nd_rd_dst = *dest; 1879 1880 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1881 if (nce != NULL && ll_opt_len != 0) { 1882 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1883 opt->nd_opt_len = ll_opt_len/8; 1884 bcopy((char *)nce->nce_res_mp->b_rptr + 1885 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1886 ill->ill_phys_addr_length); 1887 } 1888 if (nce != NULL) 1889 NCE_REFRELE(nce); 1890 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 
1891 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1892 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1893 max_redir_hdr_data_len = 1894 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1895 pkt_len = msgdsize(mp); 1896 /* Make sure mp is 8 byte aligned */ 1897 if (pkt_len > max_redir_hdr_data_len) { 1898 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1899 sizeof (nd_opt_rd_hdr_t))/8; 1900 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1901 } else { 1902 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1903 (void) adjmsg(mp, -(pkt_len % 8)); 1904 } 1905 rdh->nd_opt_rh_reserved1 = 0; 1906 rdh->nd_opt_rh_reserved2 = 0; 1907 /* ipif_v6src_addr contains the link-local source address */ 1908 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1909 if (ill->ill_group != NULL) { 1910 /* 1911 * The receiver of the redirect will verify whether it 1912 * had a route through us (srcp that we will use in 1913 * the redirect) or not. As we load spread even link-locals, 1914 * we don't know which source address the receiver of 1915 * redirect has in its route for communicating with us. 1916 * Thus we randomly choose a source here and finally we 1917 * should get to the right one and it will eventually 1918 * accept the redirect from us. We can't call 1919 * ip_lookup_scope_v6 because we don't have the right 1920 * link-local address here. Thus we randomly choose one. 
1921 */ 1922 int cnt = ill->ill_group->illgrp_ill_count; 1923 1924 ill = ill->ill_group->illgrp_ill; 1925 cnt = ++ipst->ips_icmp_redirect_v6_src_index % cnt; 1926 while (cnt--) 1927 ill = ill->ill_group_next; 1928 srcp = &ill->ill_ipif->ipif_v6src_addr; 1929 } else { 1930 srcp = &ill->ill_ipif->ipif_v6src_addr; 1931 } 1932 rw_exit(&ipst->ips_ill_g_lock); 1933 /* Redirects sent by router, and router is global zone */ 1934 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1935 kmem_free(buf, len); 1936 } 1937 1938 1939 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1940 void 1941 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1942 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1943 ip_stack_t *ipst) 1944 { 1945 icmp6_t icmp6; 1946 boolean_t mctl_present; 1947 mblk_t *first_mp; 1948 1949 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1950 1951 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1952 if (mp == NULL) { 1953 if (mctl_present) 1954 freeb(first_mp); 1955 return; 1956 } 1957 bzero(&icmp6, sizeof (icmp6_t)); 1958 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1959 icmp6.icmp6_code = code; 1960 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1961 zoneid, ipst); 1962 } 1963 1964 /* 1965 * Generate an ICMP unreachable message. 
1966 */ 1967 void 1968 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1969 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1970 ip_stack_t *ipst) 1971 { 1972 icmp6_t icmp6; 1973 boolean_t mctl_present; 1974 mblk_t *first_mp; 1975 1976 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1977 1978 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1979 if (mp == NULL) { 1980 if (mctl_present) 1981 freeb(first_mp); 1982 return; 1983 } 1984 bzero(&icmp6, sizeof (icmp6_t)); 1985 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1986 icmp6.icmp6_code = code; 1987 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1988 zoneid, ipst); 1989 } 1990 1991 /* 1992 * Generate an ICMP pkt too big message. 1993 */ 1994 static void 1995 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1996 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1997 { 1998 icmp6_t icmp6; 1999 mblk_t *first_mp; 2000 boolean_t mctl_present; 2001 2002 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2003 2004 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2005 if (mp == NULL) { 2006 if (mctl_present) 2007 freeb(first_mp); 2008 return; 2009 } 2010 bzero(&icmp6, sizeof (icmp6_t)); 2011 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2012 icmp6.icmp6_code = 0; 2013 icmp6.icmp6_mtu = htonl(mtu); 2014 2015 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2016 zoneid, ipst); 2017 } 2018 2019 /* 2020 * Generate an ICMP parameter problem message. (May be called as writer.) 2021 * 'offset' is the offset from the beginning of the packet in error. 
2022 */ 2023 static void 2024 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2025 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2026 ip_stack_t *ipst) 2027 { 2028 icmp6_t icmp6; 2029 boolean_t mctl_present; 2030 mblk_t *first_mp; 2031 2032 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2033 2034 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2035 if (mp == NULL) { 2036 if (mctl_present) 2037 freeb(first_mp); 2038 return; 2039 } 2040 bzero((char *)&icmp6, sizeof (icmp6_t)); 2041 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2042 icmp6.icmp6_code = code; 2043 icmp6.icmp6_pptr = htonl(offset); 2044 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2045 zoneid, ipst); 2046 } 2047 2048 /* 2049 * This code will need to take into account the possibility of binding 2050 * to a link local address on a multi-homed host, in which case the 2051 * outgoing interface (from the conn) will need to be used when getting 2052 * an ire for the dst. Going through proper outgoing interface and 2053 * choosing the source address corresponding to the outgoing interface 2054 * is necessary when the destination address is a link-local address and 2055 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2056 * This can happen when active connection is setup; thus ipp pointer 2057 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2058 * pointer is passed as ipp pointer. 
2059 */ 2060 mblk_t * 2061 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2062 { 2063 ssize_t len; 2064 int protocol; 2065 struct T_bind_req *tbr; 2066 sin6_t *sin6; 2067 ipa6_conn_t *ac6; 2068 in6_addr_t *v6srcp; 2069 in6_addr_t *v6dstp; 2070 uint16_t lport; 2071 uint16_t fport; 2072 uchar_t *ucp; 2073 mblk_t *mp1; 2074 boolean_t ire_requested; 2075 boolean_t ipsec_policy_set; 2076 int error = 0; 2077 boolean_t local_bind; 2078 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2079 ipa6_conn_x_t *acx6; 2080 boolean_t verify_dst; 2081 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2082 2083 ASSERT(connp->conn_af_isv6); 2084 len = mp->b_wptr - mp->b_rptr; 2085 if (len < (sizeof (*tbr) + 1)) { 2086 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2087 "ip_bind_v6: bogus msg, len %ld", len); 2088 goto bad_addr; 2089 } 2090 /* Back up and extract the protocol identifier. */ 2091 mp->b_wptr--; 2092 tbr = (struct T_bind_req *)mp->b_rptr; 2093 /* Reset the message type in preparation for shipping it back. */ 2094 mp->b_datap->db_type = M_PCPROTO; 2095 2096 protocol = *mp->b_wptr & 0xFF; 2097 connp->conn_ulp = (uint8_t)protocol; 2098 2099 /* 2100 * Check for a zero length address. This is from a protocol that 2101 * wants to register to receive all packets of its type. 2102 */ 2103 if (tbr->ADDR_length == 0) { 2104 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2105 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2106 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2107 NULL) { 2108 /* 2109 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2110 * Do not allow others to bind to these. 2111 */ 2112 goto bad_addr; 2113 } 2114 2115 /* 2116 * 2117 * The udp module never sends down a zero-length address, 2118 * and allowing this on a labeled system will break MLP 2119 * functionality. 
2120 */ 2121 if (is_system_labeled() && protocol == IPPROTO_UDP) 2122 goto bad_addr; 2123 2124 /* Allow ipsec plumbing */ 2125 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2126 protocol != IPPROTO_ESP) 2127 goto bad_addr; 2128 2129 connp->conn_srcv6 = ipv6_all_zeros; 2130 ipcl_proto_insert_v6(connp, protocol); 2131 2132 tbr->PRIM_type = T_BIND_ACK; 2133 return (mp); 2134 } 2135 2136 /* Extract the address pointer from the message. */ 2137 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2138 tbr->ADDR_length); 2139 if (ucp == NULL) { 2140 ip1dbg(("ip_bind_v6: no address\n")); 2141 goto bad_addr; 2142 } 2143 if (!OK_32PTR(ucp)) { 2144 ip1dbg(("ip_bind_v6: unaligned address\n")); 2145 goto bad_addr; 2146 } 2147 mp1 = mp->b_cont; /* trailing mp if any */ 2148 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2149 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2150 2151 switch (tbr->ADDR_length) { 2152 default: 2153 ip1dbg(("ip_bind_v6: bad address length %d\n", 2154 (int)tbr->ADDR_length)); 2155 goto bad_addr; 2156 2157 case IPV6_ADDR_LEN: 2158 /* Verification of local address only */ 2159 v6srcp = (in6_addr_t *)ucp; 2160 lport = 0; 2161 local_bind = B_TRUE; 2162 break; 2163 2164 case sizeof (sin6_t): 2165 sin6 = (sin6_t *)ucp; 2166 v6srcp = &sin6->sin6_addr; 2167 lport = sin6->sin6_port; 2168 local_bind = B_TRUE; 2169 break; 2170 2171 case sizeof (ipa6_conn_t): 2172 /* 2173 * Verify that both the source and destination addresses 2174 * are valid. 2175 * Note that we allow connect to broadcast and multicast 2176 * addresses when ire_requested is set. Thus the ULP 2177 * has to check for IRE_BROADCAST and multicast. 2178 */ 2179 ac6 = (ipa6_conn_t *)ucp; 2180 v6srcp = &ac6->ac6_laddr; 2181 v6dstp = &ac6->ac6_faddr; 2182 fport = ac6->ac6_fport; 2183 /* For raw socket, the local port is not set. */ 2184 lport = ac6->ac6_lport != 0 ? 
ac6->ac6_lport : 2185 connp->conn_lport; 2186 local_bind = B_FALSE; 2187 /* Always verify destination reachability. */ 2188 verify_dst = B_TRUE; 2189 break; 2190 2191 case sizeof (ipa6_conn_x_t): 2192 /* 2193 * Verify that the source address is valid. 2194 * Note that we allow connect to broadcast and multicast 2195 * addresses when ire_requested is set. Thus the ULP 2196 * has to check for IRE_BROADCAST and multicast. 2197 */ 2198 acx6 = (ipa6_conn_x_t *)ucp; 2199 ac6 = &acx6->ac6x_conn; 2200 v6srcp = &ac6->ac6_laddr; 2201 v6dstp = &ac6->ac6_faddr; 2202 fport = ac6->ac6_fport; 2203 lport = ac6->ac6_lport; 2204 local_bind = B_FALSE; 2205 /* 2206 * Client that passed ipa6_conn_x_t to us specifies whether to 2207 * verify destination reachability. 2208 */ 2209 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2210 break; 2211 } 2212 if (local_bind) { 2213 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2214 /* Bind to IPv4 address */ 2215 ipaddr_t v4src; 2216 2217 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2218 2219 error = ip_bind_laddr(connp, mp, v4src, lport, 2220 ire_requested, ipsec_policy_set, 2221 tbr->ADDR_length != IPV6_ADDR_LEN); 2222 if (error != 0) 2223 goto bad_addr; 2224 connp->conn_pkt_isv6 = B_FALSE; 2225 } else { 2226 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2227 error = 0; 2228 goto bad_addr; 2229 } 2230 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2231 ire_requested, ipsec_policy_set, 2232 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2233 if (error != 0) 2234 goto bad_addr; 2235 connp->conn_pkt_isv6 = B_TRUE; 2236 } 2237 } else { 2238 /* 2239 * Bind to local and remote address. Local might be 2240 * unspecified in which case it will be extracted from 2241 * ire_src_addr_v6 2242 */ 2243 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2244 /* Connect to IPv4 address */ 2245 ipaddr_t v4src; 2246 ipaddr_t v4dst; 2247 2248 /* Is the source unspecified or mapped? 
*/ 2249 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2250 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2251 ip1dbg(("ip_bind_v6: " 2252 "dst is mapped, but not the src\n")); 2253 goto bad_addr; 2254 } 2255 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2256 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2257 2258 /* 2259 * XXX Fix needed. Need to pass ipsec_policy_set 2260 * instead of B_FALSE. 2261 */ 2262 2263 /* Always verify destination reachability. */ 2264 error = ip_bind_connected(connp, mp, &v4src, lport, 2265 v4dst, fport, ire_requested, ipsec_policy_set, 2266 B_TRUE, B_TRUE); 2267 if (error != 0) 2268 goto bad_addr; 2269 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2270 connp->conn_pkt_isv6 = B_FALSE; 2271 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2272 ip1dbg(("ip_bind_v6: " 2273 "src is mapped, but not the dst\n")); 2274 goto bad_addr; 2275 } else { 2276 error = ip_bind_connected_v6(connp, mp, v6srcp, 2277 lport, v6dstp, ipp, fport, ire_requested, 2278 ipsec_policy_set, B_TRUE, verify_dst); 2279 if (error != 0) 2280 goto bad_addr; 2281 connp->conn_pkt_isv6 = B_TRUE; 2282 } 2283 } 2284 2285 /* Update conn_send and pktversion if v4/v6 changed */ 2286 if (orig_pkt_isv6 != connp->conn_pkt_isv6) { 2287 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2288 } 2289 /* 2290 * Pass the IPSEC headers size in ire_ipsec_overhead. 2291 * We can't do this in ip_bind_insert_ire because the policy 2292 * may not have been inherited at that point in time and hence 2293 * conn_out_enforce_policy may not be set. 2294 */ 2295 mp1 = mp->b_cont; 2296 if (ire_requested && connp->conn_out_enforce_policy && 2297 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2298 ire_t *ire = (ire_t *)mp1->b_rptr; 2299 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2300 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2301 } 2302 2303 /* Send it home. 
*/ 2304 mp->b_datap->db_type = M_PCPROTO; 2305 tbr->PRIM_type = T_BIND_ACK; 2306 return (mp); 2307 2308 bad_addr: 2309 if (error == EINPROGRESS) 2310 return (NULL); 2311 if (error > 0) 2312 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2313 else 2314 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2315 return (mp); 2316 } 2317 2318 /* 2319 * Here address is verified to be a valid local address. 2320 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2321 * address is also considered a valid local address. 2322 * In the case of a multicast address, however, the 2323 * upper protocol is expected to reset the src address 2324 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2325 * no packets are emitted with multicast address as 2326 * source address. 2327 * The addresses valid for bind are: 2328 * (1) - in6addr_any 2329 * (2) - IP address of an UP interface 2330 * (3) - IP address of a DOWN interface 2331 * (4) - a multicast address. In this case 2332 * the conn will only receive packets destined to 2333 * the specified multicast address. Note: the 2334 * application still has to issue an 2335 * IPV6_JOIN_GROUP socket option. 2336 * 2337 * In all the above cases, the bound address must be valid in the current zone. 2338 * When the address is loopback or multicast, there might be many matching IREs 2339 * so bind has to look up based on the zone. 2340 */ 2341 static int 2342 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2343 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2344 boolean_t fanout_insert) 2345 { 2346 int error = 0; 2347 ire_t *src_ire = NULL; 2348 ipif_t *ipif = NULL; 2349 mblk_t *policy_mp; 2350 zoneid_t zoneid; 2351 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2352 2353 if (ipsec_policy_set) 2354 policy_mp = mp->b_cont; 2355 2356 /* 2357 * If it was previously connected, conn_fully_bound would have 2358 * been set. 
2359 */ 2360 connp->conn_fully_bound = B_FALSE; 2361 2362 zoneid = connp->conn_zoneid; 2363 2364 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2365 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2366 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2367 /* 2368 * If an address other than in6addr_any is requested, 2369 * we verify that it is a valid address for bind 2370 * Note: Following code is in if-else-if form for 2371 * readability compared to a condition check. 2372 */ 2373 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2374 if (IRE_IS_LOCAL(src_ire)) { 2375 /* 2376 * (2) Bind to address of local UP interface 2377 */ 2378 ipif = src_ire->ire_ipif; 2379 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2380 ipif_t *multi_ipif = NULL; 2381 ire_t *save_ire; 2382 /* 2383 * (4) bind to multicast address. 2384 * Fake out the IRE returned to upper 2385 * layer to be a broadcast IRE in 2386 * ip_bind_insert_ire_v6(). 2387 * Pass other information that matches 2388 * the ipif (e.g. the source address). 
2389 * conn_multicast_ill is only used for 2390 * IPv6 packets 2391 */ 2392 mutex_enter(&connp->conn_lock); 2393 if (connp->conn_multicast_ill != NULL) { 2394 (void) ipif_lookup_zoneid( 2395 connp->conn_multicast_ill, zoneid, 0, 2396 &multi_ipif); 2397 } else { 2398 /* 2399 * Look for default like 2400 * ip_wput_v6 2401 */ 2402 multi_ipif = ipif_lookup_group_v6( 2403 &ipv6_unspecified_group, zoneid, ipst); 2404 } 2405 mutex_exit(&connp->conn_lock); 2406 save_ire = src_ire; 2407 src_ire = NULL; 2408 if (multi_ipif == NULL || !ire_requested || 2409 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2410 src_ire = save_ire; 2411 error = EADDRNOTAVAIL; 2412 } else { 2413 ASSERT(src_ire != NULL); 2414 if (save_ire != NULL) 2415 ire_refrele(save_ire); 2416 } 2417 if (multi_ipif != NULL) 2418 ipif_refrele(multi_ipif); 2419 } else { 2420 *mp->b_wptr++ = (char)connp->conn_ulp; 2421 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2422 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error, 2423 ipst); 2424 if (ipif == NULL) { 2425 if (error == EINPROGRESS) { 2426 if (src_ire != NULL) 2427 ire_refrele(src_ire); 2428 return (error); 2429 } 2430 /* 2431 * Not a valid address for bind 2432 */ 2433 error = EADDRNOTAVAIL; 2434 } else { 2435 ipif_refrele(ipif); 2436 } 2437 /* 2438 * Just to keep it consistent with the processing in 2439 * ip_bind_v6(). 2440 */ 2441 mp->b_wptr--; 2442 } 2443 2444 if (error != 0) { 2445 /* Red Alert! Attempting to be a bogon! */ 2446 if (ip_debug > 2) { 2447 /* ip1dbg */ 2448 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2449 " address %s\n", AF_INET6, v6src); 2450 } 2451 goto bad_addr; 2452 } 2453 } 2454 2455 /* 2456 * Allow setting new policies. For example, disconnects come 2457 * down as ipa_t bind. As we would have set conn_policy_cached 2458 * to B_TRUE before, we should set it to B_FALSE, so that policy 2459 * can change after the disconnect. 
2460 */ 2461 connp->conn_policy_cached = B_FALSE; 2462 2463 /* If not fanout_insert this was just an address verification */ 2464 if (fanout_insert) { 2465 /* 2466 * The addresses have been verified. Time to insert in 2467 * the correct fanout list. 2468 */ 2469 connp->conn_srcv6 = *v6src; 2470 connp->conn_remv6 = ipv6_all_zeros; 2471 connp->conn_lport = lport; 2472 connp->conn_fport = 0; 2473 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2474 } 2475 if (error == 0) { 2476 if (ire_requested) { 2477 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL, 2478 ipst)) { 2479 error = -1; 2480 goto bad_addr; 2481 } 2482 } else if (ipsec_policy_set) { 2483 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2484 error = -1; 2485 goto bad_addr; 2486 } 2487 } 2488 } 2489 bad_addr: 2490 if (error != 0) { 2491 if (connp->conn_anon_port) { 2492 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2493 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2494 B_FALSE); 2495 } 2496 connp->conn_mlp_type = mlptSingle; 2497 } 2498 2499 if (src_ire != NULL) 2500 ire_refrele(src_ire); 2501 2502 if (ipsec_policy_set) { 2503 ASSERT(policy_mp != NULL); 2504 freeb(policy_mp); 2505 /* 2506 * As of now assume that nothing else accompanies 2507 * IPSEC_POLICY_SET. 
2508 */ 2509 mp->b_cont = NULL; 2510 } 2511 return (error); 2512 } 2513 2514 /* ARGSUSED */ 2515 static void 2516 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2517 void *dummy_arg) 2518 { 2519 conn_t *connp = NULL; 2520 t_scalar_t prim; 2521 2522 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2523 2524 if (CONN_Q(q)) 2525 connp = Q_TO_CONN(q); 2526 ASSERT(connp != NULL); 2527 2528 prim = ((union T_primitives *)mp->b_rptr)->type; 2529 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2530 2531 if (IPCL_IS_TCP(connp)) { 2532 /* Pass sticky_ipp for scope_id and pktinfo */ 2533 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2534 } else { 2535 /* For UDP and ICMP */ 2536 mp = ip_bind_v6(q, mp, connp, NULL); 2537 } 2538 if (mp != NULL) { 2539 if (IPCL_IS_TCP(connp)) { 2540 CONN_INC_REF(connp); 2541 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2542 connp, SQTAG_TCP_RPUTOTHER); 2543 } else if (IPCL_IS_UDP(connp)) { 2544 udp_resume_bind(connp, mp); 2545 } else { 2546 ASSERT(IPCL_IS_RAWIP(connp)); 2547 rawip_resume_bind(connp, mp); 2548 } 2549 } 2550 } 2551 2552 /* 2553 * Verify that both the source and destination addresses 2554 * are valid. If verify_dst, then destination address must also be reachable, 2555 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2556 * It takes ip6_pkt_t * as one of the arguments to determine correct 2557 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2558 * destination address. Note that parameter ipp is only useful for TCP connect 2559 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2560 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2561 * 2562 */ 2563 static int 2564 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2565 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2566 boolean_t ire_requested, boolean_t ipsec_policy_set, 2567 boolean_t fanout_insert, boolean_t verify_dst) 2568 { 2569 ire_t *src_ire; 2570 ire_t *dst_ire; 2571 int error = 0; 2572 int protocol; 2573 mblk_t *policy_mp; 2574 ire_t *sire = NULL; 2575 ire_t *md_dst_ire = NULL; 2576 ill_t *md_ill = NULL; 2577 ill_t *dst_ill = NULL; 2578 ipif_t *src_ipif = NULL; 2579 zoneid_t zoneid; 2580 boolean_t ill_held = B_FALSE; 2581 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2582 2583 src_ire = dst_ire = NULL; 2584 /* 2585 * NOTE: The protocol is beyond the wptr because that's how 2586 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2587 */ 2588 protocol = *mp->b_wptr & 0xFF; 2589 2590 /* 2591 * If we never got a disconnect before, clear it now. 2592 */ 2593 connp->conn_fully_bound = B_FALSE; 2594 2595 if (ipsec_policy_set) { 2596 policy_mp = mp->b_cont; 2597 } 2598 2599 zoneid = connp->conn_zoneid; 2600 2601 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2602 ipif_t *ipif; 2603 2604 /* 2605 * Use an "emulated" IRE_BROADCAST to tell the transport it 2606 * is a multicast. 2607 * Pass other information that matches 2608 * the ipif (e.g. the source address). 
2609 * 2610 * conn_multicast_ill is only used for IPv6 packets 2611 */ 2612 mutex_enter(&connp->conn_lock); 2613 if (connp->conn_multicast_ill != NULL) { 2614 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2615 zoneid, 0, &ipif); 2616 } else { 2617 /* Look for default like ip_wput_v6 */ 2618 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2619 } 2620 mutex_exit(&connp->conn_lock); 2621 if (ipif == NULL || !ire_requested || 2622 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2623 if (ipif != NULL) 2624 ipif_refrele(ipif); 2625 if (ip_debug > 2) { 2626 /* ip1dbg */ 2627 pr_addr_dbg("ip_bind_connected_v6: bad " 2628 "connected multicast %s\n", AF_INET6, 2629 v6dst); 2630 } 2631 error = ENETUNREACH; 2632 goto bad_addr; 2633 } 2634 if (ipif != NULL) 2635 ipif_refrele(ipif); 2636 } else { 2637 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2638 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2639 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2640 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2641 ipst); 2642 /* 2643 * We also prevent ire's with src address INADDR_ANY to 2644 * be used, which are created temporarily for 2645 * sending out packets from endpoints that have 2646 * conn_unspec_src set. 2647 */ 2648 if (dst_ire == NULL || 2649 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2650 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2651 /* 2652 * When verifying destination reachability, we always 2653 * complain. 2654 * 2655 * When not verifying destination reachability but we 2656 * found an IRE, i.e. the destination is reachable, 2657 * then the other tests still apply and we complain. 
2658 */ 2659 if (verify_dst || (dst_ire != NULL)) { 2660 if (ip_debug > 2) { 2661 /* ip1dbg */ 2662 pr_addr_dbg("ip_bind_connected_v6: bad" 2663 " connected dst %s\n", AF_INET6, 2664 v6dst); 2665 } 2666 if (dst_ire == NULL || 2667 !(dst_ire->ire_type & IRE_HOST)) { 2668 error = ENETUNREACH; 2669 } else { 2670 error = EHOSTUNREACH; 2671 } 2672 goto bad_addr; 2673 } 2674 } 2675 } 2676 2677 /* 2678 * We now know that routing will allow us to reach the destination. 2679 * Check whether Trusted Solaris policy allows communication with this 2680 * host, and pretend that the destination is unreachable if not. 2681 * 2682 * This is never a problem for TCP, since that transport is known to 2683 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2684 * handling. If the remote is unreachable, it will be detected at that 2685 * point, so there's no reason to check it here. 2686 * 2687 * Note that for sendto (and other datagram-oriented friends), this 2688 * check is done as part of the data path label computation instead. 2689 * The check here is just to make non-TCP connect() report the right 2690 * error. 2691 */ 2692 if (dst_ire != NULL && is_system_labeled() && 2693 !IPCL_IS_TCP(connp) && 2694 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2695 connp->conn_mac_exempt, ipst) != 0) { 2696 error = EHOSTUNREACH; 2697 if (ip_debug > 2) { 2698 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2699 AF_INET6, v6dst); 2700 } 2701 goto bad_addr; 2702 } 2703 2704 /* 2705 * If the app does a connect(), it means that it will most likely 2706 * send more than 1 packet to the destination. It makes sense 2707 * to clear the temporary flag. 
2708 */ 2709 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2710 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2711 irb_t *irb = dst_ire->ire_bucket; 2712 2713 rw_enter(&irb->irb_lock, RW_WRITER); 2714 /* 2715 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2716 * the lock in order to guarantee irb_tmp_ire_cnt. 2717 */ 2718 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2719 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2720 irb->irb_tmp_ire_cnt--; 2721 } 2722 rw_exit(&irb->irb_lock); 2723 } 2724 2725 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2726 2727 /* 2728 * See if we should notify ULP about MDT; we do this whether or not 2729 * ire_requested is TRUE, in order to handle active connects; MDT 2730 * eligibility tests for passive connects are handled separately 2731 * through tcp_adapt_ire(). We do this before the source address 2732 * selection, because dst_ire may change after a call to 2733 * ipif_select_source_v6(). This is a best-effort check, as the 2734 * packet for this connection may not actually go through 2735 * dst_ire->ire_stq, and the exact IRE can only be known after 2736 * calling ip_newroute_v6(). This is why we further check on the 2737 * IRE during Multidata packet transmission in tcp_multisend(). 
2738 */ 2739 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2740 dst_ire != NULL && 2741 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2742 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2743 ILL_MDT_CAPABLE(md_ill)) { 2744 md_dst_ire = dst_ire; 2745 IRE_REFHOLD(md_dst_ire); 2746 } 2747 2748 if (dst_ire != NULL && 2749 dst_ire->ire_type == IRE_LOCAL && 2750 dst_ire->ire_zoneid != zoneid && 2751 dst_ire->ire_zoneid != ALL_ZONES) { 2752 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2753 zoneid, 0, NULL, 2754 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2755 MATCH_IRE_RJ_BHOLE, ipst); 2756 if (src_ire == NULL) { 2757 error = EHOSTUNREACH; 2758 goto bad_addr; 2759 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2760 if (!(src_ire->ire_type & IRE_HOST)) 2761 error = ENETUNREACH; 2762 else 2763 error = EHOSTUNREACH; 2764 goto bad_addr; 2765 } 2766 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2767 src_ipif = src_ire->ire_ipif; 2768 ipif_refhold(src_ipif); 2769 *v6src = src_ipif->ipif_v6lcl_addr; 2770 } 2771 ire_refrele(src_ire); 2772 src_ire = NULL; 2773 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2774 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2775 *v6src = sire->ire_src_addr_v6; 2776 ire_refrele(dst_ire); 2777 dst_ire = sire; 2778 sire = NULL; 2779 } else if (dst_ire->ire_type == IRE_CACHE && 2780 (dst_ire->ire_flags & RTF_SETSRC)) { 2781 ASSERT(dst_ire->ire_zoneid == zoneid || 2782 dst_ire->ire_zoneid == ALL_ZONES); 2783 *v6src = dst_ire->ire_src_addr_v6; 2784 } else { 2785 /* 2786 * Pick a source address so that a proper inbound load 2787 * spreading would happen. Use dst_ill specified by the 2788 * app. when socket option or scopeid is set. 
2789 */ 2790 int err; 2791 2792 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2793 uint_t if_index; 2794 2795 /* 2796 * Scope id or IPV6_PKTINFO 2797 */ 2798 2799 if_index = ipp->ipp_ifindex; 2800 dst_ill = ill_lookup_on_ifindex( 2801 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2802 ipst); 2803 if (dst_ill == NULL) { 2804 ip1dbg(("ip_bind_connected_v6:" 2805 " bad ifindex %d\n", if_index)); 2806 error = EADDRNOTAVAIL; 2807 goto bad_addr; 2808 } 2809 ill_held = B_TRUE; 2810 } else if (connp->conn_outgoing_ill != NULL) { 2811 /* 2812 * For IPV6_BOUND_IF socket option, 2813 * conn_outgoing_ill should be set 2814 * already in TCP or UDP/ICMP. 2815 */ 2816 dst_ill = conn_get_held_ill(connp, 2817 &connp->conn_outgoing_ill, &err); 2818 if (err == ILL_LOOKUP_FAILED) { 2819 ip1dbg(("ip_bind_connected_v6:" 2820 "no ill for bound_if\n")); 2821 error = EADDRNOTAVAIL; 2822 goto bad_addr; 2823 } 2824 ill_held = B_TRUE; 2825 } else if (dst_ire->ire_stq != NULL) { 2826 /* No need to hold ill here */ 2827 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2828 } else { 2829 /* No need to hold ill here */ 2830 dst_ill = dst_ire->ire_ipif->ipif_ill; 2831 } 2832 if (!ip6_asp_can_lookup(ipst)) { 2833 *mp->b_wptr++ = (char)protocol; 2834 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2835 ip_bind_connected_resume_v6); 2836 error = EINPROGRESS; 2837 goto refrele_and_quit; 2838 } 2839 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2840 RESTRICT_TO_NONE, connp->conn_src_preferences, 2841 zoneid); 2842 ip6_asp_table_refrele(ipst); 2843 if (src_ipif == NULL) { 2844 pr_addr_dbg("ip_bind_connected_v6: " 2845 "no usable source address for " 2846 "connection to %s\n", AF_INET6, v6dst); 2847 error = EADDRNOTAVAIL; 2848 goto bad_addr; 2849 } 2850 *v6src = src_ipif->ipif_v6lcl_addr; 2851 } 2852 } 2853 2854 /* 2855 * We do ire_route_lookup_v6() here (and not an interface lookup) 2856 * as we assert that v6src should only come from an 2857 * UP interface for hard binding. 
2858 */ 2859 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2860 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2861 2862 /* src_ire must be a local|loopback */ 2863 if (!IRE_IS_LOCAL(src_ire)) { 2864 if (ip_debug > 2) { 2865 /* ip1dbg */ 2866 pr_addr_dbg("ip_bind_connected_v6: bad " 2867 "connected src %s\n", AF_INET6, v6src); 2868 } 2869 error = EADDRNOTAVAIL; 2870 goto bad_addr; 2871 } 2872 2873 /* 2874 * If the source address is a loopback address, the 2875 * destination had best be local or multicast. 2876 * The transports that can't handle multicast will reject 2877 * those addresses. 2878 */ 2879 if (src_ire->ire_type == IRE_LOOPBACK && 2880 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2881 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2882 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2883 error = -1; 2884 goto bad_addr; 2885 } 2886 /* 2887 * Allow setting new policies. For example, disconnects come 2888 * down as ipa_t bind. As we would have set conn_policy_cached 2889 * to B_TRUE before, we should set it to B_FALSE, so that policy 2890 * can change after the disconnect. 2891 */ 2892 connp->conn_policy_cached = B_FALSE; 2893 2894 /* 2895 * The addresses have been verified. Initialize the conn 2896 * before calling the policy as they expect the conns 2897 * initialized. 2898 */ 2899 connp->conn_srcv6 = *v6src; 2900 connp->conn_remv6 = *v6dst; 2901 connp->conn_lport = lport; 2902 connp->conn_fport = fport; 2903 2904 ASSERT(!(ipsec_policy_set && ire_requested)); 2905 if (ire_requested) { 2906 iulp_t *ulp_info = NULL; 2907 2908 /* 2909 * Note that sire will not be NULL if this is an off-link 2910 * connection and there is not cache for that dest yet. 2911 * 2912 * XXX Because of an existing bug, if there are multiple 2913 * default routes, the IRE returned now may not be the actual 2914 * default route used (default routes are chosen in a 2915 * round robin fashion). 
So if the metrics for different 2916 * default routes are different, we may return the wrong 2917 * metrics. This will not be a problem if the existing 2918 * bug is fixed. 2919 */ 2920 if (sire != NULL) 2921 ulp_info = &(sire->ire_uinfo); 2922 2923 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info, 2924 ipst)) { 2925 error = -1; 2926 goto bad_addr; 2927 } 2928 } else if (ipsec_policy_set) { 2929 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2930 error = -1; 2931 goto bad_addr; 2932 } 2933 } 2934 2935 /* 2936 * Cache IPsec policy in this conn. If we have per-socket policy, 2937 * we'll cache that. If we don't, we'll inherit global policy. 2938 * 2939 * We can't insert until the conn reflects the policy. Note that 2940 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2941 * connections where we don't have a policy. This is to prevent 2942 * global policy lookups in the inbound path. 2943 * 2944 * If we insert before we set conn_policy_cached, 2945 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2946 * because global policy cound be non-empty. We normally call 2947 * ipsec_check_policy() for conn_policy_cached connections only if 2948 * conn_in_enforce_policy is set. But in this case, 2949 * conn_policy_cached can get set anytime since we made the 2950 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2951 * is called, which will make the above assumption false. Thus, we 2952 * need to insert after we set conn_policy_cached. 2953 */ 2954 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2955 goto bad_addr; 2956 2957 /* If not fanout_insert this was just an address verification */ 2958 if (fanout_insert) { 2959 /* 2960 * The addresses have been verified. Time to insert in 2961 * the correct fanout list. 2962 */ 2963 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2964 connp->conn_ports, 2965 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2966 } 2967 if (error == 0) { 2968 connp->conn_fully_bound = B_TRUE; 2969 /* 2970 * Our initial checks for MDT have passed; the IRE is not 2971 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2972 * be supporting MDT. Pass the IRE, IPC and ILL into 2973 * ip_mdinfo_return(), which performs further checks 2974 * against them and upon success, returns the MDT info 2975 * mblk which we will attach to the bind acknowledgment. 2976 */ 2977 if (md_dst_ire != NULL) { 2978 mblk_t *mdinfo_mp; 2979 2980 ASSERT(md_ill != NULL); 2981 ASSERT(md_ill->ill_mdt_capab != NULL); 2982 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2983 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 2984 linkb(mp, mdinfo_mp); 2985 } 2986 } 2987 bad_addr: 2988 if (ipsec_policy_set) { 2989 ASSERT(policy_mp != NULL); 2990 freeb(policy_mp); 2991 /* 2992 * As of now assume that nothing else accompanies 2993 * IPSEC_POLICY_SET. 2994 */ 2995 mp->b_cont = NULL; 2996 } 2997 refrele_and_quit: 2998 if (src_ire != NULL) 2999 IRE_REFRELE(src_ire); 3000 if (dst_ire != NULL) 3001 IRE_REFRELE(dst_ire); 3002 if (sire != NULL) 3003 IRE_REFRELE(sire); 3004 if (src_ipif != NULL) 3005 ipif_refrele(src_ipif); 3006 if (md_dst_ire != NULL) 3007 IRE_REFRELE(md_dst_ire); 3008 if (ill_held && dst_ill != NULL) 3009 ill_refrele(dst_ill); 3010 return (error); 3011 } 3012 3013 /* 3014 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3015 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3016 */ 3017 /* ARGSUSED4 */ 3018 static boolean_t 3019 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3020 iulp_t *ulp_info, ip_stack_t *ipst) 3021 { 3022 mblk_t *mp1; 3023 ire_t *ret_ire; 3024 3025 mp1 = mp->b_cont; 3026 ASSERT(mp1 != NULL); 3027 3028 if (ire != NULL) { 3029 /* 3030 * mp1 initialized above to IRE_DB_REQ_TYPE 3031 * appended mblk. Its <upper protocol>'s 3032 * job to make sure there is room. 
3033 */ 3034 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3035 return (B_FALSE); 3036 3037 mp1->b_datap->db_type = IRE_DB_TYPE; 3038 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3039 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3040 ret_ire = (ire_t *)mp1->b_rptr; 3041 if (IN6_IS_ADDR_MULTICAST(dst) || 3042 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3043 ret_ire->ire_type = IRE_BROADCAST; 3044 ret_ire->ire_addr_v6 = *dst; 3045 } 3046 if (ulp_info != NULL) { 3047 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3048 sizeof (iulp_t)); 3049 } 3050 ret_ire->ire_mp = mp1; 3051 } else { 3052 /* 3053 * No IRE was found. Remove IRE mblk. 3054 */ 3055 mp->b_cont = mp1->b_cont; 3056 freeb(mp1); 3057 } 3058 return (B_TRUE); 3059 } 3060 3061 /* 3062 * Add an ip6i_t header to the front of the mblk. 3063 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3064 * Returns NULL if allocation fails (and frees original message). 3065 * Used in outgoing path when going through ip_newroute_*v6(). 3066 * Used in incoming path to pass ifindex to transports. 
3067 */ 3068 mblk_t * 3069 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3070 { 3071 mblk_t *mp1; 3072 ip6i_t *ip6i; 3073 ip6_t *ip6h; 3074 3075 ip6h = (ip6_t *)mp->b_rptr; 3076 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3077 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3078 mp->b_datap->db_ref > 1) { 3079 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3080 if (mp1 == NULL) { 3081 freemsg(mp); 3082 return (NULL); 3083 } 3084 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3085 mp1->b_cont = mp; 3086 mp = mp1; 3087 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3088 } 3089 mp->b_rptr = (uchar_t *)ip6i; 3090 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3091 ip6i->ip6i_nxt = IPPROTO_RAW; 3092 if (ill != NULL) { 3093 ip6i->ip6i_flags = IP6I_IFINDEX; 3094 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3095 } else { 3096 ip6i->ip6i_flags = 0; 3097 } 3098 ip6i->ip6i_nexthop = *dst; 3099 return (mp); 3100 } 3101 3102 /* 3103 * Handle protocols with which IP is less intimate. There 3104 * can be more than one stream bound to a particular 3105 * protocol. When this is the case, normally each one gets a copy 3106 * of any incoming packets. 3107 * However, if the packet was tunneled and not multicast we only send to it 3108 * the first match. 3109 * 3110 * Zones notes: 3111 * Packets will be distributed to streams in all zones. This is really only 3112 * useful for ICMPv6 as only applications in the global zone can create raw 3113 * sockets for other protocols. 
3114 */ 3115 static void 3116 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3117 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3118 boolean_t mctl_present, zoneid_t zoneid) 3119 { 3120 queue_t *rq; 3121 mblk_t *mp1, *first_mp1; 3122 in6_addr_t dst = ip6h->ip6_dst; 3123 in6_addr_t src = ip6h->ip6_src; 3124 boolean_t one_only; 3125 mblk_t *first_mp = mp; 3126 boolean_t secure, shared_addr; 3127 conn_t *connp, *first_connp, *next_connp; 3128 connf_t *connfp; 3129 ip_stack_t *ipst = inill->ill_ipst; 3130 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3131 3132 if (mctl_present) { 3133 mp = first_mp->b_cont; 3134 secure = ipsec_in_is_secure(first_mp); 3135 ASSERT(mp != NULL); 3136 } else { 3137 secure = B_FALSE; 3138 } 3139 3140 /* 3141 * If the packet was tunneled and not multicast we only send to it 3142 * the first match. 3143 */ 3144 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3145 !IN6_IS_ADDR_MULTICAST(&dst)); 3146 3147 shared_addr = (zoneid == ALL_ZONES); 3148 if (shared_addr) { 3149 /* 3150 * We don't allow multilevel ports for raw IP, so no need to 3151 * check for that here. 3152 */ 3153 zoneid = tsol_packet_to_zoneid(mp); 3154 } 3155 3156 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3157 mutex_enter(&connfp->connf_lock); 3158 connp = connfp->connf_head; 3159 for (connp = connfp->connf_head; connp != NULL; 3160 connp = connp->conn_next) { 3161 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3162 zoneid) && 3163 (!is_system_labeled() || 3164 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3165 connp))) 3166 break; 3167 } 3168 3169 if (connp == NULL || connp->conn_upq == NULL) { 3170 /* 3171 * No one bound to this port. Is 3172 * there a client that wants all 3173 * unclaimed datagrams? 
3174 */ 3175 mutex_exit(&connfp->connf_lock); 3176 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3177 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3178 nexthdr_offset, mctl_present, zoneid, ipst)) { 3179 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3180 } 3181 3182 return; 3183 } 3184 3185 CONN_INC_REF(connp); 3186 first_connp = connp; 3187 3188 /* 3189 * XXX: Fix the multiple protocol listeners case. We should not 3190 * be walking the conn->next list here. 3191 */ 3192 if (one_only) { 3193 /* 3194 * Only send message to one tunnel driver by immediately 3195 * terminating the loop. 3196 */ 3197 connp = NULL; 3198 } else { 3199 connp = connp->conn_next; 3200 3201 } 3202 for (;;) { 3203 while (connp != NULL) { 3204 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3205 flags, zoneid) && 3206 (!is_system_labeled() || 3207 tsol_receive_local(mp, &dst, IPV6_VERSION, 3208 shared_addr, connp))) 3209 break; 3210 connp = connp->conn_next; 3211 } 3212 3213 /* 3214 * Just copy the data part alone. The mctl part is 3215 * needed just for verifying policy and it is never 3216 * sent up. 3217 */ 3218 if (connp == NULL || connp->conn_upq == NULL || 3219 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3220 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3221 /* 3222 * No more intested clients or memory 3223 * allocation failed 3224 */ 3225 connp = first_connp; 3226 break; 3227 } 3228 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3229 CONN_INC_REF(connp); 3230 mutex_exit(&connfp->connf_lock); 3231 rq = connp->conn_rq; 3232 /* 3233 * For link-local always add ifindex so that transport can set 3234 * sin6_scope_id. Avoid it for ICMP error fanout. 
3235 */ 3236 if ((connp->conn_ip_recvpktinfo || 3237 IN6_IS_ADDR_LINKLOCAL(&src)) && 3238 (flags & IP_FF_IPINFO)) { 3239 /* Add header */ 3240 mp1 = ip_add_info_v6(mp1, inill, &dst); 3241 } 3242 if (mp1 == NULL) { 3243 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3244 } else if (!canputnext(rq)) { 3245 if (flags & IP_FF_RAWIP) { 3246 BUMP_MIB(ill->ill_ip_mib, 3247 rawipIfStatsInOverflows); 3248 } else { 3249 BUMP_MIB(ill->ill_icmp6_mib, 3250 ipv6IfIcmpInOverflows); 3251 } 3252 3253 freemsg(mp1); 3254 } else { 3255 /* 3256 * Don't enforce here if we're a tunnel - let "tun" do 3257 * it instead. 3258 */ 3259 if (!IPCL_IS_IPTUN(connp) && 3260 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3261 secure)) { 3262 first_mp1 = ipsec_check_inbound_policy( 3263 first_mp1, connp, NULL, ip6h, mctl_present); 3264 } 3265 if (first_mp1 != NULL) { 3266 if (mctl_present) 3267 freeb(first_mp1); 3268 BUMP_MIB(ill->ill_ip_mib, 3269 ipIfStatsHCInDelivers); 3270 (connp->conn_recv)(connp, mp1, NULL); 3271 } 3272 } 3273 mutex_enter(&connfp->connf_lock); 3274 /* Follow the next pointer before releasing the conn. */ 3275 next_connp = connp->conn_next; 3276 CONN_DEC_REF(connp); 3277 connp = next_connp; 3278 } 3279 3280 /* Last one. Send it upstream. */ 3281 mutex_exit(&connfp->connf_lock); 3282 3283 /* Initiate IPPF processing */ 3284 if (IP6_IN_IPP(flags, ipst)) { 3285 uint_t ifindex; 3286 3287 mutex_enter(&ill->ill_lock); 3288 ifindex = ill->ill_phyint->phyint_ifindex; 3289 mutex_exit(&ill->ill_lock); 3290 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3291 if (mp == NULL) { 3292 CONN_DEC_REF(connp); 3293 if (mctl_present) 3294 freeb(first_mp); 3295 return; 3296 } 3297 } 3298 3299 /* 3300 * For link-local always add ifindex so that transport can set 3301 * sin6_scope_id. Avoid it for ICMP error fanout. 
3302 */ 3303 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3304 (flags & IP_FF_IPINFO)) { 3305 /* Add header */ 3306 mp = ip_add_info_v6(mp, inill, &dst); 3307 if (mp == NULL) { 3308 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3309 CONN_DEC_REF(connp); 3310 if (mctl_present) 3311 freeb(first_mp); 3312 return; 3313 } else if (mctl_present) { 3314 first_mp->b_cont = mp; 3315 } else { 3316 first_mp = mp; 3317 } 3318 } 3319 3320 rq = connp->conn_rq; 3321 if (!canputnext(rq)) { 3322 if (flags & IP_FF_RAWIP) { 3323 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3324 } else { 3325 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3326 } 3327 3328 freemsg(first_mp); 3329 } else { 3330 if (IPCL_IS_IPTUN(connp)) { 3331 /* 3332 * Tunneled packet. We enforce policy in the tunnel 3333 * module itself. 3334 * 3335 * Send the WHOLE packet up (incl. IPSEC_IN) without 3336 * a policy check. 3337 */ 3338 putnext(rq, first_mp); 3339 CONN_DEC_REF(connp); 3340 return; 3341 } 3342 /* 3343 * Don't enforce here if we're a tunnel - let "tun" do 3344 * it instead. 3345 */ 3346 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3347 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3348 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3349 NULL, ip6h, mctl_present); 3350 if (first_mp == NULL) { 3351 CONN_DEC_REF(connp); 3352 return; 3353 } 3354 } 3355 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3356 (connp->conn_recv)(connp, mp, NULL); 3357 if (mctl_present) 3358 freeb(first_mp); 3359 } 3360 CONN_DEC_REF(connp); 3361 } 3362 3363 /* 3364 * Send an ICMP error after patching up the packet appropriately. Returns 3365 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3366 */ 3367 int 3368 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3369 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3370 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3371 { 3372 ip6_t *ip6h; 3373 mblk_t *first_mp; 3374 boolean_t secure; 3375 unsigned char db_type; 3376 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3377 3378 first_mp = mp; 3379 if (mctl_present) { 3380 mp = mp->b_cont; 3381 secure = ipsec_in_is_secure(first_mp); 3382 ASSERT(mp != NULL); 3383 } else { 3384 /* 3385 * If this is an ICMP error being reported - which goes 3386 * up as M_CTLs, we need to convert them to M_DATA till 3387 * we finish checking with global policy because 3388 * ipsec_check_global_policy() assumes M_DATA as clear 3389 * and M_CTL as secure. 3390 */ 3391 db_type = mp->b_datap->db_type; 3392 mp->b_datap->db_type = M_DATA; 3393 secure = B_FALSE; 3394 } 3395 /* 3396 * We are generating an icmp error for some inbound packet. 3397 * Called from all ip_fanout_(udp, tcp, proto) functions. 3398 * Before we generate an error, check with global policy 3399 * to see whether this is allowed to enter the system. As 3400 * there is no "conn", we are checking with global policy. 
3401 */ 3402 ip6h = (ip6_t *)mp->b_rptr; 3403 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3404 first_mp = ipsec_check_global_policy(first_mp, NULL, 3405 NULL, ip6h, mctl_present, ipst->ips_netstack); 3406 if (first_mp == NULL) 3407 return (0); 3408 } 3409 3410 if (!mctl_present) 3411 mp->b_datap->db_type = db_type; 3412 3413 if (flags & IP_FF_SEND_ICMP) { 3414 if (flags & IP_FF_HDR_COMPLETE) { 3415 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3416 freemsg(first_mp); 3417 return (1); 3418 } 3419 } 3420 switch (icmp_type) { 3421 case ICMP6_DST_UNREACH: 3422 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3423 B_FALSE, B_FALSE, zoneid, ipst); 3424 break; 3425 case ICMP6_PARAM_PROB: 3426 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3427 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3428 break; 3429 default: 3430 #ifdef DEBUG 3431 panic("ip_fanout_send_icmp_v6: wrong type"); 3432 /*NOTREACHED*/ 3433 #else 3434 freemsg(first_mp); 3435 break; 3436 #endif 3437 } 3438 } else { 3439 freemsg(first_mp); 3440 return (0); 3441 } 3442 3443 return (1); 3444 } 3445 3446 3447 /* 3448 * Fanout for TCP packets 3449 * The caller puts <fport, lport> in the ports parameter. 
3450 */ 3451 static void 3452 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3453 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3454 { 3455 mblk_t *first_mp; 3456 boolean_t secure; 3457 conn_t *connp; 3458 tcph_t *tcph; 3459 boolean_t syn_present = B_FALSE; 3460 ip_stack_t *ipst = inill->ill_ipst; 3461 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3462 3463 first_mp = mp; 3464 if (mctl_present) { 3465 mp = first_mp->b_cont; 3466 secure = ipsec_in_is_secure(first_mp); 3467 ASSERT(mp != NULL); 3468 } else { 3469 secure = B_FALSE; 3470 } 3471 3472 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3473 3474 if (connp == NULL || 3475 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3476 /* 3477 * No hard-bound match. Send Reset. 3478 */ 3479 dblk_t *dp = mp->b_datap; 3480 uint32_t ill_index; 3481 3482 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3483 3484 /* Initiate IPPf processing, if needed. */ 3485 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3486 (flags & IP6_NO_IPPOLICY)) { 3487 ill_index = ill->ill_phyint->phyint_ifindex; 3488 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3489 if (first_mp == NULL) { 3490 if (connp != NULL) 3491 CONN_DEC_REF(connp); 3492 return; 3493 } 3494 } 3495 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3496 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3497 ipst->ips_netstack->netstack_tcp, connp); 3498 if (connp != NULL) 3499 CONN_DEC_REF(connp); 3500 return; 3501 } 3502 3503 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3504 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3505 if (connp->conn_flags & IPCL_TCP) { 3506 squeue_t *sqp; 3507 3508 /* 3509 * For fused tcp loopback, assign the eager's 3510 * squeue to be that of the active connect's. 
3511 */ 3512 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3513 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3514 !secure && 3515 !IP6_IN_IPP(flags, ipst)) { 3516 ASSERT(Q_TO_CONN(q) != NULL); 3517 sqp = Q_TO_CONN(q)->conn_sqp; 3518 } else { 3519 sqp = IP_SQUEUE_GET(lbolt); 3520 } 3521 3522 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3523 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3524 3525 /* 3526 * db_cksumstuff is unused in the incoming 3527 * path; Thus store the ifindex here. It will 3528 * be cleared in tcp_conn_create_v6(). 3529 */ 3530 DB_CKSUMSTUFF(mp) = 3531 (intptr_t)ill->ill_phyint->phyint_ifindex; 3532 syn_present = B_TRUE; 3533 } 3534 } 3535 3536 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3537 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3538 if ((flags & TH_RST) || (flags & TH_URG)) { 3539 CONN_DEC_REF(connp); 3540 freemsg(first_mp); 3541 return; 3542 } 3543 if (flags & TH_ACK) { 3544 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3545 ipst->ips_netstack->netstack_tcp, connp); 3546 CONN_DEC_REF(connp); 3547 return; 3548 } 3549 3550 CONN_DEC_REF(connp); 3551 freemsg(first_mp); 3552 return; 3553 } 3554 3555 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3556 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3557 NULL, ip6h, mctl_present); 3558 if (first_mp == NULL) { 3559 CONN_DEC_REF(connp); 3560 return; 3561 } 3562 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3563 ASSERT(syn_present); 3564 if (mctl_present) { 3565 ASSERT(first_mp != mp); 3566 first_mp->b_datap->db_struioflag |= 3567 STRUIO_POLICY; 3568 } else { 3569 ASSERT(first_mp == mp); 3570 mp->b_datap->db_struioflag &= 3571 ~STRUIO_EAGER; 3572 mp->b_datap->db_struioflag |= 3573 STRUIO_POLICY; 3574 } 3575 } else { 3576 /* 3577 * Discard first_mp early since we're dealing with a 3578 * fully-connected conn_t and tcp doesn't do policy in 3579 * this case. 
Also, if someone is bound to IPPROTO_TCP 3580 * over raw IP, they don't expect to see a M_CTL. 3581 */ 3582 if (mctl_present) { 3583 freeb(first_mp); 3584 mctl_present = B_FALSE; 3585 } 3586 first_mp = mp; 3587 } 3588 } 3589 3590 /* Initiate IPPF processing */ 3591 if (IP6_IN_IPP(flags, ipst)) { 3592 uint_t ifindex; 3593 3594 mutex_enter(&ill->ill_lock); 3595 ifindex = ill->ill_phyint->phyint_ifindex; 3596 mutex_exit(&ill->ill_lock); 3597 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3598 if (mp == NULL) { 3599 CONN_DEC_REF(connp); 3600 if (mctl_present) { 3601 freeb(first_mp); 3602 } 3603 return; 3604 } else if (mctl_present) { 3605 /* 3606 * ip_add_info_v6 might return a new mp. 3607 */ 3608 ASSERT(first_mp != mp); 3609 first_mp->b_cont = mp; 3610 } else { 3611 first_mp = mp; 3612 } 3613 } 3614 3615 /* 3616 * For link-local always add ifindex so that TCP can bind to that 3617 * interface. Avoid it for ICMP error fanout. 3618 */ 3619 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3620 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3621 (flags & IP_FF_IPINFO))) { 3622 /* Add header */ 3623 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3624 if (mp == NULL) { 3625 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3626 CONN_DEC_REF(connp); 3627 if (mctl_present) 3628 freeb(first_mp); 3629 return; 3630 } else if (mctl_present) { 3631 ASSERT(first_mp != mp); 3632 first_mp->b_cont = mp; 3633 } else { 3634 first_mp = mp; 3635 } 3636 } 3637 3638 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3639 if (IPCL_IS_TCP(connp)) { 3640 (*ip_input_proc)(connp->conn_sqp, first_mp, 3641 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3642 } else { 3643 /* SOCK_RAW, IPPROTO_TCP case */ 3644 (connp->conn_recv)(connp, first_mp, NULL); 3645 CONN_DEC_REF(connp); 3646 } 3647 } 3648 3649 /* 3650 * Fanout for UDP packets. 3651 * The caller puts <fport, lport> in the ports parameter. 3652 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3653 * 3654 * If SO_REUSEADDR is set all multicast and broadcast packets 3655 * will be delivered to all streams bound to the same port. 3656 * 3657 * Zones notes: 3658 * Multicast packets will be distributed to streams in all zones. 3659 */ 3660 static void 3661 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3662 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3663 zoneid_t zoneid) 3664 { 3665 uint32_t dstport, srcport; 3666 in6_addr_t dst; 3667 mblk_t *first_mp; 3668 boolean_t secure; 3669 conn_t *connp; 3670 connf_t *connfp; 3671 conn_t *first_conn; 3672 conn_t *next_conn; 3673 mblk_t *mp1, *first_mp1; 3674 in6_addr_t src; 3675 boolean_t shared_addr; 3676 ip_stack_t *ipst = inill->ill_ipst; 3677 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3678 3679 first_mp = mp; 3680 if (mctl_present) { 3681 mp = first_mp->b_cont; 3682 secure = ipsec_in_is_secure(first_mp); 3683 ASSERT(mp != NULL); 3684 } else { 3685 secure = B_FALSE; 3686 } 3687 3688 /* Extract ports in net byte order */ 3689 dstport = htons(ntohl(ports) & 0xFFFF); 3690 srcport = htons(ntohl(ports) >> 16); 3691 dst = ip6h->ip6_dst; 3692 src = ip6h->ip6_src; 3693 3694 shared_addr = (zoneid == ALL_ZONES); 3695 if (shared_addr) { 3696 /* 3697 * No need to handle exclusive-stack zones since ALL_ZONES 3698 * only applies to the shared stack. 3699 */ 3700 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3701 /* 3702 * If no shared MLP is found, tsol_mlp_findzone returns 3703 * ALL_ZONES. In that case, we assume it's SLP, and 3704 * search for the zone based on the packet label. 3705 * That will also return ALL_ZONES on failure, but 3706 * we never allow conn_zoneid to be set to ALL_ZONES. 3707 */ 3708 if (zoneid == ALL_ZONES) 3709 zoneid = tsol_packet_to_zoneid(mp); 3710 } 3711 3712 /* Attempt to find a client stream based on destination port. 
*/ 3713 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3714 mutex_enter(&connfp->connf_lock); 3715 connp = connfp->connf_head; 3716 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3717 /* 3718 * Not multicast. Send to the one (first) client we find. 3719 */ 3720 while (connp != NULL) { 3721 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3722 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3723 conn_wantpacket_v6(connp, ill, ip6h, 3724 flags, zoneid)) { 3725 break; 3726 } 3727 connp = connp->conn_next; 3728 } 3729 if (connp == NULL || connp->conn_upq == NULL) 3730 goto notfound; 3731 3732 if (is_system_labeled() && 3733 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3734 connp)) 3735 goto notfound; 3736 3737 /* Found a client */ 3738 CONN_INC_REF(connp); 3739 mutex_exit(&connfp->connf_lock); 3740 3741 if (CONN_UDP_FLOWCTLD(connp)) { 3742 freemsg(first_mp); 3743 CONN_DEC_REF(connp); 3744 return; 3745 } 3746 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3747 first_mp = ipsec_check_inbound_policy(first_mp, 3748 connp, NULL, ip6h, mctl_present); 3749 if (first_mp == NULL) { 3750 CONN_DEC_REF(connp); 3751 return; 3752 } 3753 } 3754 /* Initiate IPPF processing */ 3755 if (IP6_IN_IPP(flags, ipst)) { 3756 uint_t ifindex; 3757 3758 mutex_enter(&ill->ill_lock); 3759 ifindex = ill->ill_phyint->phyint_ifindex; 3760 mutex_exit(&ill->ill_lock); 3761 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3762 if (mp == NULL) { 3763 CONN_DEC_REF(connp); 3764 if (mctl_present) 3765 freeb(first_mp); 3766 return; 3767 } 3768 } 3769 /* 3770 * For link-local always add ifindex so that 3771 * transport can set sin6_scope_id. Avoid it for 3772 * ICMP error fanout. 
3773 */ 3774 if ((connp->conn_ip_recvpktinfo || 3775 IN6_IS_ADDR_LINKLOCAL(&src)) && 3776 (flags & IP_FF_IPINFO)) { 3777 /* Add header */ 3778 mp = ip_add_info_v6(mp, inill, &dst); 3779 if (mp == NULL) { 3780 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3781 CONN_DEC_REF(connp); 3782 if (mctl_present) 3783 freeb(first_mp); 3784 return; 3785 } else if (mctl_present) { 3786 first_mp->b_cont = mp; 3787 } else { 3788 first_mp = mp; 3789 } 3790 } 3791 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3792 3793 /* Send it upstream */ 3794 (connp->conn_recv)(connp, mp, NULL); 3795 3796 IP6_STAT(ipst, ip6_udp_fannorm); 3797 CONN_DEC_REF(connp); 3798 if (mctl_present) 3799 freeb(first_mp); 3800 return; 3801 } 3802 3803 while (connp != NULL) { 3804 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3805 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3806 (!is_system_labeled() || 3807 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3808 connp))) 3809 break; 3810 connp = connp->conn_next; 3811 } 3812 3813 if (connp == NULL || connp->conn_upq == NULL) 3814 goto notfound; 3815 3816 first_conn = connp; 3817 3818 CONN_INC_REF(connp); 3819 connp = connp->conn_next; 3820 for (;;) { 3821 while (connp != NULL) { 3822 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3823 src) && conn_wantpacket_v6(connp, ill, ip6h, 3824 flags, zoneid) && 3825 (!is_system_labeled() || 3826 tsol_receive_local(mp, &dst, IPV6_VERSION, 3827 shared_addr, connp))) 3828 break; 3829 connp = connp->conn_next; 3830 } 3831 /* 3832 * Just copy the data part alone. The mctl part is 3833 * needed just for verifying policy and it is never 3834 * sent up. 3835 */ 3836 if (connp == NULL || 3837 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3838 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3839 /* 3840 * No more interested clients or memory 3841 * allocation failed 3842 */ 3843 connp = first_conn; 3844 break; 3845 } 3846 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3847 CONN_INC_REF(connp); 3848 mutex_exit(&connfp->connf_lock); 3849 /* 3850 * For link-local always add ifindex so that transport 3851 * can set sin6_scope_id. Avoid it for ICMP error 3852 * fanout. 3853 */ 3854 if ((connp->conn_ip_recvpktinfo || 3855 IN6_IS_ADDR_LINKLOCAL(&src)) && 3856 (flags & IP_FF_IPINFO)) { 3857 /* Add header */ 3858 mp1 = ip_add_info_v6(mp1, inill, &dst); 3859 } 3860 /* mp1 could have changed */ 3861 if (mctl_present) 3862 first_mp1->b_cont = mp1; 3863 else 3864 first_mp1 = mp1; 3865 if (mp1 == NULL) { 3866 if (mctl_present) 3867 freeb(first_mp1); 3868 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3869 goto next_one; 3870 } 3871 if (CONN_UDP_FLOWCTLD(connp)) { 3872 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3873 freemsg(first_mp1); 3874 goto next_one; 3875 } 3876 3877 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3878 first_mp1 = ipsec_check_inbound_policy 3879 (first_mp1, connp, NULL, ip6h, 3880 mctl_present); 3881 } 3882 if (first_mp1 != NULL) { 3883 if (mctl_present) 3884 freeb(first_mp1); 3885 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3886 3887 /* Send it upstream */ 3888 (connp->conn_recv)(connp, mp1, NULL); 3889 } 3890 next_one: 3891 mutex_enter(&connfp->connf_lock); 3892 /* Follow the next pointer before releasing the conn. */ 3893 next_conn = connp->conn_next; 3894 IP6_STAT(ipst, ip6_udp_fanmb); 3895 CONN_DEC_REF(connp); 3896 connp = next_conn; 3897 } 3898 3899 /* Last one. Send it upstream. 
*/ 3900 mutex_exit(&connfp->connf_lock); 3901 3902 /* Initiate IPPF processing */ 3903 if (IP6_IN_IPP(flags, ipst)) { 3904 uint_t ifindex; 3905 3906 mutex_enter(&ill->ill_lock); 3907 ifindex = ill->ill_phyint->phyint_ifindex; 3908 mutex_exit(&ill->ill_lock); 3909 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3910 if (mp == NULL) { 3911 CONN_DEC_REF(connp); 3912 if (mctl_present) { 3913 freeb(first_mp); 3914 } 3915 return; 3916 } 3917 } 3918 3919 /* 3920 * For link-local always add ifindex so that transport can set 3921 * sin6_scope_id. Avoid it for ICMP error fanout. 3922 */ 3923 if ((connp->conn_ip_recvpktinfo || 3924 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3925 /* Add header */ 3926 mp = ip_add_info_v6(mp, inill, &dst); 3927 if (mp == NULL) { 3928 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3929 CONN_DEC_REF(connp); 3930 if (mctl_present) 3931 freeb(first_mp); 3932 return; 3933 } else if (mctl_present) { 3934 first_mp->b_cont = mp; 3935 } else { 3936 first_mp = mp; 3937 } 3938 } 3939 if (CONN_UDP_FLOWCTLD(connp)) { 3940 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3941 freemsg(mp); 3942 } else { 3943 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3944 first_mp = ipsec_check_inbound_policy(first_mp, 3945 connp, NULL, ip6h, mctl_present); 3946 if (first_mp == NULL) { 3947 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3948 CONN_DEC_REF(connp); 3949 return; 3950 } 3951 } 3952 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3953 3954 /* Send it upstream */ 3955 (connp->conn_recv)(connp, mp, NULL); 3956 } 3957 IP6_STAT(ipst, ip6_udp_fanmb); 3958 CONN_DEC_REF(connp); 3959 if (mctl_present) 3960 freeb(first_mp); 3961 return; 3962 3963 notfound: 3964 mutex_exit(&connfp->connf_lock); 3965 /* 3966 * No one bound to this port. Is 3967 * there a client that wants all 3968 * unclaimed datagrams? 
3969 */ 3970 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3971 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3972 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3973 zoneid); 3974 } else { 3975 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3976 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3977 mctl_present, zoneid, ipst)) { 3978 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3979 } 3980 } 3981 } 3982 3983 /* 3984 * int ip_find_hdr_v6() 3985 * 3986 * This routine is used by the upper layer protocols and the IP tunnel 3987 * module to: 3988 * - Set extension header pointers to appropriate locations 3989 * - Determine IPv6 header length and return it 3990 * - Return a pointer to the last nexthdr value 3991 * 3992 * The caller must initialize ipp_fields. 3993 * 3994 * NOTE: If multiple extension headers of the same type are present, 3995 * ip_find_hdr_v6() will set the respective extension header pointers 3996 * to the first one that it encounters in the IPv6 header. It also 3997 * skips fragment headers. This routine deals with malformed packets 3998 * of various sorts in which case the returned length is up to the 3999 * malformed part. 4000 */ 4001 int 4002 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4003 { 4004 uint_t length, ehdrlen; 4005 uint8_t nexthdr; 4006 uint8_t *whereptr, *endptr; 4007 ip6_dest_t *tmpdstopts; 4008 ip6_rthdr_t *tmprthdr; 4009 ip6_hbh_t *tmphopopts; 4010 ip6_frag_t *tmpfraghdr; 4011 4012 length = IPV6_HDR_LEN; 4013 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4014 endptr = mp->b_wptr; 4015 4016 nexthdr = ip6h->ip6_nxt; 4017 while (whereptr < endptr) { 4018 /* Is there enough left for len + nexthdr? 
*/ 4019 if (whereptr + MIN_EHDR_LEN > endptr) 4020 goto done; 4021 4022 switch (nexthdr) { 4023 case IPPROTO_HOPOPTS: 4024 tmphopopts = (ip6_hbh_t *)whereptr; 4025 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4026 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4027 goto done; 4028 nexthdr = tmphopopts->ip6h_nxt; 4029 /* return only 1st hbh */ 4030 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4031 ipp->ipp_fields |= IPPF_HOPOPTS; 4032 ipp->ipp_hopopts = tmphopopts; 4033 ipp->ipp_hopoptslen = ehdrlen; 4034 } 4035 break; 4036 case IPPROTO_DSTOPTS: 4037 tmpdstopts = (ip6_dest_t *)whereptr; 4038 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4039 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4040 goto done; 4041 nexthdr = tmpdstopts->ip6d_nxt; 4042 /* 4043 * ipp_dstopts is set to the destination header after a 4044 * routing header. 4045 * Assume it is a post-rthdr destination header 4046 * and adjust when we find an rthdr. 4047 */ 4048 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4049 ipp->ipp_fields |= IPPF_DSTOPTS; 4050 ipp->ipp_dstopts = tmpdstopts; 4051 ipp->ipp_dstoptslen = ehdrlen; 4052 } 4053 break; 4054 case IPPROTO_ROUTING: 4055 tmprthdr = (ip6_rthdr_t *)whereptr; 4056 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4057 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4058 goto done; 4059 nexthdr = tmprthdr->ip6r_nxt; 4060 /* return only 1st rthdr */ 4061 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4062 ipp->ipp_fields |= IPPF_RTHDR; 4063 ipp->ipp_rthdr = tmprthdr; 4064 ipp->ipp_rthdrlen = ehdrlen; 4065 } 4066 /* 4067 * Make any destination header we've seen be a 4068 * pre-rthdr destination header. 
4069 */ 4070 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4071 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4072 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4073 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4074 ipp->ipp_dstopts = NULL; 4075 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4076 ipp->ipp_dstoptslen = 0; 4077 } 4078 break; 4079 case IPPROTO_FRAGMENT: 4080 tmpfraghdr = (ip6_frag_t *)whereptr; 4081 ehdrlen = sizeof (ip6_frag_t); 4082 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4083 goto done; 4084 nexthdr = tmpfraghdr->ip6f_nxt; 4085 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4086 ipp->ipp_fields |= IPPF_FRAGHDR; 4087 ipp->ipp_fraghdr = tmpfraghdr; 4088 ipp->ipp_fraghdrlen = ehdrlen; 4089 } 4090 break; 4091 case IPPROTO_NONE: 4092 default: 4093 goto done; 4094 } 4095 length += ehdrlen; 4096 whereptr += ehdrlen; 4097 } 4098 done: 4099 if (nexthdrp != NULL) 4100 *nexthdrp = nexthdr; 4101 return (length); 4102 } 4103 4104 int 4105 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4106 { 4107 ire_t *ire; 4108 4109 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4110 ire = ire_lookup_local_v6(zoneid, ipst); 4111 if (ire == NULL) { 4112 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4113 return (1); 4114 } 4115 ip6h->ip6_src = ire->ire_addr_v6; 4116 ire_refrele(ire); 4117 } 4118 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4119 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4120 return (0); 4121 } 4122 4123 /* 4124 * Try to determine where and what are the IPv6 header length and 4125 * pointer to nexthdr value for the upper layer protocol (or an 4126 * unknown next hdr). 4127 * 4128 * Parameters returns a pointer to the nexthdr value; 4129 * Must handle malformed packets of various sorts. 4130 * Function returns failure for malformed cases. 
4131 */ 4132 boolean_t 4133 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4134 uint8_t **nexthdrpp) 4135 { 4136 uint16_t length; 4137 uint_t ehdrlen; 4138 uint8_t *nexthdrp; 4139 uint8_t *whereptr; 4140 uint8_t *endptr; 4141 ip6_dest_t *desthdr; 4142 ip6_rthdr_t *rthdr; 4143 ip6_frag_t *fraghdr; 4144 4145 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4146 length = IPV6_HDR_LEN; 4147 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4148 endptr = mp->b_wptr; 4149 4150 nexthdrp = &ip6h->ip6_nxt; 4151 while (whereptr < endptr) { 4152 /* Is there enough left for len + nexthdr? */ 4153 if (whereptr + MIN_EHDR_LEN > endptr) 4154 break; 4155 4156 switch (*nexthdrp) { 4157 case IPPROTO_HOPOPTS: 4158 case IPPROTO_DSTOPTS: 4159 /* Assumes the headers are identical for hbh and dst */ 4160 desthdr = (ip6_dest_t *)whereptr; 4161 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4162 if ((uchar_t *)desthdr + ehdrlen > endptr) 4163 return (B_FALSE); 4164 nexthdrp = &desthdr->ip6d_nxt; 4165 break; 4166 case IPPROTO_ROUTING: 4167 rthdr = (ip6_rthdr_t *)whereptr; 4168 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4169 if ((uchar_t *)rthdr + ehdrlen > endptr) 4170 return (B_FALSE); 4171 nexthdrp = &rthdr->ip6r_nxt; 4172 break; 4173 case IPPROTO_FRAGMENT: 4174 fraghdr = (ip6_frag_t *)whereptr; 4175 ehdrlen = sizeof (ip6_frag_t); 4176 if ((uchar_t *)&fraghdr[1] > endptr) 4177 return (B_FALSE); 4178 nexthdrp = &fraghdr->ip6f_nxt; 4179 break; 4180 case IPPROTO_NONE: 4181 /* No next header means we're finished */ 4182 default: 4183 *hdr_length_ptr = length; 4184 *nexthdrpp = nexthdrp; 4185 return (B_TRUE); 4186 } 4187 length += ehdrlen; 4188 whereptr += ehdrlen; 4189 *hdr_length_ptr = length; 4190 *nexthdrpp = nexthdrp; 4191 } 4192 switch (*nexthdrp) { 4193 case IPPROTO_HOPOPTS: 4194 case IPPROTO_DSTOPTS: 4195 case IPPROTO_ROUTING: 4196 case IPPROTO_FRAGMENT: 4197 /* 4198 * If any know extension headers are still to be processed, 4199 * the 
packet's malformed (or at least all the IP header(s) are 4200 * not in the same mblk - and that should never happen. 4201 */ 4202 return (B_FALSE); 4203 4204 default: 4205 /* 4206 * If we get here, we know that all of the IP headers were in 4207 * the same mblk, even if the ULP header is in the next mblk. 4208 */ 4209 *hdr_length_ptr = length; 4210 *nexthdrpp = nexthdrp; 4211 return (B_TRUE); 4212 } 4213 } 4214 4215 /* 4216 * Return the length of the IPv6 related headers (including extension headers) 4217 * Returns a length even if the packet is malformed. 4218 */ 4219 int 4220 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4221 { 4222 uint16_t hdr_len; 4223 uint8_t *nexthdrp; 4224 4225 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4226 return (hdr_len); 4227 } 4228 4229 /* 4230 * Select an ill for the packet by considering load spreading across 4231 * a different ill in the group if dst_ill is part of some group. 4232 */ 4233 static ill_t * 4234 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4235 { 4236 ill_t *ill; 4237 4238 /* 4239 * We schedule irrespective of whether the source address is 4240 * INADDR_UNSPECIED or not. 4241 */ 4242 ill = illgrp_scheduler(dst_ill); 4243 if (ill == NULL) 4244 return (NULL); 4245 4246 /* 4247 * For groups with names ip_sioctl_groupname ensures that all 4248 * ills are of same type. For groups without names, ifgrp_insert 4249 * ensures this. 4250 */ 4251 ASSERT(dst_ill->ill_type == ill->ill_type); 4252 4253 return (ill); 4254 } 4255 4256 /* 4257 * IPv6 - 4258 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4259 * to send out a packet to a destination address for which we do not have 4260 * specific routing information. 4261 * 4262 * Handle non-multicast packets. If ill is non-NULL the match is done 4263 * for that ill. 
 *
 * When a specific ill is specified (using IPV6_PKTINFO,
 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
 * on routing entries (ftable and ctable) that have a matching
 * ire->ire_ipif->ipif_ill. Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it can not "confuse" things for other destinations in that
 * table.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4302 */ 4303 /* ARGSUSED */ 4304 void 4305 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4306 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4307 { 4308 in6_addr_t v6gw; 4309 in6_addr_t dst; 4310 ire_t *ire = NULL; 4311 ipif_t *src_ipif = NULL; 4312 ill_t *dst_ill = NULL; 4313 ire_t *sire = NULL; 4314 ire_t *save_ire; 4315 ip6_t *ip6h; 4316 int err = 0; 4317 mblk_t *first_mp; 4318 ipsec_out_t *io; 4319 ill_t *attach_ill = NULL; 4320 ushort_t ire_marks = 0; 4321 int match_flags; 4322 boolean_t ip6i_present; 4323 ire_t *first_sire = NULL; 4324 mblk_t *copy_mp = NULL; 4325 mblk_t *xmit_mp = NULL; 4326 in6_addr_t save_dst; 4327 uint32_t multirt_flags = 4328 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4329 boolean_t multirt_is_resolvable; 4330 boolean_t multirt_resolve_next; 4331 boolean_t need_rele = B_FALSE; 4332 boolean_t do_attach_ill = B_FALSE; 4333 boolean_t ip6_asp_table_held = B_FALSE; 4334 tsol_ire_gw_secattr_t *attrp = NULL; 4335 tsol_gcgrp_t *gcgrp = NULL; 4336 tsol_gcgrp_addr_t ga; 4337 4338 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4339 4340 first_mp = mp; 4341 if (mp->b_datap->db_type == M_CTL) { 4342 mp = mp->b_cont; 4343 io = (ipsec_out_t *)first_mp->b_rptr; 4344 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4345 } else { 4346 io = NULL; 4347 } 4348 4349 /* 4350 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4351 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4352 * could be NULL. 4353 * 4354 * This information can appear either in an ip6i_t or an IPSEC_OUT 4355 * message. 4356 */ 4357 ip6h = (ip6_t *)mp->b_rptr; 4358 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4359 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4360 if (!ip6i_present || 4361 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4362 attach_ill = ip_grab_attach_ill(ill, first_mp, 4363 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4364 io->ipsec_out_ill_index), B_TRUE, ipst); 4365 /* Failure case frees things for us. */ 4366 if (attach_ill == NULL) 4367 return; 4368 4369 /* 4370 * Check if we need an ire that will not be 4371 * looked up by anybody else i.e. HIDDEN. 4372 */ 4373 if (ill_is_probeonly(attach_ill)) 4374 ire_marks = IRE_MARK_HIDDEN; 4375 } 4376 } 4377 4378 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4379 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4380 goto icmp_err_ret; 4381 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4382 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4383 goto icmp_err_ret; 4384 } 4385 4386 /* 4387 * If this IRE is created for forwarding or it is not for 4388 * TCP traffic, mark it as temporary. 4389 * 4390 * Is it sufficient just to check the next header?? 4391 */ 4392 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4393 ire_marks |= IRE_MARK_TEMPORARY; 4394 4395 /* 4396 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4397 * chain until it gets the most specific information available. 4398 * For example, we know that there is no IRE_CACHE for this dest, 4399 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4400 * ire_ftable_lookup_v6 will look up the gateway, etc. 4401 */ 4402 4403 if (ill == NULL) { 4404 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4405 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4406 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4407 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4408 match_flags, ipst); 4409 /* 4410 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4411 * in a NULL ill, but the packet could be a neighbor 4412 * solicitation/advertisment and could have a valid attach_ill. 4413 */ 4414 if (attach_ill != NULL) 4415 ill_refrele(attach_ill); 4416 } else { 4417 if (attach_ill != NULL) { 4418 /* 4419 * attach_ill is set only for communicating with 4420 * on-link hosts. 
So, don't look for DEFAULT. 4421 * ip_wput_v6 passes the right ill in this case and 4422 * hence we can assert. 4423 */ 4424 ASSERT(ill == attach_ill); 4425 ill_refrele(attach_ill); 4426 do_attach_ill = B_TRUE; 4427 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4428 } else { 4429 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4430 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4431 } 4432 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4433 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4434 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); 4435 } 4436 4437 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4438 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4439 4440 /* 4441 * We enter a loop that will be run only once in most cases. 4442 * The loop is re-entered in the case where the destination 4443 * can be reached through multiple RTF_MULTIRT-flagged routes. 4444 * The intention is to compute multiple routes to a single 4445 * destination in a single ip_newroute_v6 call. 4446 * The information is contained in sire->ire_flags. 4447 */ 4448 do { 4449 multirt_resolve_next = B_FALSE; 4450 4451 if (dst_ill != NULL) { 4452 ill_refrele(dst_ill); 4453 dst_ill = NULL; 4454 } 4455 if (src_ipif != NULL) { 4456 ipif_refrele(src_ipif); 4457 src_ipif = NULL; 4458 } 4459 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4460 ip3dbg(("ip_newroute_v6: starting new resolution " 4461 "with first_mp %p, tag %d\n", 4462 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4463 4464 /* 4465 * We check if there are trailing unresolved routes for 4466 * the destination contained in sire. 
4467 */ 4468 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4469 &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); 4470 4471 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4472 "ire %p, sire %p\n", 4473 multirt_is_resolvable, (void *)ire, (void *)sire)); 4474 4475 if (!multirt_is_resolvable) { 4476 /* 4477 * No more multirt routes to resolve; give up 4478 * (all routes resolved or no more resolvable 4479 * routes). 4480 */ 4481 if (ire != NULL) { 4482 ire_refrele(ire); 4483 ire = NULL; 4484 } 4485 } else { 4486 ASSERT(sire != NULL); 4487 ASSERT(ire != NULL); 4488 /* 4489 * We simply use first_sire as a flag that 4490 * indicates if a resolvable multirt route has 4491 * already been found during the preceding 4492 * loops. If it is not the case, we may have 4493 * to send an ICMP error to report that the 4494 * destination is unreachable. We do not 4495 * IRE_REFHOLD first_sire. 4496 */ 4497 if (first_sire == NULL) { 4498 first_sire = sire; 4499 } 4500 } 4501 } 4502 if ((ire == NULL) || (ire == sire)) { 4503 /* 4504 * either ire == NULL (the destination cannot be 4505 * resolved) or ire == sire (the gateway cannot be 4506 * resolved). At this point, there are no more routes 4507 * to resolve for the destination, thus we exit. 4508 */ 4509 if (ip_debug > 3) { 4510 /* ip2dbg */ 4511 pr_addr_dbg("ip_newroute_v6: " 4512 "can't resolve %s\n", AF_INET6, v6dstp); 4513 } 4514 ip3dbg(("ip_newroute_v6: " 4515 "ire %p, sire %p, first_sire %p\n", 4516 (void *)ire, (void *)sire, (void *)first_sire)); 4517 4518 if (sire != NULL) { 4519 ire_refrele(sire); 4520 sire = NULL; 4521 } 4522 4523 if (first_sire != NULL) { 4524 /* 4525 * At least one multirt route has been found 4526 * in the same ip_newroute() call; there is no 4527 * need to report an ICMP error. 4528 * first_sire was not IRE_REFHOLDed. 
4529 */ 4530 MULTIRT_DEBUG_UNTAG(first_mp); 4531 freemsg(first_mp); 4532 return; 4533 } 4534 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4535 RTA_DST, ipst); 4536 goto icmp_err_ret; 4537 } 4538 4539 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4540 4541 /* 4542 * Verify that the returned IRE does not have either the 4543 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4544 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4545 */ 4546 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4547 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4548 goto icmp_err_ret; 4549 4550 /* 4551 * Increment the ire_ob_pkt_count field for ire if it is an 4552 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4553 * increment the same for the parent IRE, sire, if it is some 4554 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4555 */ 4556 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4557 UPDATE_OB_PKT_COUNT(ire); 4558 ire->ire_last_used_time = lbolt; 4559 } 4560 4561 if (sire != NULL) { 4562 mutex_enter(&sire->ire_lock); 4563 v6gw = sire->ire_gateway_addr_v6; 4564 mutex_exit(&sire->ire_lock); 4565 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4566 IRE_INTERFACE)) == 0); 4567 UPDATE_OB_PKT_COUNT(sire); 4568 sire->ire_last_used_time = lbolt; 4569 } else { 4570 v6gw = ipv6_all_zeros; 4571 } 4572 4573 /* 4574 * We have a route to reach the destination. 4575 * 4576 * 1) If the interface is part of ill group, try to get a new 4577 * ill taking load spreading into account. 4578 * 4579 * 2) After selecting the ill, get a source address that might 4580 * create good inbound load spreading and that matches the 4581 * right scope. ipif_select_source_v6 does this for us. 4582 * 4583 * If the application specified the ill (ifindex), we still 4584 * load spread. Only if the packets needs to go out specifically 4585 * on a given ill e.g. 
bind to IPIF_NOFAILOVER address, 4586 * IPV6_BOUND_PIF we don't try to use a different ill for load 4587 * spreading. 4588 */ 4589 if (!do_attach_ill) { 4590 /* 4591 * If the interface belongs to an interface group, 4592 * make sure the next possible interface in the group 4593 * is used. This encourages load spreading among 4594 * peers in an interface group. However, in the case 4595 * of multirouting, load spreading is not used, as we 4596 * actually want to replicate outgoing packets through 4597 * particular interfaces. 4598 * 4599 * Note: While we pick a dst_ill we are really only 4600 * interested in the ill for load spreading. 4601 * The source ipif is determined by source address 4602 * selection below. 4603 */ 4604 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4605 dst_ill = ire->ire_ipif->ipif_ill; 4606 /* For uniformity do a refhold */ 4607 ill_refhold(dst_ill); 4608 } else { 4609 /* 4610 * If we are here trying to create an IRE_CACHE 4611 * for an offlink destination and have the 4612 * IRE_CACHE for the next hop and the latter is 4613 * using virtual IP source address selection i.e 4614 * it's ire->ire_ipif is pointing to a virtual 4615 * network interface (vni) then 4616 * ip_newroute_get_dst_ll() will return the vni 4617 * interface as the dst_ill. Since the vni is 4618 * virtual i.e not associated with any physical 4619 * interface, it cannot be the dst_ill, hence 4620 * in such a case call ip_newroute_get_dst_ll() 4621 * with the stq_ill instead of the ire_ipif ILL. 4622 * The function returns a refheld ill. 
4623 */ 4624 if ((ire->ire_type == IRE_CACHE) && 4625 IS_VNI(ire->ire_ipif->ipif_ill)) 4626 dst_ill = ip_newroute_get_dst_ill_v6( 4627 ire->ire_stq->q_ptr); 4628 else 4629 dst_ill = ip_newroute_get_dst_ill_v6( 4630 ire->ire_ipif->ipif_ill); 4631 } 4632 if (dst_ill == NULL) { 4633 if (ip_debug > 2) { 4634 pr_addr_dbg("ip_newroute_v6 : no dst " 4635 "ill for dst %s\n", 4636 AF_INET6, v6dstp); 4637 } 4638 goto icmp_err_ret; 4639 } else if (dst_ill->ill_group == NULL && ill != NULL && 4640 dst_ill != ill) { 4641 /* 4642 * If "ill" is not part of any group, we should 4643 * have found a route matching "ill" as we 4644 * called ire_ftable_lookup_v6 with 4645 * MATCH_IRE_ILL_GROUP. 4646 * Rather than asserting when there is a 4647 * mismatch, we just drop the packet. 4648 */ 4649 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4650 "dst_ill %s ill %s\n", 4651 dst_ill->ill_name, 4652 ill->ill_name)); 4653 goto icmp_err_ret; 4654 } 4655 } else { 4656 dst_ill = ire->ire_ipif->ipif_ill; 4657 /* For uniformity do refhold */ 4658 ill_refhold(dst_ill); 4659 /* 4660 * We should have found a route matching ill as we 4661 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4662 * Rather than asserting, while there is a mismatch, 4663 * we just drop the packet. 4664 */ 4665 if (dst_ill != ill) { 4666 ip0dbg(("ip_newroute_v6: Packet dropped as " 4667 "IP6I_ATTACH_IF ill is %s, " 4668 "ire->ire_ipif->ipif_ill is %s\n", 4669 ill->ill_name, 4670 dst_ill->ill_name)); 4671 goto icmp_err_ret; 4672 } 4673 } 4674 /* 4675 * Pick a source address which matches the scope of the 4676 * destination address. 4677 * For RTF_SETSRC routes, the source address is imposed by the 4678 * parent ire (sire). 4679 */ 4680 ASSERT(src_ipif == NULL); 4681 if (ire->ire_type == IRE_IF_RESOLVER && 4682 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4683 ip6_asp_can_lookup(ipst)) { 4684 /* 4685 * The ire cache entry we're adding is for the 4686 * gateway itself. 
The source address in this case 4687 * is relative to the gateway's address. 4688 */ 4689 ip6_asp_table_held = B_TRUE; 4690 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4691 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4692 if (src_ipif != NULL) 4693 ire_marks |= IRE_MARK_USESRC_CHECK; 4694 } else { 4695 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4696 /* 4697 * Check that the ipif matching the requested 4698 * source address still exists. 4699 */ 4700 src_ipif = ipif_lookup_addr_v6( 4701 &sire->ire_src_addr_v6, NULL, zoneid, 4702 NULL, NULL, NULL, NULL, ipst); 4703 } 4704 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4705 uint_t restrict_ill = RESTRICT_TO_NONE; 4706 4707 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4708 & IP6I_ATTACH_IF) 4709 restrict_ill = RESTRICT_TO_ILL; 4710 ip6_asp_table_held = B_TRUE; 4711 src_ipif = ipif_select_source_v6(dst_ill, 4712 v6dstp, restrict_ill, 4713 IPV6_PREFER_SRC_DEFAULT, zoneid); 4714 if (src_ipif != NULL) 4715 ire_marks |= IRE_MARK_USESRC_CHECK; 4716 } 4717 } 4718 4719 if (src_ipif == NULL) { 4720 if (ip_debug > 2) { 4721 /* ip1dbg */ 4722 pr_addr_dbg("ip_newroute_v6: no src for " 4723 "dst %s\n, ", AF_INET6, v6dstp); 4724 printf("ip_newroute_v6: interface name %s\n", 4725 dst_ill->ill_name); 4726 } 4727 goto icmp_err_ret; 4728 } 4729 4730 if (ip_debug > 3) { 4731 /* ip2dbg */ 4732 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4733 AF_INET6, &v6gw); 4734 } 4735 ip2dbg(("\tire type %s (%d)\n", 4736 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4737 4738 /* 4739 * At this point in ip_newroute_v6(), ire is either the 4740 * IRE_CACHE of the next-hop gateway for an off-subnet 4741 * destination or an IRE_INTERFACE type that should be used 4742 * to resolve an on-subnet destination or an on-subnet 4743 * next-hop gateway. 4744 * 4745 * In the IRE_CACHE case, we have the following : 4746 * 4747 * 1) src_ipif - used for getting a source address. 
4748 * 4749 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4750 * means packets using this IRE_CACHE will go out on dst_ill. 4751 * 4752 * 3) The IRE sire will point to the prefix that is the longest 4753 * matching route for the destination. These prefix types 4754 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4755 * 4756 * The newly created IRE_CACHE entry for the off-subnet 4757 * destination is tied to both the prefix route and the 4758 * interface route used to resolve the next-hop gateway 4759 * via the ire_phandle and ire_ihandle fields, respectively. 4760 * 4761 * In the IRE_INTERFACE case, we have the following : 4762 * 4763 * 1) src_ipif - used for getting a source address. 4764 * 4765 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4766 * means packets using the IRE_CACHE that we will build 4767 * here will go out on dst_ill. 4768 * 4769 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4770 * to be created will only be tied to the IRE_INTERFACE that 4771 * was derived from the ire_ihandle field. 4772 * 4773 * If sire is non-NULL, it means the destination is off-link 4774 * and we will first create the IRE_CACHE for the gateway. 4775 * Next time through ip_newroute_v6, we will create the 4776 * IRE_CACHE for the final destination as described above. 4777 */ 4778 save_ire = ire; 4779 switch (ire->ire_type) { 4780 case IRE_CACHE: { 4781 ire_t *ipif_ire; 4782 4783 ASSERT(sire != NULL); 4784 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4785 mutex_enter(&ire->ire_lock); 4786 v6gw = ire->ire_gateway_addr_v6; 4787 mutex_exit(&ire->ire_lock); 4788 } 4789 /* 4790 * We need 3 ire's to create a new cache ire for an 4791 * off-link destination from the cache ire of the 4792 * gateway. 4793 * 4794 * 1. The prefix ire 'sire' 4795 * 2. The cache ire of the gateway 'ire' 4796 * 3. The interface ire 'ipif_ire' 4797 * 4798 * We have (1) and (2). We lookup (3) below. 
4799 * 4800 * If there is no interface route to the gateway, 4801 * it is a race condition, where we found the cache 4802 * but the inteface route has been deleted. 4803 */ 4804 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4805 if (ipif_ire == NULL) { 4806 ip1dbg(("ip_newroute_v6:" 4807 "ire_ihandle_lookup_offlink_v6 failed\n")); 4808 goto icmp_err_ret; 4809 } 4810 /* 4811 * Assume DL_UNITDATA_REQ is same for all physical 4812 * interfaces in the ifgrp. If it isn't, this code will 4813 * have to be seriously rewhacked to allow the 4814 * fastpath probing (such that I cache the link 4815 * header in the IRE_CACHE) to work over ifgrps. 4816 * We have what we need to build an IRE_CACHE. 4817 */ 4818 /* 4819 * Note: the new ire inherits RTF_SETSRC 4820 * and RTF_MULTIRT to propagate these flags from prefix 4821 * to cache. 4822 */ 4823 4824 /* 4825 * Check cached gateway IRE for any security 4826 * attributes; if found, associate the gateway 4827 * credentials group to the destination IRE. 
4828 */ 4829 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4830 mutex_enter(&attrp->igsa_lock); 4831 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4832 GCGRP_REFHOLD(gcgrp); 4833 mutex_exit(&attrp->igsa_lock); 4834 } 4835 4836 ire = ire_create_v6( 4837 v6dstp, /* dest address */ 4838 &ipv6_all_ones, /* mask */ 4839 &src_ipif->ipif_v6src_addr, /* source address */ 4840 &v6gw, /* gateway address */ 4841 &save_ire->ire_max_frag, 4842 NULL, /* src nce */ 4843 dst_ill->ill_rq, /* recv-from queue */ 4844 dst_ill->ill_wq, /* send-to queue */ 4845 IRE_CACHE, 4846 src_ipif, 4847 &sire->ire_mask_v6, /* Parent mask */ 4848 sire->ire_phandle, /* Parent handle */ 4849 ipif_ire->ire_ihandle, /* Interface handle */ 4850 sire->ire_flags & /* flags if any */ 4851 (RTF_SETSRC | RTF_MULTIRT), 4852 &(sire->ire_uinfo), 4853 NULL, 4854 gcgrp, 4855 ipst); 4856 4857 if (ire == NULL) { 4858 if (gcgrp != NULL) { 4859 GCGRP_REFRELE(gcgrp); 4860 gcgrp = NULL; 4861 } 4862 ire_refrele(save_ire); 4863 ire_refrele(ipif_ire); 4864 break; 4865 } 4866 4867 /* reference now held by IRE */ 4868 gcgrp = NULL; 4869 4870 ire->ire_marks |= ire_marks; 4871 4872 /* 4873 * Prevent sire and ipif_ire from getting deleted. The 4874 * newly created ire is tied to both of them via the 4875 * phandle and ihandle respectively. 4876 */ 4877 IRB_REFHOLD(sire->ire_bucket); 4878 /* Has it been removed already ? */ 4879 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4880 IRB_REFRELE(sire->ire_bucket); 4881 ire_refrele(ipif_ire); 4882 ire_refrele(save_ire); 4883 break; 4884 } 4885 4886 IRB_REFHOLD(ipif_ire->ire_bucket); 4887 /* Has it been removed already ? 
*/ 4888 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4889 IRB_REFRELE(ipif_ire->ire_bucket); 4890 IRB_REFRELE(sire->ire_bucket); 4891 ire_refrele(ipif_ire); 4892 ire_refrele(save_ire); 4893 break; 4894 } 4895 4896 xmit_mp = first_mp; 4897 if (ire->ire_flags & RTF_MULTIRT) { 4898 copy_mp = copymsg(first_mp); 4899 if (copy_mp != NULL) { 4900 xmit_mp = copy_mp; 4901 MULTIRT_DEBUG_TAG(first_mp); 4902 } 4903 } 4904 ire_add_then_send(q, ire, xmit_mp); 4905 if (ip6_asp_table_held) { 4906 ip6_asp_table_refrele(ipst); 4907 ip6_asp_table_held = B_FALSE; 4908 } 4909 ire_refrele(save_ire); 4910 4911 /* Assert that sire is not deleted yet. */ 4912 ASSERT(sire->ire_ptpn != NULL); 4913 IRB_REFRELE(sire->ire_bucket); 4914 4915 /* Assert that ipif_ire is not deleted yet. */ 4916 ASSERT(ipif_ire->ire_ptpn != NULL); 4917 IRB_REFRELE(ipif_ire->ire_bucket); 4918 ire_refrele(ipif_ire); 4919 4920 if (copy_mp != NULL) { 4921 /* 4922 * Search for the next unresolved 4923 * multirt route. 4924 */ 4925 copy_mp = NULL; 4926 ipif_ire = NULL; 4927 ire = NULL; 4928 /* re-enter the loop */ 4929 multirt_resolve_next = B_TRUE; 4930 continue; 4931 } 4932 ire_refrele(sire); 4933 ill_refrele(dst_ill); 4934 ipif_refrele(src_ipif); 4935 return; 4936 } 4937 case IRE_IF_NORESOLVER: 4938 /* 4939 * We have what we need to build an IRE_CACHE. 4940 * 4941 * handle the Gated case, where we create 4942 * a NORESOLVER route for loopback. 4943 */ 4944 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4945 break; 4946 /* 4947 * TSol note: We are creating the ire cache for the 4948 * destination 'dst'. If 'dst' is offlink, going 4949 * through the first hop 'gw', the security attributes 4950 * of 'dst' must be set to point to the gateway 4951 * credentials of gateway 'gw'. If 'dst' is onlink, it 4952 * is possible that 'dst' is a potential gateway that is 4953 * referenced by some route that has some security 4954 * attributes. 
Thus in the former case, we need to do a 4955 * gcgrp_lookup of 'gw' while in the latter case we 4956 * need to do gcgrp_lookup of 'dst' itself. 4957 */ 4958 ga.ga_af = AF_INET6; 4959 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4960 ga.ga_addr = v6gw; 4961 else 4962 ga.ga_addr = *v6dstp; 4963 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4964 4965 /* 4966 * Note: the new ire inherits sire flags RTF_SETSRC 4967 * and RTF_MULTIRT to propagate those rules from prefix 4968 * to cache. 4969 */ 4970 ire = ire_create_v6( 4971 v6dstp, /* dest address */ 4972 &ipv6_all_ones, /* mask */ 4973 &src_ipif->ipif_v6src_addr, /* source address */ 4974 &v6gw, /* gateway address */ 4975 &save_ire->ire_max_frag, 4976 NULL, /* no src nce */ 4977 dst_ill->ill_rq, /* recv-from queue */ 4978 dst_ill->ill_wq, /* send-to queue */ 4979 IRE_CACHE, 4980 src_ipif, 4981 &save_ire->ire_mask_v6, /* Parent mask */ 4982 (sire != NULL) ? /* Parent handle */ 4983 sire->ire_phandle : 0, 4984 save_ire->ire_ihandle, /* Interface handle */ 4985 (sire != NULL) ? /* flags if any */ 4986 sire->ire_flags & 4987 (RTF_SETSRC | RTF_MULTIRT) : 0, 4988 &(save_ire->ire_uinfo), 4989 NULL, 4990 gcgrp, 4991 ipst); 4992 4993 if (ire == NULL) { 4994 if (gcgrp != NULL) { 4995 GCGRP_REFRELE(gcgrp); 4996 gcgrp = NULL; 4997 } 4998 ire_refrele(save_ire); 4999 break; 5000 } 5001 5002 /* reference now held by IRE */ 5003 gcgrp = NULL; 5004 5005 ire->ire_marks |= ire_marks; 5006 5007 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5008 dst = v6gw; 5009 else 5010 dst = *v6dstp; 5011 err = ndp_noresolver(dst_ill, &dst); 5012 if (err != 0) { 5013 ire_refrele(save_ire); 5014 break; 5015 } 5016 5017 /* Prevent save_ire from getting deleted */ 5018 IRB_REFHOLD(save_ire->ire_bucket); 5019 /* Has it been removed already ? 
*/ 5020 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5021 IRB_REFRELE(save_ire->ire_bucket); 5022 ire_refrele(save_ire); 5023 break; 5024 } 5025 5026 xmit_mp = first_mp; 5027 /* 5028 * In case of MULTIRT, a copy of the current packet 5029 * to send is made to further re-enter the 5030 * loop and attempt another route resolution 5031 */ 5032 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5033 copy_mp = copymsg(first_mp); 5034 if (copy_mp != NULL) { 5035 xmit_mp = copy_mp; 5036 MULTIRT_DEBUG_TAG(first_mp); 5037 } 5038 } 5039 ire_add_then_send(q, ire, xmit_mp); 5040 if (ip6_asp_table_held) { 5041 ip6_asp_table_refrele(ipst); 5042 ip6_asp_table_held = B_FALSE; 5043 } 5044 5045 /* Assert that it is not deleted yet. */ 5046 ASSERT(save_ire->ire_ptpn != NULL); 5047 IRB_REFRELE(save_ire->ire_bucket); 5048 ire_refrele(save_ire); 5049 5050 if (copy_mp != NULL) { 5051 /* 5052 * If we found a (no)resolver, we ignore any 5053 * trailing top priority IRE_CACHE in 5054 * further loops. This ensures that we do not 5055 * omit any (no)resolver despite the priority 5056 * in this call. 5057 * IRE_CACHE, if any, will be processed 5058 * by another thread entering ip_newroute(), 5059 * (on resolver response, for example). 5060 * We use this to force multiple parallel 5061 * resolution as soon as a packet needs to be 5062 * sent. The result is, after one packet 5063 * emission all reachable routes are generally 5064 * resolved. 5065 * Otherwise, complete resolution of MULTIRT 5066 * routes would require several emissions as 5067 * side effect. 5068 */ 5069 multirt_flags &= ~MULTIRT_CACHEGW; 5070 5071 /* 5072 * Search for the next unresolved multirt 5073 * route. 
5074 */ 5075 copy_mp = NULL; 5076 save_ire = NULL; 5077 ire = NULL; 5078 /* re-enter the loop */ 5079 multirt_resolve_next = B_TRUE; 5080 continue; 5081 } 5082 5083 /* Don't need sire anymore */ 5084 if (sire != NULL) 5085 ire_refrele(sire); 5086 ill_refrele(dst_ill); 5087 ipif_refrele(src_ipif); 5088 return; 5089 5090 case IRE_IF_RESOLVER: 5091 /* 5092 * We can't build an IRE_CACHE yet, but at least we 5093 * found a resolver that can help. 5094 */ 5095 dst = *v6dstp; 5096 5097 /* 5098 * To be at this point in the code with a non-zero gw 5099 * means that dst is reachable through a gateway that 5100 * we have never resolved. By changing dst to the gw 5101 * addr we resolve the gateway first. When 5102 * ire_add_then_send() tries to put the IP dg to dst, 5103 * it will reenter ip_newroute() at which time we will 5104 * find the IRE_CACHE for the gw and create another 5105 * IRE_CACHE above (for dst itself). 5106 */ 5107 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5108 save_dst = dst; 5109 dst = v6gw; 5110 v6gw = ipv6_all_zeros; 5111 } 5112 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5113 /* 5114 * Ask the external resolver to do its thing. 
5115 * Make an mblk chain in the following form: 5116 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5117 */ 5118 mblk_t *ire_mp; 5119 mblk_t *areq_mp; 5120 areq_t *areq; 5121 in6_addr_t *addrp; 5122 5123 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5124 if (ip6_asp_table_held) { 5125 ip6_asp_table_refrele(ipst); 5126 ip6_asp_table_held = B_FALSE; 5127 } 5128 ire = ire_create_mp_v6( 5129 &dst, /* dest address */ 5130 &ipv6_all_ones, /* mask */ 5131 &src_ipif->ipif_v6src_addr, 5132 /* source address */ 5133 &v6gw, /* gateway address */ 5134 NULL, /* no src nce */ 5135 dst_ill->ill_rq, /* recv-from queue */ 5136 dst_ill->ill_wq, /* send-to queue */ 5137 IRE_CACHE, 5138 src_ipif, 5139 &save_ire->ire_mask_v6, /* Parent mask */ 5140 0, 5141 save_ire->ire_ihandle, 5142 /* Interface handle */ 5143 0, /* flags if any */ 5144 &(save_ire->ire_uinfo), 5145 NULL, 5146 NULL, 5147 ipst); 5148 5149 ire_refrele(save_ire); 5150 if (ire == NULL) { 5151 ip1dbg(("ip_newroute_v6:" 5152 "ire is NULL\n")); 5153 break; 5154 } 5155 5156 if ((sire != NULL) && 5157 (sire->ire_flags & RTF_MULTIRT)) { 5158 /* 5159 * processing a copy of the packet to 5160 * send for further resolution loops 5161 */ 5162 copy_mp = copymsg(first_mp); 5163 if (copy_mp != NULL) 5164 MULTIRT_DEBUG_TAG(copy_mp); 5165 } 5166 ire->ire_marks |= ire_marks; 5167 ire_mp = ire->ire_mp; 5168 /* 5169 * Now create or find an nce for this interface. 5170 * The hw addr will need to to be set from 5171 * the reply to the AR_ENTRY_QUERY that 5172 * we're about to send. This will be done in 5173 * ire_add_v6(). 5174 */ 5175 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5176 switch (err) { 5177 case 0: 5178 /* 5179 * New cache entry created. 5180 * Break, then ask the external 5181 * resolver. 5182 */ 5183 break; 5184 case EINPROGRESS: 5185 /* 5186 * Resolution in progress; 5187 * packet has been queued by 5188 * ndp_resolver(). 5189 */ 5190 ire_delete(ire); 5191 ire = NULL; 5192 /* 5193 * Check if another multirt 5194 * route must be resolved. 
5195 */ 5196 if (copy_mp != NULL) { 5197 /* 5198 * If we found a resolver, we 5199 * ignore any trailing top 5200 * priority IRE_CACHE in 5201 * further loops. The reason is 5202 * the same as for noresolver. 5203 */ 5204 multirt_flags &= 5205 ~MULTIRT_CACHEGW; 5206 /* 5207 * Search for the next 5208 * unresolved multirt route. 5209 */ 5210 first_mp = copy_mp; 5211 copy_mp = NULL; 5212 mp = first_mp; 5213 if (mp->b_datap->db_type == 5214 M_CTL) { 5215 mp = mp->b_cont; 5216 } 5217 ASSERT(sire != NULL); 5218 dst = save_dst; 5219 /* 5220 * re-enter the loop 5221 */ 5222 multirt_resolve_next = 5223 B_TRUE; 5224 continue; 5225 } 5226 5227 if (sire != NULL) 5228 ire_refrele(sire); 5229 ill_refrele(dst_ill); 5230 ipif_refrele(src_ipif); 5231 return; 5232 default: 5233 /* 5234 * Transient error; packet will be 5235 * freed. 5236 */ 5237 ire_delete(ire); 5238 ire = NULL; 5239 break; 5240 } 5241 if (err != 0) 5242 break; 5243 /* 5244 * Now set up the AR_ENTRY_QUERY and send it. 5245 */ 5246 areq_mp = ill_arp_alloc(dst_ill, 5247 (uchar_t *)&ipv6_areq_template, 5248 (caddr_t)&dst); 5249 if (areq_mp == NULL) { 5250 ip1dbg(("ip_newroute_v6:" 5251 "areq_mp is NULL\n")); 5252 freemsg(ire_mp); 5253 break; 5254 } 5255 areq = (areq_t *)areq_mp->b_rptr; 5256 addrp = (in6_addr_t *)((char *)areq + 5257 areq->areq_target_addr_offset); 5258 *addrp = dst; 5259 addrp = (in6_addr_t *)((char *)areq + 5260 areq->areq_sender_addr_offset); 5261 *addrp = src_ipif->ipif_v6src_addr; 5262 /* 5263 * link the chain, then send up to the resolver. 5264 */ 5265 linkb(areq_mp, ire_mp); 5266 linkb(areq_mp, mp); 5267 ip1dbg(("ip_newroute_v6:" 5268 "putnext to resolver\n")); 5269 putnext(dst_ill->ill_rq, areq_mp); 5270 /* 5271 * Check if another multirt route 5272 * must be resolved. 5273 */ 5274 ire = NULL; 5275 if (copy_mp != NULL) { 5276 /* 5277 * If we find a resolver, we ignore any 5278 * trailing top priority IRE_CACHE in 5279 * further loops. The reason is the 5280 * same as for noresolver. 
5281 */ 5282 multirt_flags &= ~MULTIRT_CACHEGW; 5283 /* 5284 * Search for the next unresolved 5285 * multirt route. 5286 */ 5287 first_mp = copy_mp; 5288 copy_mp = NULL; 5289 mp = first_mp; 5290 if (mp->b_datap->db_type == M_CTL) { 5291 mp = mp->b_cont; 5292 } 5293 ASSERT(sire != NULL); 5294 dst = save_dst; 5295 /* 5296 * re-enter the loop 5297 */ 5298 multirt_resolve_next = B_TRUE; 5299 continue; 5300 } 5301 5302 if (sire != NULL) 5303 ire_refrele(sire); 5304 ill_refrele(dst_ill); 5305 ipif_refrele(src_ipif); 5306 return; 5307 } 5308 /* 5309 * Non-external resolver case. 5310 * 5311 * TSol note: Please see the note above the 5312 * IRE_IF_NORESOLVER case. 5313 */ 5314 ga.ga_af = AF_INET6; 5315 ga.ga_addr = dst; 5316 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5317 5318 ire = ire_create_v6( 5319 &dst, /* dest address */ 5320 &ipv6_all_ones, /* mask */ 5321 &src_ipif->ipif_v6src_addr, /* source address */ 5322 &v6gw, /* gateway address */ 5323 &save_ire->ire_max_frag, 5324 NULL, /* no src nce */ 5325 dst_ill->ill_rq, /* recv-from queue */ 5326 dst_ill->ill_wq, /* send-to queue */ 5327 IRE_CACHE, 5328 src_ipif, 5329 &save_ire->ire_mask_v6, /* Parent mask */ 5330 0, 5331 save_ire->ire_ihandle, /* Interface handle */ 5332 0, /* flags if any */ 5333 &(save_ire->ire_uinfo), 5334 NULL, 5335 gcgrp, 5336 ipst); 5337 5338 if (ire == NULL) { 5339 if (gcgrp != NULL) { 5340 GCGRP_REFRELE(gcgrp); 5341 gcgrp = NULL; 5342 } 5343 ire_refrele(save_ire); 5344 break; 5345 } 5346 5347 /* reference now held by IRE */ 5348 gcgrp = NULL; 5349 5350 if ((sire != NULL) && 5351 (sire->ire_flags & RTF_MULTIRT)) { 5352 copy_mp = copymsg(first_mp); 5353 if (copy_mp != NULL) 5354 MULTIRT_DEBUG_TAG(copy_mp); 5355 } 5356 5357 ire->ire_marks |= ire_marks; 5358 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5359 switch (err) { 5360 case 0: 5361 /* Prevent save_ire from getting deleted */ 5362 IRB_REFHOLD(save_ire->ire_bucket); 5363 /* Has it been removed already ? 
*/ 5364 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5365 IRB_REFRELE(save_ire->ire_bucket); 5366 ire_refrele(save_ire); 5367 break; 5368 } 5369 5370 /* 5371 * We have a resolved cache entry, 5372 * add in the IRE. 5373 */ 5374 ire_add_then_send(q, ire, first_mp); 5375 if (ip6_asp_table_held) { 5376 ip6_asp_table_refrele(ipst); 5377 ip6_asp_table_held = B_FALSE; 5378 } 5379 5380 /* Assert that it is not deleted yet. */ 5381 ASSERT(save_ire->ire_ptpn != NULL); 5382 IRB_REFRELE(save_ire->ire_bucket); 5383 ire_refrele(save_ire); 5384 /* 5385 * Check if another multirt route 5386 * must be resolved. 5387 */ 5388 ire = NULL; 5389 if (copy_mp != NULL) { 5390 /* 5391 * If we find a resolver, we ignore any 5392 * trailing top priority IRE_CACHE in 5393 * further loops. The reason is the 5394 * same as for noresolver. 5395 */ 5396 multirt_flags &= ~MULTIRT_CACHEGW; 5397 /* 5398 * Search for the next unresolved 5399 * multirt route. 5400 */ 5401 first_mp = copy_mp; 5402 copy_mp = NULL; 5403 mp = first_mp; 5404 if (mp->b_datap->db_type == M_CTL) { 5405 mp = mp->b_cont; 5406 } 5407 ASSERT(sire != NULL); 5408 dst = save_dst; 5409 /* 5410 * re-enter the loop 5411 */ 5412 multirt_resolve_next = B_TRUE; 5413 continue; 5414 } 5415 5416 if (sire != NULL) 5417 ire_refrele(sire); 5418 ill_refrele(dst_ill); 5419 ipif_refrele(src_ipif); 5420 return; 5421 5422 case EINPROGRESS: 5423 /* 5424 * mp was consumed - presumably queued. 5425 * No need for ire, presumably resolution is 5426 * in progress, and ire will be added when the 5427 * address is resolved. 5428 */ 5429 if (ip6_asp_table_held) { 5430 ip6_asp_table_refrele(ipst); 5431 ip6_asp_table_held = B_FALSE; 5432 } 5433 ASSERT(ire->ire_nce == NULL); 5434 ire_delete(ire); 5435 ire_refrele(save_ire); 5436 /* 5437 * Check if another multirt route 5438 * must be resolved. 
5439 */ 5440 ire = NULL; 5441 if (copy_mp != NULL) { 5442 /* 5443 * If we find a resolver, we ignore any 5444 * trailing top priority IRE_CACHE in 5445 * further loops. The reason is the 5446 * same as for noresolver. 5447 */ 5448 multirt_flags &= ~MULTIRT_CACHEGW; 5449 /* 5450 * Search for the next unresolved 5451 * multirt route. 5452 */ 5453 first_mp = copy_mp; 5454 copy_mp = NULL; 5455 mp = first_mp; 5456 if (mp->b_datap->db_type == M_CTL) { 5457 mp = mp->b_cont; 5458 } 5459 ASSERT(sire != NULL); 5460 dst = save_dst; 5461 /* 5462 * re-enter the loop 5463 */ 5464 multirt_resolve_next = B_TRUE; 5465 continue; 5466 } 5467 if (sire != NULL) 5468 ire_refrele(sire); 5469 ill_refrele(dst_ill); 5470 ipif_refrele(src_ipif); 5471 return; 5472 default: 5473 /* Some transient error */ 5474 ASSERT(ire->ire_nce == NULL); 5475 ire_refrele(save_ire); 5476 break; 5477 } 5478 break; 5479 default: 5480 break; 5481 } 5482 if (ip6_asp_table_held) { 5483 ip6_asp_table_refrele(ipst); 5484 ip6_asp_table_held = B_FALSE; 5485 } 5486 } while (multirt_resolve_next); 5487 5488 err_ret: 5489 ip1dbg(("ip_newroute_v6: dropped\n")); 5490 if (src_ipif != NULL) 5491 ipif_refrele(src_ipif); 5492 if (dst_ill != NULL) { 5493 need_rele = B_TRUE; 5494 ill = dst_ill; 5495 } 5496 if (ill != NULL) { 5497 if (mp->b_prev != NULL) { 5498 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5499 } else { 5500 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5501 } 5502 5503 if (need_rele) 5504 ill_refrele(ill); 5505 } else { 5506 if (mp->b_prev != NULL) { 5507 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5508 } else { 5509 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5510 } 5511 } 5512 /* Did this packet originate externally? 
*/ 5513 if (mp->b_prev) { 5514 mp->b_next = NULL; 5515 mp->b_prev = NULL; 5516 } 5517 if (copy_mp != NULL) { 5518 MULTIRT_DEBUG_UNTAG(copy_mp); 5519 freemsg(copy_mp); 5520 } 5521 MULTIRT_DEBUG_UNTAG(first_mp); 5522 freemsg(first_mp); 5523 if (ire != NULL) 5524 ire_refrele(ire); 5525 if (sire != NULL) 5526 ire_refrele(sire); 5527 return; 5528 5529 icmp_err_ret: 5530 if (ip6_asp_table_held) 5531 ip6_asp_table_refrele(ipst); 5532 if (src_ipif != NULL) 5533 ipif_refrele(src_ipif); 5534 if (dst_ill != NULL) { 5535 need_rele = B_TRUE; 5536 ill = dst_ill; 5537 } 5538 ip1dbg(("ip_newroute_v6: no route\n")); 5539 if (sire != NULL) 5540 ire_refrele(sire); 5541 /* 5542 * We need to set sire to NULL to avoid double freeing if we 5543 * ever goto err_ret from below. 5544 */ 5545 sire = NULL; 5546 ip6h = (ip6_t *)mp->b_rptr; 5547 /* Skip ip6i_t header if present */ 5548 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5549 /* Make sure the IPv6 header is present */ 5550 if ((mp->b_wptr - (uchar_t *)ip6h) < 5551 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5552 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5553 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5554 goto err_ret; 5555 } 5556 } 5557 mp->b_rptr += sizeof (ip6i_t); 5558 ip6h = (ip6_t *)mp->b_rptr; 5559 } 5560 /* Did this packet originate externally? 
*/ 5561 if (mp->b_prev) { 5562 if (ill != NULL) { 5563 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5564 } else { 5565 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5566 } 5567 mp->b_next = NULL; 5568 mp->b_prev = NULL; 5569 q = WR(q); 5570 } else { 5571 if (ill != NULL) { 5572 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5573 } else { 5574 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5575 } 5576 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5577 /* Failed */ 5578 if (copy_mp != NULL) { 5579 MULTIRT_DEBUG_UNTAG(copy_mp); 5580 freemsg(copy_mp); 5581 } 5582 MULTIRT_DEBUG_UNTAG(first_mp); 5583 freemsg(first_mp); 5584 if (ire != NULL) 5585 ire_refrele(ire); 5586 if (need_rele) 5587 ill_refrele(ill); 5588 return; 5589 } 5590 } 5591 5592 if (need_rele) 5593 ill_refrele(ill); 5594 5595 /* 5596 * At this point we will have ire only if RTF_BLACKHOLE 5597 * or RTF_REJECT flags are set on the IRE. It will not 5598 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5599 */ 5600 if (ire != NULL) { 5601 if (ire->ire_flags & RTF_BLACKHOLE) { 5602 ire_refrele(ire); 5603 if (copy_mp != NULL) { 5604 MULTIRT_DEBUG_UNTAG(copy_mp); 5605 freemsg(copy_mp); 5606 } 5607 MULTIRT_DEBUG_UNTAG(first_mp); 5608 freemsg(first_mp); 5609 return; 5610 } 5611 ire_refrele(ire); 5612 } 5613 if (ip_debug > 3) { 5614 /* ip2dbg */ 5615 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5616 AF_INET6, v6dstp); 5617 } 5618 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5619 B_FALSE, B_FALSE, zoneid, ipst); 5620 } 5621 5622 /* 5623 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5624 * we need to send out a packet to a destination address for which we do not 5625 * have specific routing information. It is only used for multicast packets. 5626 * 5627 * If unspec_src we allow creating an IRE with source address zero. 5628 * ire_send_v6() will delete it after the packet is sent. 
5629 */ 5630 void 5631 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5632 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5633 { 5634 ire_t *ire = NULL; 5635 ipif_t *src_ipif = NULL; 5636 int err = 0; 5637 ill_t *dst_ill = NULL; 5638 ire_t *save_ire; 5639 ushort_t ire_marks = 0; 5640 ipsec_out_t *io; 5641 ill_t *attach_ill = NULL; 5642 ill_t *ill; 5643 ip6_t *ip6h; 5644 mblk_t *first_mp; 5645 boolean_t ip6i_present; 5646 ire_t *fire = NULL; 5647 mblk_t *copy_mp = NULL; 5648 boolean_t multirt_resolve_next; 5649 in6_addr_t *v6dstp = &v6dst; 5650 boolean_t ipif_held = B_FALSE; 5651 boolean_t ill_held = B_FALSE; 5652 boolean_t ip6_asp_table_held = B_FALSE; 5653 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5654 5655 /* 5656 * This loop is run only once in most cases. 5657 * We loop to resolve further routes only when the destination 5658 * can be reached through multiple RTF_MULTIRT-flagged ires. 5659 */ 5660 do { 5661 multirt_resolve_next = B_FALSE; 5662 if (dst_ill != NULL) { 5663 ill_refrele(dst_ill); 5664 dst_ill = NULL; 5665 } 5666 5667 if (src_ipif != NULL) { 5668 ipif_refrele(src_ipif); 5669 src_ipif = NULL; 5670 } 5671 ASSERT(ipif != NULL); 5672 ill = ipif->ipif_ill; 5673 5674 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5675 if (ip_debug > 2) { 5676 /* ip1dbg */ 5677 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5678 AF_INET6, v6dstp); 5679 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5680 ill->ill_name, ipif->ipif_isv6); 5681 } 5682 5683 first_mp = mp; 5684 if (mp->b_datap->db_type == M_CTL) { 5685 mp = mp->b_cont; 5686 io = (ipsec_out_t *)first_mp->b_rptr; 5687 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5688 } else { 5689 io = NULL; 5690 } 5691 5692 /* 5693 * If the interface is a pt-pt interface we look for an 5694 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5695 * local_address and the pt-pt destination address. 5696 * Otherwise we just match the local address. 
5697 */ 5698 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5699 goto err_ret; 5700 } 5701 /* 5702 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5703 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5704 * as it could be NULL. 5705 * 5706 * This information can appear either in an ip6i_t or an 5707 * IPSEC_OUT message. 5708 */ 5709 ip6h = (ip6_t *)mp->b_rptr; 5710 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5711 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5712 if (!ip6i_present || 5713 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5714 attach_ill = ip_grab_attach_ill(ill, first_mp, 5715 (ip6i_present ? 5716 ((ip6i_t *)ip6h)->ip6i_ifindex : 5717 io->ipsec_out_ill_index), B_TRUE, ipst); 5718 /* Failure case frees things for us. */ 5719 if (attach_ill == NULL) 5720 return; 5721 5722 /* 5723 * Check if we need an ire that will not be 5724 * looked up by anybody else i.e. HIDDEN. 5725 */ 5726 if (ill_is_probeonly(attach_ill)) 5727 ire_marks = IRE_MARK_HIDDEN; 5728 } 5729 } 5730 5731 /* 5732 * We check if an IRE_OFFSUBNET for the addr that goes through 5733 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5734 * RTF_MULTIRT flags must be honored. 5735 */ 5736 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5737 ip2dbg(("ip_newroute_ipif_v6: " 5738 "ipif_lookup_multi_ire_v6(" 5739 "ipif %p, dst %08x) = fire %p\n", 5740 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5741 (void *)fire)); 5742 5743 /* 5744 * If the application specified the ill (ifindex), we still 5745 * load spread. Only if the packets needs to go out specifically 5746 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5747 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5748 * multirouting, then we don't try to use a different ill for 5749 * load spreading. 
5750 */ 5751 if (attach_ill == NULL) { 5752 /* 5753 * If the interface belongs to an interface group, 5754 * make sure the next possible interface in the group 5755 * is used. This encourages load spreading among peers 5756 * in an interface group. 5757 * 5758 * Note: While we pick a dst_ill we are really only 5759 * interested in the ill for load spreading. The source 5760 * ipif is determined by source address selection below. 5761 */ 5762 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5763 dst_ill = ipif->ipif_ill; 5764 /* For uniformity do a refhold */ 5765 ill_refhold(dst_ill); 5766 } else { 5767 /* refheld by ip_newroute_get_dst_ill_v6 */ 5768 dst_ill = 5769 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5770 } 5771 if (dst_ill == NULL) { 5772 if (ip_debug > 2) { 5773 pr_addr_dbg("ip_newroute_ipif_v6: " 5774 "no dst ill for dst %s\n", 5775 AF_INET6, v6dstp); 5776 } 5777 goto err_ret; 5778 } 5779 } else { 5780 dst_ill = ipif->ipif_ill; 5781 /* 5782 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5783 * and IPV6_BOUND_PIF case. 5784 */ 5785 ASSERT(dst_ill == attach_ill); 5786 /* attach_ill is already refheld */ 5787 } 5788 /* 5789 * Pick a source address which matches the scope of the 5790 * destination address. 5791 * For RTF_SETSRC routes, the source address is imposed by the 5792 * parent ire (fire). 5793 */ 5794 ASSERT(src_ipif == NULL); 5795 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5796 /* 5797 * Check that the ipif matching the requested source 5798 * address still exists. 
5799 */ 5800 src_ipif = 5801 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5802 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5803 } 5804 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5805 uint_t restrict_ill = RESTRICT_TO_NONE; 5806 5807 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 5808 & IP6I_ATTACH_IF) 5809 restrict_ill = RESTRICT_TO_ILL; 5810 ip6_asp_table_held = B_TRUE; 5811 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5812 restrict_ill, IPV6_PREFER_SRC_DEFAULT, zoneid); 5813 } 5814 5815 if (src_ipif == NULL) { 5816 if (!unspec_src) { 5817 if (ip_debug > 2) { 5818 /* ip1dbg */ 5819 pr_addr_dbg("ip_newroute_ipif_v6: " 5820 "no src for dst %s\n,", 5821 AF_INET6, v6dstp); 5822 printf(" through interface %s\n", 5823 dst_ill->ill_name); 5824 } 5825 goto err_ret; 5826 } 5827 src_ipif = ipif; 5828 ipif_refhold(src_ipif); 5829 } 5830 ire = ipif_to_ire_v6(ipif); 5831 if (ire == NULL) { 5832 if (ip_debug > 2) { 5833 /* ip1dbg */ 5834 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5835 AF_INET6, &ipif->ipif_v6lcl_addr); 5836 printf("ip_newroute_ipif_v6: " 5837 "if %s\n", dst_ill->ill_name); 5838 } 5839 goto err_ret; 5840 } 5841 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5842 goto err_ret; 5843 5844 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5845 5846 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5847 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5848 if (ip_debug > 2) { 5849 /* ip1dbg */ 5850 pr_addr_dbg(" address %s\n", 5851 AF_INET6, &ire->ire_src_addr_v6); 5852 } 5853 save_ire = ire; 5854 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5855 (void *)ire, (void *)ipif)); 5856 5857 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5858 /* 5859 * an IRE_OFFSUBET was looked up 5860 * on that interface. 5861 * this ire has RTF_MULTIRT flag, 5862 * so the resolution loop 5863 * will be re-entered to resolve 5864 * additional routes on other 5865 * interfaces. 
For that purpose, 5866 * a copy of the packet is 5867 * made at this point. 5868 */ 5869 fire->ire_last_used_time = lbolt; 5870 copy_mp = copymsg(first_mp); 5871 if (copy_mp) { 5872 MULTIRT_DEBUG_TAG(copy_mp); 5873 } 5874 } 5875 5876 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5877 switch (ire->ire_type) { 5878 case IRE_IF_NORESOLVER: { 5879 /* 5880 * We have what we need to build an IRE_CACHE. 5881 * 5882 * handle the Gated case, where we create 5883 * a NORESOLVER route for loopback. 5884 */ 5885 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5886 break; 5887 /* 5888 * The newly created ire will inherit the flags of the 5889 * parent ire, if any. 5890 */ 5891 ire = ire_create_v6( 5892 v6dstp, /* dest address */ 5893 &ipv6_all_ones, /* mask */ 5894 &src_ipif->ipif_v6src_addr, /* source address */ 5895 NULL, /* gateway address */ 5896 &save_ire->ire_max_frag, 5897 NULL, /* no src nce */ 5898 dst_ill->ill_rq, /* recv-from queue */ 5899 dst_ill->ill_wq, /* send-to queue */ 5900 IRE_CACHE, 5901 src_ipif, 5902 NULL, 5903 (fire != NULL) ? /* Parent handle */ 5904 fire->ire_phandle : 0, 5905 save_ire->ire_ihandle, /* Interface handle */ 5906 (fire != NULL) ? 5907 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5908 0, 5909 &ire_uinfo_null, 5910 NULL, 5911 NULL, 5912 ipst); 5913 5914 if (ire == NULL) { 5915 ire_refrele(save_ire); 5916 break; 5917 } 5918 5919 ire->ire_marks |= ire_marks; 5920 5921 err = ndp_noresolver(dst_ill, v6dstp); 5922 if (err != 0) { 5923 ire_refrele(save_ire); 5924 break; 5925 } 5926 5927 /* Prevent save_ire from getting deleted */ 5928 IRB_REFHOLD(save_ire->ire_bucket); 5929 /* Has it been removed already ? 
*/ 5930 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5931 IRB_REFRELE(save_ire->ire_bucket); 5932 ire_refrele(save_ire); 5933 break; 5934 } 5935 5936 ire_add_then_send(q, ire, first_mp); 5937 if (ip6_asp_table_held) { 5938 ip6_asp_table_refrele(ipst); 5939 ip6_asp_table_held = B_FALSE; 5940 } 5941 5942 /* Assert that it is not deleted yet. */ 5943 ASSERT(save_ire->ire_ptpn != NULL); 5944 IRB_REFRELE(save_ire->ire_bucket); 5945 ire_refrele(save_ire); 5946 if (fire != NULL) { 5947 ire_refrele(fire); 5948 fire = NULL; 5949 } 5950 5951 /* 5952 * The resolution loop is re-entered if we 5953 * actually are in a multirouting case. 5954 */ 5955 if (copy_mp != NULL) { 5956 boolean_t need_resolve = 5957 ire_multirt_need_resolve_v6(v6dstp, 5958 MBLK_GETLABEL(copy_mp), ipst); 5959 if (!need_resolve) { 5960 MULTIRT_DEBUG_UNTAG(copy_mp); 5961 freemsg(copy_mp); 5962 copy_mp = NULL; 5963 } else { 5964 /* 5965 * ipif_lookup_group_v6() calls 5966 * ire_lookup_multi_v6() that uses 5967 * ire_ftable_lookup_v6() to find 5968 * an IRE_INTERFACE for the group. 5969 * In the multirt case, 5970 * ire_lookup_multi_v6() then invokes 5971 * ire_multirt_lookup_v6() to find 5972 * the next resolvable ire. 5973 * As a result, we obtain a new 5974 * interface, derived from the 5975 * next ire. 
5976 */ 5977 if (ipif_held) { 5978 ipif_refrele(ipif); 5979 ipif_held = B_FALSE; 5980 } 5981 ipif = ipif_lookup_group_v6(v6dstp, 5982 zoneid, ipst); 5983 ip2dbg(("ip_newroute_ipif: " 5984 "multirt dst %08x, ipif %p\n", 5985 ntohl(V4_PART_OF_V6((*v6dstp))), 5986 (void *)ipif)); 5987 if (ipif != NULL) { 5988 ipif_held = B_TRUE; 5989 mp = copy_mp; 5990 copy_mp = NULL; 5991 multirt_resolve_next = 5992 B_TRUE; 5993 continue; 5994 } else { 5995 freemsg(copy_mp); 5996 } 5997 } 5998 } 5999 ill_refrele(dst_ill); 6000 if (ipif_held) { 6001 ipif_refrele(ipif); 6002 ipif_held = B_FALSE; 6003 } 6004 if (src_ipif != NULL) 6005 ipif_refrele(src_ipif); 6006 return; 6007 } 6008 case IRE_IF_RESOLVER: { 6009 6010 ASSERT(dst_ill->ill_isv6); 6011 6012 /* 6013 * We obtain a partial IRE_CACHE which we will pass 6014 * along with the resolver query. When the response 6015 * comes back it will be there ready for us to add. 6016 */ 6017 /* 6018 * the newly created ire will inherit the flags of the 6019 * parent ire, if any. 6020 */ 6021 ire = ire_create_v6( 6022 v6dstp, /* dest address */ 6023 &ipv6_all_ones, /* mask */ 6024 &src_ipif->ipif_v6src_addr, /* source address */ 6025 NULL, /* gateway address */ 6026 &save_ire->ire_max_frag, 6027 NULL, /* src nce */ 6028 dst_ill->ill_rq, /* recv-from queue */ 6029 dst_ill->ill_wq, /* send-to queue */ 6030 IRE_CACHE, 6031 src_ipif, 6032 NULL, 6033 (fire != NULL) ? /* Parent handle */ 6034 fire->ire_phandle : 0, 6035 save_ire->ire_ihandle, /* Interface handle */ 6036 (fire != NULL) ? 
6037 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6038 0, 6039 &ire_uinfo_null, 6040 NULL, 6041 NULL, 6042 ipst); 6043 6044 if (ire == NULL) { 6045 ire_refrele(save_ire); 6046 break; 6047 } 6048 6049 ire->ire_marks |= ire_marks; 6050 6051 /* Resolve and add ire to the ctable */ 6052 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6053 switch (err) { 6054 case 0: 6055 /* Prevent save_ire from getting deleted */ 6056 IRB_REFHOLD(save_ire->ire_bucket); 6057 /* Has it been removed already ? */ 6058 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6059 IRB_REFRELE(save_ire->ire_bucket); 6060 ire_refrele(save_ire); 6061 break; 6062 } 6063 /* 6064 * We have a resolved cache entry, 6065 * add in the IRE. 6066 */ 6067 ire_add_then_send(q, ire, first_mp); 6068 if (ip6_asp_table_held) { 6069 ip6_asp_table_refrele(ipst); 6070 ip6_asp_table_held = B_FALSE; 6071 } 6072 6073 /* Assert that it is not deleted yet. */ 6074 ASSERT(save_ire->ire_ptpn != NULL); 6075 IRB_REFRELE(save_ire->ire_bucket); 6076 ire_refrele(save_ire); 6077 if (fire != NULL) { 6078 ire_refrele(fire); 6079 fire = NULL; 6080 } 6081 6082 /* 6083 * The resolution loop is re-entered if we 6084 * actually are in a multirouting case. 6085 */ 6086 if (copy_mp != NULL) { 6087 boolean_t need_resolve = 6088 ire_multirt_need_resolve_v6(v6dstp, 6089 MBLK_GETLABEL(copy_mp), ipst); 6090 if (!need_resolve) { 6091 MULTIRT_DEBUG_UNTAG(copy_mp); 6092 freemsg(copy_mp); 6093 copy_mp = NULL; 6094 } else { 6095 /* 6096 * ipif_lookup_group_v6() calls 6097 * ire_lookup_multi_v6() that 6098 * uses ire_ftable_lookup_v6() 6099 * to find an IRE_INTERFACE for 6100 * the group. In the multirt 6101 * case, ire_lookup_multi_v6() 6102 * then invokes 6103 * ire_multirt_lookup_v6() to 6104 * find the next resolvable ire. 6105 * As a result, we obtain a new 6106 * interface, derived from the 6107 * next ire. 
6108 */ 6109 if (ipif_held) { 6110 ipif_refrele(ipif); 6111 ipif_held = B_FALSE; 6112 } 6113 ipif = ipif_lookup_group_v6( 6114 v6dstp, zoneid, ipst); 6115 ip2dbg(("ip_newroute_ipif: " 6116 "multirt dst %08x, " 6117 "ipif %p\n", 6118 ntohl(V4_PART_OF_V6( 6119 (*v6dstp))), 6120 (void *)ipif)); 6121 if (ipif != NULL) { 6122 ipif_held = B_TRUE; 6123 mp = copy_mp; 6124 copy_mp = NULL; 6125 multirt_resolve_next = 6126 B_TRUE; 6127 continue; 6128 } else { 6129 freemsg(copy_mp); 6130 } 6131 } 6132 } 6133 ill_refrele(dst_ill); 6134 if (ipif_held) { 6135 ipif_refrele(ipif); 6136 ipif_held = B_FALSE; 6137 } 6138 if (src_ipif != NULL) 6139 ipif_refrele(src_ipif); 6140 return; 6141 6142 case EINPROGRESS: 6143 /* 6144 * mp was consumed - presumably queued. 6145 * No need for ire, presumably resolution is 6146 * in progress, and ire will be added when the 6147 * address is resolved. 6148 */ 6149 if (ip6_asp_table_held) { 6150 ip6_asp_table_refrele(ipst); 6151 ip6_asp_table_held = B_FALSE; 6152 } 6153 ire_delete(ire); 6154 ire_refrele(save_ire); 6155 if (fire != NULL) { 6156 ire_refrele(fire); 6157 fire = NULL; 6158 } 6159 6160 /* 6161 * The resolution loop is re-entered if we 6162 * actually are in a multirouting case. 6163 */ 6164 if (copy_mp != NULL) { 6165 boolean_t need_resolve = 6166 ire_multirt_need_resolve_v6(v6dstp, 6167 MBLK_GETLABEL(copy_mp), ipst); 6168 if (!need_resolve) { 6169 MULTIRT_DEBUG_UNTAG(copy_mp); 6170 freemsg(copy_mp); 6171 copy_mp = NULL; 6172 } else { 6173 /* 6174 * ipif_lookup_group_v6() calls 6175 * ire_lookup_multi_v6() that 6176 * uses ire_ftable_lookup_v6() 6177 * to find an IRE_INTERFACE for 6178 * the group. In the multirt 6179 * case, ire_lookup_multi_v6() 6180 * then invokes 6181 * ire_multirt_lookup_v6() to 6182 * find the next resolvable ire. 6183 * As a result, we obtain a new 6184 * interface, derived from the 6185 * next ire. 
6186 */ 6187 if (ipif_held) { 6188 ipif_refrele(ipif); 6189 ipif_held = B_FALSE; 6190 } 6191 ipif = ipif_lookup_group_v6( 6192 v6dstp, zoneid, ipst); 6193 ip2dbg(("ip_newroute_ipif: " 6194 "multirt dst %08x, " 6195 "ipif %p\n", 6196 ntohl(V4_PART_OF_V6( 6197 (*v6dstp))), 6198 (void *)ipif)); 6199 if (ipif != NULL) { 6200 ipif_held = B_TRUE; 6201 mp = copy_mp; 6202 copy_mp = NULL; 6203 multirt_resolve_next = 6204 B_TRUE; 6205 continue; 6206 } else { 6207 freemsg(copy_mp); 6208 } 6209 } 6210 } 6211 ill_refrele(dst_ill); 6212 if (ipif_held) { 6213 ipif_refrele(ipif); 6214 ipif_held = B_FALSE; 6215 } 6216 if (src_ipif != NULL) 6217 ipif_refrele(src_ipif); 6218 return; 6219 default: 6220 /* Some transient error */ 6221 ire_refrele(save_ire); 6222 break; 6223 } 6224 break; 6225 } 6226 default: 6227 break; 6228 } 6229 if (ip6_asp_table_held) { 6230 ip6_asp_table_refrele(ipst); 6231 ip6_asp_table_held = B_FALSE; 6232 } 6233 } while (multirt_resolve_next); 6234 6235 err_ret: 6236 if (ip6_asp_table_held) 6237 ip6_asp_table_refrele(ipst); 6238 if (ire != NULL) 6239 ire_refrele(ire); 6240 if (fire != NULL) 6241 ire_refrele(fire); 6242 if (ipif != NULL && ipif_held) 6243 ipif_refrele(ipif); 6244 if (src_ipif != NULL) 6245 ipif_refrele(src_ipif); 6246 /* Multicast - no point in trying to generate ICMP error */ 6247 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6248 if (dst_ill != NULL) { 6249 ill = dst_ill; 6250 ill_held = B_TRUE; 6251 } 6252 if (mp->b_prev || mp->b_next) { 6253 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6254 } else { 6255 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6256 } 6257 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6258 mp->b_next = NULL; 6259 mp->b_prev = NULL; 6260 freemsg(first_mp); 6261 if (ill_held) 6262 ill_refrele(ill); 6263 } 6264 6265 /* 6266 * Parse and process any hop-by-hop or destination options. 
6267 * 6268 * Assumes that q is an ill read queue so that ICMP errors for link-local 6269 * destinations are sent out the correct interface. 6270 * 6271 * Returns -1 if there was an error and mp has been consumed. 6272 * Returns 0 if no special action is needed. 6273 * Returns 1 if the packet contained a router alert option for this node 6274 * which is verified to be "interesting/known" for our implementation. 6275 * 6276 * XXX Note: In future as more hbh or dest options are defined, 6277 * it may be better to have different routines for hbh and dest 6278 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6279 * may have same value in different namespaces. Or is it same namespace ?? 6280 * Current code checks for each opt_type (other than pads) if it is in 6281 * the expected nexthdr (hbh or dest) 6282 */ 6283 static int 6284 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6285 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6286 { 6287 uint8_t opt_type; 6288 uint_t optused; 6289 int ret = 0; 6290 mblk_t *first_mp; 6291 const char *errtype; 6292 zoneid_t zoneid; 6293 ill_t *ill = q->q_ptr; 6294 ipif_t *ipif; 6295 6296 first_mp = mp; 6297 if (mp->b_datap->db_type == M_CTL) { 6298 mp = mp->b_cont; 6299 } 6300 6301 while (optlen != 0) { 6302 opt_type = *optptr; 6303 if (opt_type == IP6OPT_PAD1) { 6304 optused = 1; 6305 } else { 6306 if (optlen < 2) 6307 goto bad_opt; 6308 errtype = "malformed"; 6309 if (opt_type == ip6opt_ls) { 6310 optused = 2 + optptr[1]; 6311 if (optused > optlen) 6312 goto bad_opt; 6313 } else switch (opt_type) { 6314 case IP6OPT_PADN: 6315 /* 6316 * Note:We don't verify that (N-2) pad octets 6317 * are zero as required by spec. Adhere to 6318 * "be liberal in what you accept..." 
part of 6319 * implementation philosophy (RFC791,RFC1122) 6320 */ 6321 optused = 2 + optptr[1]; 6322 if (optused > optlen) 6323 goto bad_opt; 6324 break; 6325 6326 case IP6OPT_JUMBO: 6327 if (hdr_type != IPPROTO_HOPOPTS) 6328 goto opt_error; 6329 goto opt_error; /* XXX Not implemented! */ 6330 6331 case IP6OPT_ROUTER_ALERT: { 6332 struct ip6_opt_router *or; 6333 6334 if (hdr_type != IPPROTO_HOPOPTS) 6335 goto opt_error; 6336 optused = 2 + optptr[1]; 6337 if (optused > optlen) 6338 goto bad_opt; 6339 or = (struct ip6_opt_router *)optptr; 6340 /* Check total length and alignment */ 6341 if (optused != sizeof (*or) || 6342 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6343 goto opt_error; 6344 /* Check value */ 6345 switch (*((uint16_t *)or->ip6or_value)) { 6346 case IP6_ALERT_MLD: 6347 case IP6_ALERT_RSVP: 6348 ret = 1; 6349 } 6350 break; 6351 } 6352 case IP6OPT_HOME_ADDRESS: { 6353 /* 6354 * Minimal support for the home address option 6355 * (which is required by all IPv6 nodes). 6356 * Implement by just swapping the home address 6357 * and source address. 6358 * XXX Note: this has IPsec implications since 6359 * AH needs to take this into account. 6360 * Also, when IPsec is used we need to ensure 6361 * that this is only processed once 6362 * in the received packet (to avoid swapping 6363 * back and forth). 6364 * NOTE:This option processing is considered 6365 * to be unsafe and prone to a denial of 6366 * service attack. 6367 * The current processing is not safe even with 6368 * IPsec secured IP packets. Since the home 6369 * address option processing requirement still 6370 * is in the IETF draft and in the process of 6371 * being redefined for its usage, it has been 6372 * decided to turn off the option by default. 6373 * If this section of code needs to be executed, 6374 * ndd variable ip6_ignore_home_address_opt 6375 * should be set to 0 at the user's own risk. 
6376 */ 6377 struct ip6_opt_home_address *oh; 6378 in6_addr_t tmp; 6379 6380 if (ipst->ips_ipv6_ignore_home_address_opt) 6381 goto opt_error; 6382 6383 if (hdr_type != IPPROTO_DSTOPTS) 6384 goto opt_error; 6385 optused = 2 + optptr[1]; 6386 if (optused > optlen) 6387 goto bad_opt; 6388 6389 /* 6390 * We did this dest. opt the first time 6391 * around (i.e. before AH processing). 6392 * If we've done AH... stop now. 6393 */ 6394 if (first_mp != mp) { 6395 ipsec_in_t *ii; 6396 6397 ii = (ipsec_in_t *)first_mp->b_rptr; 6398 if (ii->ipsec_in_ah_sa != NULL) 6399 break; 6400 } 6401 6402 oh = (struct ip6_opt_home_address *)optptr; 6403 /* Check total length and alignment */ 6404 if (optused < sizeof (*oh) || 6405 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6406 goto opt_error; 6407 /* Swap ip6_src and the home address */ 6408 tmp = ip6h->ip6_src; 6409 /* XXX Note: only 8 byte alignment option */ 6410 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6411 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6412 break; 6413 } 6414 6415 case IP6OPT_TUNNEL_LIMIT: 6416 if (hdr_type != IPPROTO_DSTOPTS) { 6417 goto opt_error; 6418 } 6419 optused = 2 + optptr[1]; 6420 if (optused > optlen) { 6421 goto bad_opt; 6422 } 6423 if (optused != 3) { 6424 goto opt_error; 6425 } 6426 break; 6427 6428 default: 6429 errtype = "unknown"; 6430 /* FALLTHROUGH */ 6431 opt_error: 6432 /* Determine which zone should send error */ 6433 zoneid = ipif_lookup_addr_zoneid_v6( 6434 &ip6h->ip6_dst, ill, ipst); 6435 switch (IP6OPT_TYPE(opt_type)) { 6436 case IP6OPT_TYPE_SKIP: 6437 optused = 2 + optptr[1]; 6438 if (optused > optlen) 6439 goto bad_opt; 6440 ip1dbg(("ip_process_options_v6: %s " 6441 "opt 0x%x skipped\n", 6442 errtype, opt_type)); 6443 break; 6444 case IP6OPT_TYPE_DISCARD: 6445 ip1dbg(("ip_process_options_v6: %s " 6446 "opt 0x%x; packet dropped\n", 6447 errtype, opt_type)); 6448 freemsg(first_mp); 6449 return (-1); 6450 case IP6OPT_TYPE_ICMP: 6451 if (zoneid == ALL_ZONES) { 6452 freemsg(first_mp); 6453 return 
(-1); 6454 } 6455 icmp_param_problem_v6(WR(q), first_mp, 6456 ICMP6_PARAMPROB_OPTION, 6457 (uint32_t)(optptr - 6458 (uint8_t *)ip6h), 6459 B_FALSE, B_FALSE, zoneid, ipst); 6460 return (-1); 6461 case IP6OPT_TYPE_FORCEICMP: 6462 /* 6463 * If we don't have a zone and the dst 6464 * addr is multicast, then pick a zone 6465 * based on the inbound interface. 6466 */ 6467 if (zoneid == ALL_ZONES && 6468 IN6_IS_ADDR_MULTICAST( 6469 &ip6h->ip6_dst)) { 6470 ipif = ipif_select_source_v6( 6471 ill, &ip6h->ip6_src, 6472 RESTRICT_TO_GROUP, 6473 IPV6_PREFER_SRC_DEFAULT, 6474 ALL_ZONES); 6475 if (ipif != NULL) { 6476 zoneid = 6477 ipif->ipif_zoneid; 6478 ipif_refrele(ipif); 6479 } 6480 } 6481 if (zoneid == ALL_ZONES) { 6482 freemsg(first_mp); 6483 return (-1); 6484 } 6485 icmp_param_problem_v6(WR(q), first_mp, 6486 ICMP6_PARAMPROB_OPTION, 6487 (uint32_t)(optptr - 6488 (uint8_t *)ip6h), 6489 B_FALSE, B_TRUE, zoneid, ipst); 6490 return (-1); 6491 default: 6492 ASSERT(0); 6493 } 6494 } 6495 } 6496 optlen -= optused; 6497 optptr += optused; 6498 } 6499 return (ret); 6500 6501 bad_opt: 6502 /* Determine which zone should send error */ 6503 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6504 if (zoneid == ALL_ZONES) { 6505 freemsg(first_mp); 6506 } else { 6507 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6508 (uint32_t)(optptr - (uint8_t *)ip6h), 6509 B_FALSE, B_FALSE, zoneid, ipst); 6510 } 6511 return (-1); 6512 } 6513 6514 /* 6515 * Process a routing header that is not yet empty. 6516 * Only handles type 0 routing headers. 
6517 */ 6518 static void 6519 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6520 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6521 { 6522 ip6_rthdr0_t *rthdr; 6523 uint_t ehdrlen; 6524 uint_t numaddr; 6525 in6_addr_t *addrptr; 6526 in6_addr_t tmp; 6527 ip_stack_t *ipst = ill->ill_ipst; 6528 6529 ASSERT(rth->ip6r_segleft != 0); 6530 6531 if (!ipst->ips_ipv6_forward_src_routed) { 6532 /* XXX Check for source routed out same interface? */ 6533 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6534 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6535 freemsg(hada_mp); 6536 freemsg(mp); 6537 return; 6538 } 6539 6540 if (rth->ip6r_type != 0) { 6541 if (hada_mp != NULL) 6542 goto hada_drop; 6543 /* Sent by forwarding path, and router is global zone */ 6544 icmp_param_problem_v6(WR(q), mp, 6545 ICMP6_PARAMPROB_HEADER, 6546 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6547 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6548 return; 6549 } 6550 rthdr = (ip6_rthdr0_t *)rth; 6551 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6552 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6553 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6554 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6555 if (rthdr->ip6r0_len & 0x1) { 6556 /* An odd length is impossible */ 6557 if (hada_mp != NULL) 6558 goto hada_drop; 6559 /* Sent by forwarding path, and router is global zone */ 6560 icmp_param_problem_v6(WR(q), mp, 6561 ICMP6_PARAMPROB_HEADER, 6562 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6563 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6564 return; 6565 } 6566 numaddr = rthdr->ip6r0_len / 2; 6567 if (rthdr->ip6r0_segleft > numaddr) { 6568 /* segleft exceeds number of addresses in routing header */ 6569 if (hada_mp != NULL) 6570 goto hada_drop; 6571 /* Sent by forwarding path, and router is global zone */ 6572 icmp_param_problem_v6(WR(q), mp, 6573 ICMP6_PARAMPROB_HEADER, 6574 (uint32_t)((uchar_t 
*)&rthdr->ip6r0_segleft - 6575 (uchar_t *)ip6h), 6576 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6577 return; 6578 } 6579 addrptr += (numaddr - rthdr->ip6r0_segleft); 6580 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6581 IN6_IS_ADDR_MULTICAST(addrptr)) { 6582 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6583 freemsg(hada_mp); 6584 freemsg(mp); 6585 return; 6586 } 6587 /* Swap */ 6588 tmp = *addrptr; 6589 *addrptr = ip6h->ip6_dst; 6590 ip6h->ip6_dst = tmp; 6591 rthdr->ip6r0_segleft--; 6592 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6593 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6594 if (hada_mp != NULL) 6595 goto hada_drop; 6596 /* Sent by forwarding path, and router is global zone */ 6597 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6598 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6599 return; 6600 } 6601 if (ip_check_v6_mblk(mp, ill) == IP6_MBLK_OK) { 6602 ip6h = (ip6_t *)mp->b_rptr; 6603 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6604 } else { 6605 freemsg(mp); 6606 } 6607 return; 6608 hada_drop: 6609 /* IPsec kstats: bean counter? */ 6610 freemsg(hada_mp); 6611 freemsg(mp); 6612 } 6613 6614 /* 6615 * Read side put procedure for IPv6 module. 6616 */ 6617 void 6618 ip_rput_v6(queue_t *q, mblk_t *mp) 6619 { 6620 mblk_t *first_mp; 6621 mblk_t *hada_mp = NULL; 6622 ip6_t *ip6h; 6623 boolean_t ll_multicast = B_FALSE; 6624 boolean_t mctl_present = B_FALSE; 6625 ill_t *ill; 6626 struct iocblk *iocp; 6627 uint_t flags = 0; 6628 mblk_t *dl_mp; 6629 ip_stack_t *ipst; 6630 int check; 6631 6632 ill = (ill_t *)q->q_ptr; 6633 ipst = ill->ill_ipst; 6634 if (ill->ill_state_flags & ILL_CONDEMNED) { 6635 union DL_primitives *dl; 6636 6637 dl = (union DL_primitives *)mp->b_rptr; 6638 /* 6639 * Things are opening or closing - only accept DLPI 6640 * ack messages. If the stream is closing and ip_wsrv 6641 * has completed, ip_close is out of the qwait, but has 6642 * not yet completed qprocsoff. 
Don't proceed any further 6643 * because the ill has been cleaned up and things hanging 6644 * off the ill have been freed. 6645 */ 6646 if ((mp->b_datap->db_type != M_PCPROTO) || 6647 (dl->dl_primitive == DL_UNITDATA_IND)) { 6648 inet_freemsg(mp); 6649 return; 6650 } 6651 } 6652 6653 dl_mp = NULL; 6654 switch (mp->b_datap->db_type) { 6655 case M_DATA: { 6656 int hlen; 6657 uchar_t *ucp; 6658 struct ether_header *eh; 6659 dl_unitdata_ind_t *dui; 6660 6661 /* 6662 * This is a work-around for CR 6451644, a bug in Nemo. It 6663 * should be removed when that problem is fixed. 6664 */ 6665 if (ill->ill_mactype == DL_ETHER && 6666 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6667 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6668 ucp[-2] == (IP6_DL_SAP >> 8)) { 6669 if (hlen >= sizeof (struct ether_vlan_header) && 6670 ucp[-5] == 0 && ucp[-6] == 0x81) 6671 ucp -= sizeof (struct ether_vlan_header); 6672 else 6673 ucp -= sizeof (struct ether_header); 6674 /* 6675 * If it's a group address, then fabricate a 6676 * DL_UNITDATA_IND message. 
6677 */ 6678 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6679 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6680 BPRI_HI)) != NULL) { 6681 eh = (struct ether_header *)ucp; 6682 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6683 DB_TYPE(dl_mp) = M_PROTO; 6684 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6685 dui->dl_primitive = DL_UNITDATA_IND; 6686 dui->dl_dest_addr_length = 8; 6687 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6688 dui->dl_src_addr_length = 8; 6689 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6690 8; 6691 dui->dl_group_address = 1; 6692 ucp = (uchar_t *)(dui + 1); 6693 if (ill->ill_sap_length > 0) 6694 ucp += ill->ill_sap_length; 6695 bcopy(&eh->ether_dhost, ucp, 6); 6696 bcopy(&eh->ether_shost, ucp + 8, 6); 6697 ucp = (uchar_t *)(dui + 1); 6698 if (ill->ill_sap_length < 0) 6699 ucp += 8 + ill->ill_sap_length; 6700 bcopy(&eh->ether_type, ucp, 2); 6701 bcopy(&eh->ether_type, ucp + 8, 2); 6702 } 6703 } 6704 break; 6705 } 6706 6707 case M_PROTO: 6708 case M_PCPROTO: 6709 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6710 DL_UNITDATA_IND) { 6711 /* Go handle anything other than data elsewhere. */ 6712 ip_rput_dlpi(q, mp); 6713 return; 6714 } 6715 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6716 6717 /* Save the DLPI header. 
*/ 6718 dl_mp = mp; 6719 mp = mp->b_cont; 6720 dl_mp->b_cont = NULL; 6721 break; 6722 case M_BREAK: 6723 panic("ip_rput_v6: got an M_BREAK"); 6724 /*NOTREACHED*/ 6725 case M_IOCACK: 6726 iocp = (struct iocblk *)mp->b_rptr; 6727 switch (iocp->ioc_cmd) { 6728 case DL_IOC_HDR_INFO: 6729 ill = (ill_t *)q->q_ptr; 6730 ill_fastpath_ack(ill, mp); 6731 return; 6732 6733 case SIOCGTUNPARAM: 6734 case OSIOCGTUNPARAM: 6735 ip_rput_other(NULL, q, mp, NULL); 6736 return; 6737 6738 case SIOCSTUNPARAM: 6739 case OSIOCSTUNPARAM: 6740 /* Go through qwriter */ 6741 break; 6742 default: 6743 putnext(q, mp); 6744 return; 6745 } 6746 /* FALLTHRU */ 6747 case M_ERROR: 6748 case M_HANGUP: 6749 mutex_enter(&ill->ill_lock); 6750 if (ill->ill_state_flags & ILL_CONDEMNED) { 6751 mutex_exit(&ill->ill_lock); 6752 freemsg(mp); 6753 return; 6754 } 6755 ill_refhold_locked(ill); 6756 mutex_exit(&ill->ill_lock); 6757 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6758 return; 6759 case M_CTL: 6760 if ((MBLKL(mp) > sizeof (int)) && 6761 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6762 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6763 mctl_present = B_TRUE; 6764 break; 6765 } 6766 putnext(q, mp); 6767 return; 6768 case M_IOCNAK: 6769 iocp = (struct iocblk *)mp->b_rptr; 6770 switch (iocp->ioc_cmd) { 6771 case DL_IOC_HDR_INFO: 6772 case SIOCGTUNPARAM: 6773 case OSIOCGTUNPARAM: 6774 ip_rput_other(NULL, q, mp, NULL); 6775 return; 6776 6777 case SIOCSTUNPARAM: 6778 case OSIOCSTUNPARAM: 6779 mutex_enter(&ill->ill_lock); 6780 if (ill->ill_state_flags & ILL_CONDEMNED) { 6781 mutex_exit(&ill->ill_lock); 6782 freemsg(mp); 6783 return; 6784 } 6785 ill_refhold_locked(ill); 6786 mutex_exit(&ill->ill_lock); 6787 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6788 return; 6789 default: 6790 break; 6791 } 6792 /* FALLTHRU */ 6793 default: 6794 putnext(q, mp); 6795 return; 6796 } 6797 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6798 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6799 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6800 /* 6801 * if db_ref > 1 then copymsg and free original. Packet may be 6802 * changed and do not want other entity who has a reference to this 6803 * message to trip over the changes. This is a blind change because 6804 * trying to catch all places that might change packet is too 6805 * difficult (since it may be a module above this one). 6806 */ 6807 if (mp->b_datap->db_ref > 1) { 6808 mblk_t *mp1; 6809 6810 mp1 = copymsg(mp); 6811 freemsg(mp); 6812 if (mp1 == NULL) { 6813 first_mp = NULL; 6814 goto discard; 6815 } 6816 mp = mp1; 6817 } 6818 first_mp = mp; 6819 if (mctl_present) { 6820 hada_mp = first_mp; 6821 mp = first_mp->b_cont; 6822 } 6823 6824 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6825 freemsg(mp); 6826 return; 6827 } 6828 6829 ip6h = (ip6_t *)mp->b_rptr; 6830 6831 /* 6832 * ip:::receive must see ipv6 packets with a full header, 6833 * and so is placed after the IP6_MBLK_HDR_ERR check. 6834 */ 6835 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6836 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6837 int, 0); 6838 6839 if (check != IP6_MBLK_OK) { 6840 freemsg(mp); 6841 return; 6842 } 6843 6844 DTRACE_PROBE4(ip6__physical__in__start, 6845 ill_t *, ill, ill_t *, NULL, 6846 ip6_t *, ip6h, mblk_t *, first_mp); 6847 6848 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6849 ipst->ips_ipv6firewall_physical_in, 6850 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6851 6852 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6853 6854 if (first_mp == NULL) 6855 return; 6856 6857 /* 6858 * Attach any necessary label information to this packet. 6859 */ 6860 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 6861 if (ip6opt_ls != 0) 6862 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 6863 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 6864 goto discard; 6865 } 6866 6867 /* IP observability hook. 
*/ 6868 if (ipst->ips_ipobs_enabled) { 6869 zoneid_t dzone; 6870 6871 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 6872 ALL_ZONES); 6873 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, 6874 IPV6_VERSION, 0, ipst); 6875 } 6876 6877 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6878 IPV6_DEFAULT_VERS_AND_FLOW) { 6879 /* 6880 * It may be a bit too expensive to do this mapped address 6881 * check here, but in the interest of robustness, it seems 6882 * like the correct place. 6883 * TODO: Avoid this check for e.g. connected TCP sockets 6884 */ 6885 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6886 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6887 goto discard; 6888 } 6889 6890 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6891 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6892 goto discard; 6893 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6894 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6895 goto discard; 6896 } 6897 6898 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6899 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6900 } else { 6901 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6902 goto discard; 6903 } 6904 freemsg(dl_mp); 6905 return; 6906 6907 discard: 6908 if (dl_mp != NULL) 6909 freeb(dl_mp); 6910 freemsg(first_mp); 6911 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6912 } 6913 6914 /* 6915 * Walk through the IPv6 packet in mp and see if there's an AH header 6916 * in it. See if the AH header needs to get done before other headers in 6917 * the packet. (Worker function for ipsec_early_ah_v6().) 
6918 */ 6919 #define IPSEC_HDR_DONT_PROCESS 0 6920 #define IPSEC_HDR_PROCESS 1 6921 #define IPSEC_MEMORY_ERROR 2 6922 static int 6923 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6924 { 6925 uint_t length; 6926 uint_t ehdrlen; 6927 uint8_t *whereptr; 6928 uint8_t *endptr; 6929 uint8_t *nexthdrp; 6930 ip6_dest_t *desthdr; 6931 ip6_rthdr_t *rthdr; 6932 ip6_t *ip6h; 6933 6934 /* 6935 * For now just pullup everything. In general, the less pullups, 6936 * the better, but there's so much squirrelling through anyway, 6937 * it's just easier this way. 6938 */ 6939 if (!pullupmsg(mp, -1)) { 6940 return (IPSEC_MEMORY_ERROR); 6941 } 6942 6943 ip6h = (ip6_t *)mp->b_rptr; 6944 length = IPV6_HDR_LEN; 6945 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6946 endptr = mp->b_wptr; 6947 6948 /* 6949 * We can't just use the argument nexthdr in the place 6950 * of nexthdrp becaue we don't dereference nexthdrp 6951 * till we confirm whether it is a valid address. 6952 */ 6953 nexthdrp = &ip6h->ip6_nxt; 6954 while (whereptr < endptr) { 6955 /* Is there enough left for len + nexthdr? */ 6956 if (whereptr + MIN_EHDR_LEN > endptr) 6957 return (IPSEC_MEMORY_ERROR); 6958 6959 switch (*nexthdrp) { 6960 case IPPROTO_HOPOPTS: 6961 case IPPROTO_DSTOPTS: 6962 /* Assumes the headers are identical for hbh and dst */ 6963 desthdr = (ip6_dest_t *)whereptr; 6964 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6965 if ((uchar_t *)desthdr + ehdrlen > endptr) 6966 return (IPSEC_MEMORY_ERROR); 6967 /* 6968 * Return DONT_PROCESS because the destination 6969 * options header may be for each hop in a 6970 * routing-header, and we only want AH if we're 6971 * finished with routing headers. 6972 */ 6973 if (*nexthdrp == IPPROTO_DSTOPTS) 6974 return (IPSEC_HDR_DONT_PROCESS); 6975 nexthdrp = &desthdr->ip6d_nxt; 6976 break; 6977 case IPPROTO_ROUTING: 6978 rthdr = (ip6_rthdr_t *)whereptr; 6979 6980 /* 6981 * If there's more hops left on the routing header, 6982 * return now with DON'T PROCESS. 
6983 */ 6984 if (rthdr->ip6r_segleft > 0) 6985 return (IPSEC_HDR_DONT_PROCESS); 6986 6987 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6988 if ((uchar_t *)rthdr + ehdrlen > endptr) 6989 return (IPSEC_MEMORY_ERROR); 6990 nexthdrp = &rthdr->ip6r_nxt; 6991 break; 6992 case IPPROTO_FRAGMENT: 6993 /* Wait for reassembly */ 6994 return (IPSEC_HDR_DONT_PROCESS); 6995 case IPPROTO_AH: 6996 *nexthdr = IPPROTO_AH; 6997 return (IPSEC_HDR_PROCESS); 6998 case IPPROTO_NONE: 6999 /* No next header means we're finished */ 7000 default: 7001 return (IPSEC_HDR_DONT_PROCESS); 7002 } 7003 length += ehdrlen; 7004 whereptr += ehdrlen; 7005 } 7006 panic("ipsec_needs_processing_v6"); 7007 /*NOTREACHED*/ 7008 } 7009 7010 /* 7011 * Path for AH if options are present. If this is the first time we are 7012 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7013 * Otherwise, just fanout. Return value answers the boolean question: 7014 * "Did I consume the mblk you sent me?" 7015 * 7016 * Sometimes AH needs to be done before other IPv6 headers for security 7017 * reasons. This function (and its ipsec_needs_processing_v6() above) 7018 * indicates if that is so, and fans out to the appropriate IPsec protocol 7019 * for the datagram passed in. 7020 */ 7021 static boolean_t 7022 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7023 ill_t *ill, mblk_t *hada_mp, zoneid_t zoneid) 7024 { 7025 mblk_t *mp; 7026 uint8_t nexthdr; 7027 ipsec_in_t *ii = NULL; 7028 ah_t *ah; 7029 ipsec_status_t ipsec_rc; 7030 ip_stack_t *ipst = ill->ill_ipst; 7031 netstack_t *ns = ipst->ips_netstack; 7032 ipsec_stack_t *ipss = ns->netstack_ipsec; 7033 7034 ASSERT((hada_mp == NULL) || (!mctl_present)); 7035 7036 switch (ipsec_needs_processing_v6( 7037 (mctl_present ? 
first_mp->b_cont : first_mp), &nexthdr)) { 7038 case IPSEC_MEMORY_ERROR: 7039 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7040 freemsg(hada_mp); 7041 freemsg(first_mp); 7042 return (B_TRUE); 7043 case IPSEC_HDR_DONT_PROCESS: 7044 return (B_FALSE); 7045 } 7046 7047 /* Default means send it to AH! */ 7048 ASSERT(nexthdr == IPPROTO_AH); 7049 if (!mctl_present) { 7050 mp = first_mp; 7051 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 7052 if (first_mp == NULL) { 7053 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7054 "allocation failure.\n")); 7055 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7056 freemsg(hada_mp); 7057 freemsg(mp); 7058 return (B_TRUE); 7059 } 7060 /* 7061 * Store the ill_index so that when we come back 7062 * from IPSEC we ride on the same queue. 7063 */ 7064 ii = (ipsec_in_t *)first_mp->b_rptr; 7065 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7066 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7067 first_mp->b_cont = mp; 7068 } 7069 /* 7070 * Cache hardware acceleration info. 
7071 */ 7072 if (hada_mp != NULL) { 7073 ASSERT(ii != NULL); 7074 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7075 "caching data attr.\n")); 7076 ii->ipsec_in_accelerated = B_TRUE; 7077 ii->ipsec_in_da = hada_mp; 7078 } 7079 7080 if (!ipsec_loaded(ipss)) { 7081 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 7082 return (B_TRUE); 7083 } 7084 7085 ah = ipsec_inbound_ah_sa(first_mp, ns); 7086 if (ah == NULL) 7087 return (B_TRUE); 7088 ASSERT(ii->ipsec_in_ah_sa != NULL); 7089 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7090 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7091 7092 switch (ipsec_rc) { 7093 case IPSEC_STATUS_SUCCESS: 7094 /* we're done with IPsec processing, send it up */ 7095 ip_fanout_proto_again(first_mp, ill, ill, NULL); 7096 break; 7097 case IPSEC_STATUS_FAILED: 7098 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 7099 break; 7100 case IPSEC_STATUS_PENDING: 7101 /* no action needed */ 7102 break; 7103 } 7104 return (B_TRUE); 7105 } 7106 7107 /* 7108 * Validate the IPv6 mblk for alignment. 7109 */ 7110 int 7111 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7112 { 7113 int pkt_len, ip6_len; 7114 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7115 7116 /* check for alignment and full IPv6 header */ 7117 if (!OK_32PTR((uchar_t *)ip6h) || 7118 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7119 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7120 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7121 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7122 return (IP6_MBLK_HDR_ERR); 7123 } 7124 ip6h = (ip6_t *)mp->b_rptr; 7125 } 7126 7127 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7128 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7129 7130 if (mp->b_cont == NULL) 7131 pkt_len = mp->b_wptr - mp->b_rptr; 7132 else 7133 pkt_len = msgdsize(mp); 7134 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7135 7136 /* 7137 * Check for bogus (too short packet) and packet which 7138 * was padded by the link layer. 
7139 */ 7140 if (ip6_len != pkt_len) { 7141 ssize_t diff; 7142 7143 if (ip6_len > pkt_len) { 7144 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7145 ip6_len, pkt_len)); 7146 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7147 return (IP6_MBLK_LEN_ERR); 7148 } 7149 diff = (ssize_t)(pkt_len - ip6_len); 7150 7151 if (!adjmsg(mp, -diff)) { 7152 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7153 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7154 return (IP6_MBLK_LEN_ERR); 7155 } 7156 } 7157 return (IP6_MBLK_OK); 7158 } 7159 7160 /* 7161 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7162 * ip_rput_v6 has already verified alignment, the min length, the version, 7163 * and db_ref = 1. 7164 * 7165 * The ill passed in (the arg named inill) is the ill that the packet 7166 * actually arrived on. We need to remember this when saving the 7167 * input interface index into potential IPV6_PKTINFO data in 7168 * ip_add_info_v6(). 7169 * 7170 * This routine doesn't free dl_mp; that's the caller's responsibility on 7171 * return. (Note that the callers are complex enough that there's no tail 7172 * recursion here anyway.) 
7173 */ 7174 void 7175 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7176 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7177 { 7178 ire_t *ire = NULL; 7179 ill_t *ill = inill; 7180 ill_t *outill; 7181 ipif_t *ipif; 7182 uint8_t *whereptr; 7183 uint8_t nexthdr; 7184 uint16_t remlen; 7185 uint_t prev_nexthdr_offset; 7186 uint_t used; 7187 size_t old_pkt_len; 7188 size_t pkt_len; 7189 uint16_t ip6_len; 7190 uint_t hdr_len; 7191 boolean_t mctl_present; 7192 mblk_t *first_mp; 7193 mblk_t *first_mp1; 7194 boolean_t no_forward; 7195 ip6_hbh_t *hbhhdr; 7196 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7197 conn_t *connp; 7198 ilm_t *ilm; 7199 uint32_t ports; 7200 zoneid_t zoneid = GLOBAL_ZONEID; 7201 uint16_t hck_flags, reass_hck_flags; 7202 uint32_t reass_sum; 7203 boolean_t cksum_err; 7204 mblk_t *mp1; 7205 ip_stack_t *ipst = inill->ill_ipst; 7206 7207 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7208 7209 if (hada_mp != NULL) { 7210 /* 7211 * It's an IPsec accelerated packet. 7212 * Keep a pointer to the data attributes around until 7213 * we allocate the ipsecinfo structure. 7214 */ 7215 IPSECHW_DEBUG(IPSECHW_PKT, 7216 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7217 hada_mp->b_cont = NULL; 7218 /* 7219 * Since it is accelerated, it came directly from 7220 * the ill. 
7221 */ 7222 ASSERT(mctl_present == B_FALSE); 7223 ASSERT(mp->b_datap->db_type != M_CTL); 7224 } 7225 7226 ip6h = (ip6_t *)mp->b_rptr; 7227 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7228 old_pkt_len = pkt_len = ip6_len; 7229 7230 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7231 hck_flags = DB_CKSUMFLAGS(mp); 7232 else 7233 hck_flags = 0; 7234 7235 /* Clear checksum flags in case we need to forward */ 7236 DB_CKSUMFLAGS(mp) = 0; 7237 reass_sum = reass_hck_flags = 0; 7238 7239 nexthdr = ip6h->ip6_nxt; 7240 7241 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7242 (uchar_t *)ip6h); 7243 whereptr = (uint8_t *)&ip6h[1]; 7244 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7245 7246 /* Process hop by hop header options */ 7247 if (nexthdr == IPPROTO_HOPOPTS) { 7248 uint_t ehdrlen; 7249 uint8_t *optptr; 7250 7251 if (remlen < MIN_EHDR_LEN) 7252 goto pkt_too_short; 7253 if (mp->b_cont != NULL && 7254 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7255 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7256 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7257 freemsg(hada_mp); 7258 freemsg(first_mp); 7259 return; 7260 } 7261 ip6h = (ip6_t *)mp->b_rptr; 7262 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7263 } 7264 hbhhdr = (ip6_hbh_t *)whereptr; 7265 nexthdr = hbhhdr->ip6h_nxt; 7266 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7267 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7268 7269 if (remlen < ehdrlen) 7270 goto pkt_too_short; 7271 if (mp->b_cont != NULL && 7272 whereptr + ehdrlen > mp->b_wptr) { 7273 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7274 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7275 freemsg(hada_mp); 7276 freemsg(first_mp); 7277 return; 7278 } 7279 ip6h = (ip6_t *)mp->b_rptr; 7280 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7281 hbhhdr = (ip6_hbh_t *)whereptr; 7282 } 7283 7284 optptr = whereptr + 2; 7285 whereptr += ehdrlen; 7286 remlen -= ehdrlen; 7287 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7288 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7289 case -1: 7290 /* 7291 * Packet has been consumed and any 7292 * needed ICMP messages sent. 7293 */ 7294 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7295 freemsg(hada_mp); 7296 return; 7297 case 0: 7298 /* no action needed */ 7299 break; 7300 case 1: 7301 /* Known router alert */ 7302 goto ipv6forus; 7303 } 7304 } 7305 7306 /* 7307 * On incoming v6 multicast packets we will bypass the ire table, 7308 * and assume that the read queue corresponds to the targetted 7309 * interface. 7310 * 7311 * The effect of this is the same as the IPv4 original code, but is 7312 * much cleaner I think. See ip_rput for how that was done. 7313 */ 7314 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7315 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7316 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7317 /* 7318 * XXX TODO Give to mrouted to for multicast forwarding. 7319 */ 7320 ILM_WALKER_HOLD(ill); 7321 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7322 ILM_WALKER_RELE(ill); 7323 if (ilm == NULL) { 7324 if (ip_debug > 3) { 7325 /* ip2dbg */ 7326 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7327 " which is not for us: %s\n", AF_INET6, 7328 &ip6h->ip6_dst); 7329 } 7330 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7331 freemsg(hada_mp); 7332 freemsg(first_mp); 7333 return; 7334 } 7335 if (ip_debug > 3) { 7336 /* ip2dbg */ 7337 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7338 AF_INET6, &ip6h->ip6_dst); 7339 } 7340 zoneid = GLOBAL_ZONEID; 7341 goto ipv6forus; 7342 } 7343 7344 ipif = ill->ill_ipif; 7345 7346 /* 7347 * If a packet was received on an interface that is a 6to4 tunnel, 7348 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7349 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7350 * the 6to4 prefix of the address configured on the receiving interface. 
7351 * Otherwise, the packet was delivered to this interface in error and 7352 * the packet must be dropped. 7353 */ 7354 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7355 7356 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7357 &ip6h->ip6_dst)) { 7358 if (ip_debug > 2) { 7359 /* ip1dbg */ 7360 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7361 "addressed packet which is not for us: " 7362 "%s\n", AF_INET6, &ip6h->ip6_dst); 7363 } 7364 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7365 freemsg(first_mp); 7366 return; 7367 } 7368 } 7369 7370 /* 7371 * Find an ire that matches destination. For link-local addresses 7372 * we have to match the ill. 7373 * TBD for site local addresses. 7374 */ 7375 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7376 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7377 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7378 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 7379 } else { 7380 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7381 MBLK_GETLABEL(mp), ipst); 7382 7383 if (ire != NULL && ire->ire_stq != NULL && 7384 ire->ire_zoneid != GLOBAL_ZONEID && 7385 ire->ire_zoneid != ALL_ZONES) { 7386 /* 7387 * Should only use IREs that are visible from the 7388 * global zone for forwarding. 7389 */ 7390 ire_refrele(ire); 7391 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7392 GLOBAL_ZONEID, MBLK_GETLABEL(mp), ipst); 7393 } 7394 } 7395 7396 if (ire == NULL) { 7397 /* 7398 * No matching IRE found. Mark this packet as having 7399 * originated externally. 
7400 */ 7401 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7402 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7403 if (!(ill->ill_flags & ILLF_ROUTER)) { 7404 BUMP_MIB(ill->ill_ip_mib, 7405 ipIfStatsInAddrErrors); 7406 } 7407 freemsg(hada_mp); 7408 freemsg(first_mp); 7409 return; 7410 } 7411 if (ip6h->ip6_hops <= 1) { 7412 if (hada_mp != NULL) 7413 goto hada_drop; 7414 /* Sent by forwarding path, and router is global zone */ 7415 icmp_time_exceeded_v6(WR(q), first_mp, 7416 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7417 GLOBAL_ZONEID, ipst); 7418 return; 7419 } 7420 /* 7421 * Per RFC 3513 section 2.5.2, we must not forward packets with 7422 * an unspecified source address. 7423 */ 7424 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7425 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7426 freemsg(hada_mp); 7427 freemsg(first_mp); 7428 return; 7429 } 7430 mp->b_prev = (mblk_t *)(uintptr_t) 7431 ill->ill_phyint->phyint_ifindex; 7432 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7433 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7434 GLOBAL_ZONEID, ipst); 7435 return; 7436 } 7437 /* we have a matching IRE */ 7438 if (ire->ire_stq != NULL) { 7439 ill_group_t *ill_group; 7440 ill_group_t *ire_group; 7441 7442 /* 7443 * To be quicker, we may wish not to chase pointers 7444 * (ire->ire_ipif->ipif_ill...) and instead store the 7445 * forwarding policy in the ire. An unfortunate side- 7446 * effect of this would be requiring an ire flush whenever 7447 * the ILLF_ROUTER flag changes. For now, chase pointers 7448 * once and store in the boolean no_forward. 7449 * 7450 * This appears twice to keep it out of the non-forwarding, 7451 * yes-it's-for-us-on-the-right-interface case. 7452 */ 7453 no_forward = ((ill->ill_flags & 7454 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7455 7456 7457 ASSERT(first_mp == mp); 7458 /* 7459 * This ire has a send-to queue - forward the packet. 
7460 */ 7461 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7462 freemsg(hada_mp); 7463 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7464 if (no_forward) { 7465 BUMP_MIB(ill->ill_ip_mib, 7466 ipIfStatsInAddrErrors); 7467 } 7468 freemsg(mp); 7469 ire_refrele(ire); 7470 return; 7471 } 7472 /* 7473 * ipIfStatsHCInForwDatagrams should only be increment if there 7474 * will be an attempt to forward the packet, which is why we 7475 * increment after the above condition has been checked. 7476 */ 7477 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7478 if (ip6h->ip6_hops <= 1) { 7479 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7480 /* Sent by forwarding path, and router is global zone */ 7481 icmp_time_exceeded_v6(WR(q), mp, 7482 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7483 GLOBAL_ZONEID, ipst); 7484 ire_refrele(ire); 7485 return; 7486 } 7487 /* 7488 * Per RFC 3513 section 2.5.2, we must not forward packets with 7489 * an unspecified source address. 7490 */ 7491 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7492 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7493 freemsg(mp); 7494 ire_refrele(ire); 7495 return; 7496 } 7497 7498 if (is_system_labeled()) { 7499 mblk_t *mp1; 7500 7501 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7502 BUMP_MIB(ill->ill_ip_mib, 7503 ipIfStatsForwProhibits); 7504 freemsg(mp); 7505 ire_refrele(ire); 7506 return; 7507 } 7508 /* Size may have changed */ 7509 mp = mp1; 7510 ip6h = (ip6_t *)mp->b_rptr; 7511 pkt_len = msgdsize(mp); 7512 } 7513 7514 if (pkt_len > ire->ire_max_frag) { 7515 int max_frag = ire->ire_max_frag; 7516 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7517 /* 7518 * Handle labeled packet resizing. 
7519 */ 7520 if (is_system_labeled()) { 7521 max_frag = tsol_pmtu_adjust(mp, max_frag, 7522 pkt_len - old_pkt_len, AF_INET6); 7523 } 7524 7525 /* Sent by forwarding path, and router is global zone */ 7526 icmp_pkt2big_v6(WR(q), mp, max_frag, 7527 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7528 ire_refrele(ire); 7529 return; 7530 } 7531 7532 /* 7533 * Check to see if we're forwarding the packet to a 7534 * different link from which it came. If so, check the 7535 * source and destination addresses since routers must not 7536 * forward any packets with link-local source or 7537 * destination addresses to other links. Otherwise (if 7538 * we're forwarding onto the same link), conditionally send 7539 * a redirect message. 7540 */ 7541 ill_group = ill->ill_group; 7542 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7543 if (ire->ire_rfq != q && (ill_group == NULL || 7544 ill_group != ire_group)) { 7545 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7546 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7547 BUMP_MIB(ill->ill_ip_mib, 7548 ipIfStatsInAddrErrors); 7549 freemsg(mp); 7550 ire_refrele(ire); 7551 return; 7552 } 7553 /* TBD add site-local check at site boundary? */ 7554 } else if (ipst->ips_ipv6_send_redirects) { 7555 in6_addr_t *v6targ; 7556 in6_addr_t gw_addr_v6; 7557 ire_t *src_ire_v6 = NULL; 7558 7559 /* 7560 * Don't send a redirect when forwarding a source 7561 * routed packet. 7562 */ 7563 if (ip_source_routed_v6(ip6h, mp, ipst)) 7564 goto forward; 7565 7566 mutex_enter(&ire->ire_lock); 7567 gw_addr_v6 = ire->ire_gateway_addr_v6; 7568 mutex_exit(&ire->ire_lock); 7569 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7570 v6targ = &gw_addr_v6; 7571 /* 7572 * We won't send redirects to a router 7573 * that doesn't have a link local 7574 * address, but will forward. 
7575 */ 7576 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7577 BUMP_MIB(ill->ill_ip_mib, 7578 ipIfStatsInAddrErrors); 7579 goto forward; 7580 } 7581 } else { 7582 v6targ = &ip6h->ip6_dst; 7583 } 7584 7585 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7586 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7587 GLOBAL_ZONEID, 0, NULL, 7588 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7589 ipst); 7590 7591 if (src_ire_v6 != NULL) { 7592 /* 7593 * The source is directly connected. 7594 */ 7595 mp1 = copymsg(mp); 7596 if (mp1 != NULL) { 7597 icmp_send_redirect_v6(WR(q), 7598 mp1, v6targ, &ip6h->ip6_dst, 7599 ill, B_FALSE); 7600 } 7601 ire_refrele(src_ire_v6); 7602 } 7603 } 7604 7605 forward: 7606 /* Hoplimit verified above */ 7607 ip6h->ip6_hops--; 7608 7609 outill = ire->ire_ipif->ipif_ill; 7610 7611 DTRACE_PROBE4(ip6__forwarding__start, 7612 ill_t *, inill, ill_t *, outill, 7613 ip6_t *, ip6h, mblk_t *, mp); 7614 7615 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7616 ipst->ips_ipv6firewall_forwarding, 7617 inill, outill, ip6h, mp, mp, 0, ipst); 7618 7619 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7620 7621 if (mp != NULL) { 7622 UPDATE_IB_PKT_COUNT(ire); 7623 ire->ire_last_used_time = lbolt; 7624 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7625 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7626 } 7627 IRE_REFRELE(ire); 7628 return; 7629 } 7630 7631 /* 7632 * Need to put on correct queue for reassembly to find it. 7633 * No need to use put() since reassembly has its own locks. 7634 * Note: multicast packets and packets destined to addresses 7635 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7636 * the arriving ill. Unlike the IPv4 case, enabling strict 7637 * destination multihoming will prevent accepting packets 7638 * addressed to an IRE_LOCAL on lo0. 
7639 */ 7640 if (ire->ire_rfq != q) { 7641 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7642 == NULL) { 7643 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7644 freemsg(hada_mp); 7645 freemsg(first_mp); 7646 return; 7647 } 7648 if (ire->ire_rfq != NULL) { 7649 q = ire->ire_rfq; 7650 ill = (ill_t *)q->q_ptr; 7651 ASSERT(ill != NULL); 7652 } 7653 } 7654 7655 zoneid = ire->ire_zoneid; 7656 UPDATE_IB_PKT_COUNT(ire); 7657 ire->ire_last_used_time = lbolt; 7658 /* Don't use the ire after this point, we'll NULL it out to be sure. */ 7659 ire_refrele(ire); 7660 ire = NULL; 7661 ipv6forus: 7662 /* 7663 * Looks like this packet is for us one way or another. 7664 * This is where we'll process destination headers etc. 7665 */ 7666 for (; ; ) { 7667 switch (nexthdr) { 7668 case IPPROTO_TCP: { 7669 uint16_t *up; 7670 uint32_t sum; 7671 int offset; 7672 7673 hdr_len = pkt_len - remlen; 7674 7675 if (hada_mp != NULL) { 7676 ip0dbg(("tcp hada drop\n")); 7677 goto hada_drop; 7678 } 7679 7680 7681 /* TCP needs all of the TCP header */ 7682 if (remlen < TCP_MIN_HEADER_LENGTH) 7683 goto pkt_too_short; 7684 if (mp->b_cont != NULL && 7685 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7686 if (!pullupmsg(mp, 7687 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7688 BUMP_MIB(ill->ill_ip_mib, 7689 ipIfStatsInDiscards); 7690 freemsg(first_mp); 7691 return; 7692 } 7693 hck_flags = 0; 7694 ip6h = (ip6_t *)mp->b_rptr; 7695 whereptr = (uint8_t *)ip6h + hdr_len; 7696 } 7697 /* 7698 * Extract the offset field from the TCP header. 7699 */ 7700 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7701 if (offset != 5) { 7702 if (offset < 5) { 7703 ip1dbg(("ip_rput_data_v6: short " 7704 "TCP data offset")); 7705 BUMP_MIB(ill->ill_ip_mib, 7706 ipIfStatsInDiscards); 7707 freemsg(first_mp); 7708 return; 7709 } 7710 /* 7711 * There must be TCP options. 7712 * Make sure we can grab them. 
7713 */ 7714 offset <<= 2; 7715 if (remlen < offset) 7716 goto pkt_too_short; 7717 if (mp->b_cont != NULL && 7718 whereptr + offset > mp->b_wptr) { 7719 if (!pullupmsg(mp, 7720 hdr_len + offset)) { 7721 BUMP_MIB(ill->ill_ip_mib, 7722 ipIfStatsInDiscards); 7723 freemsg(first_mp); 7724 return; 7725 } 7726 hck_flags = 0; 7727 ip6h = (ip6_t *)mp->b_rptr; 7728 whereptr = (uint8_t *)ip6h + hdr_len; 7729 } 7730 } 7731 7732 up = (uint16_t *)&ip6h->ip6_src; 7733 /* 7734 * TCP checksum calculation. First sum up the 7735 * pseudo-header fields: 7736 * - Source IPv6 address 7737 * - Destination IPv6 address 7738 * - TCP payload length 7739 * - TCP protocol ID 7740 */ 7741 sum = htons(IPPROTO_TCP + remlen) + 7742 up[0] + up[1] + up[2] + up[3] + 7743 up[4] + up[5] + up[6] + up[7] + 7744 up[8] + up[9] + up[10] + up[11] + 7745 up[12] + up[13] + up[14] + up[15]; 7746 7747 /* Fold initial sum */ 7748 sum = (sum & 0xffff) + (sum >> 16); 7749 7750 mp1 = mp->b_cont; 7751 7752 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7753 IP6_STAT(ipst, ip6_in_sw_cksum); 7754 7755 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7756 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7757 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7758 mp, mp1, cksum_err); 7759 7760 if (cksum_err) { 7761 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7762 7763 if (hck_flags & HCK_FULLCKSUM) { 7764 IP6_STAT(ipst, 7765 ip6_tcp_in_full_hw_cksum_err); 7766 } else if (hck_flags & HCK_PARTIALCKSUM) { 7767 IP6_STAT(ipst, 7768 ip6_tcp_in_part_hw_cksum_err); 7769 } else { 7770 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7771 } 7772 freemsg(first_mp); 7773 return; 7774 } 7775 tcp_fanout: 7776 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7777 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7778 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7779 return; 7780 } 7781 case IPPROTO_SCTP: 7782 { 7783 sctp_hdr_t *sctph; 7784 uint32_t calcsum, pktsum; 7785 uint_t hdr_len = pkt_len - remlen; 7786 sctp_stack_t *sctps; 7787 7788 sctps = 
inill->ill_ipst->ips_netstack->netstack_sctp; 7789 7790 /* SCTP needs all of the SCTP header */ 7791 if (remlen < sizeof (*sctph)) { 7792 goto pkt_too_short; 7793 } 7794 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7795 ASSERT(mp->b_cont != NULL); 7796 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7797 BUMP_MIB(ill->ill_ip_mib, 7798 ipIfStatsInDiscards); 7799 freemsg(mp); 7800 return; 7801 } 7802 ip6h = (ip6_t *)mp->b_rptr; 7803 whereptr = (uint8_t *)ip6h + hdr_len; 7804 } 7805 7806 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7807 /* checksum */ 7808 pktsum = sctph->sh_chksum; 7809 sctph->sh_chksum = 0; 7810 calcsum = sctp_cksum(mp, hdr_len); 7811 if (calcsum != pktsum) { 7812 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7813 freemsg(mp); 7814 return; 7815 } 7816 sctph->sh_chksum = pktsum; 7817 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7818 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7819 ports, zoneid, mp, sctps)) == NULL) { 7820 ip_fanout_sctp_raw(first_mp, ill, 7821 (ipha_t *)ip6h, B_FALSE, ports, 7822 mctl_present, 7823 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7824 B_TRUE, zoneid); 7825 return; 7826 } 7827 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7828 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7829 B_FALSE, mctl_present); 7830 return; 7831 } 7832 case IPPROTO_UDP: { 7833 uint16_t *up; 7834 uint32_t sum; 7835 7836 hdr_len = pkt_len - remlen; 7837 7838 if (hada_mp != NULL) { 7839 ip0dbg(("udp hada drop\n")); 7840 goto hada_drop; 7841 } 7842 7843 /* Verify that at least the ports are present */ 7844 if (remlen < UDPH_SIZE) 7845 goto pkt_too_short; 7846 if (mp->b_cont != NULL && 7847 whereptr + UDPH_SIZE > mp->b_wptr) { 7848 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7849 BUMP_MIB(ill->ill_ip_mib, 7850 ipIfStatsInDiscards); 7851 freemsg(first_mp); 7852 return; 7853 } 7854 hck_flags = 0; 7855 ip6h = (ip6_t *)mp->b_rptr; 7856 whereptr = (uint8_t *)ip6h + hdr_len; 7857 } 7858 7859 /* 7860 * Before going through the regular 
checksum 7861 * calculation, make sure the received checksum 7862 * is non-zero. RFC 2460 says, a 0x0000 checksum 7863 * in a UDP packet (within IPv6 packet) is invalid 7864 * and should be replaced by 0xffff. This makes 7865 * sense as regular checksum calculation will 7866 * pass for both the cases i.e. 0x0000 and 0xffff. 7867 * Removing one of the case makes error detection 7868 * stronger. 7869 */ 7870 7871 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7872 /* 0x0000 checksum is invalid */ 7873 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7874 "checksum value 0x0000\n")); 7875 BUMP_MIB(ill->ill_ip_mib, 7876 udpIfStatsInCksumErrs); 7877 freemsg(first_mp); 7878 return; 7879 } 7880 7881 up = (uint16_t *)&ip6h->ip6_src; 7882 7883 /* 7884 * UDP checksum calculation. First sum up the 7885 * pseudo-header fields: 7886 * - Source IPv6 address 7887 * - Destination IPv6 address 7888 * - UDP payload length 7889 * - UDP protocol ID 7890 */ 7891 7892 sum = htons(IPPROTO_UDP + remlen) + 7893 up[0] + up[1] + up[2] + up[3] + 7894 up[4] + up[5] + up[6] + up[7] + 7895 up[8] + up[9] + up[10] + up[11] + 7896 up[12] + up[13] + up[14] + up[15]; 7897 7898 /* Fold initial sum */ 7899 sum = (sum & 0xffff) + (sum >> 16); 7900 7901 if (reass_hck_flags != 0) { 7902 hck_flags = reass_hck_flags; 7903 7904 IP_CKSUM_RECV_REASS(hck_flags, 7905 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7906 sum, reass_sum, cksum_err); 7907 } else { 7908 mp1 = mp->b_cont; 7909 7910 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7911 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7912 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7913 mp, mp1, cksum_err); 7914 } 7915 7916 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7917 IP6_STAT(ipst, ip6_in_sw_cksum); 7918 7919 if (cksum_err) { 7920 BUMP_MIB(ill->ill_ip_mib, 7921 udpIfStatsInCksumErrs); 7922 7923 if (hck_flags & HCK_FULLCKSUM) 7924 IP6_STAT(ipst, 7925 ip6_udp_in_full_hw_cksum_err); 7926 else if (hck_flags & HCK_PARTIALCKSUM) 7927 IP6_STAT(ipst, 7928 
ip6_udp_in_part_hw_cksum_err); 7929 else 7930 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7931 7932 freemsg(first_mp); 7933 return; 7934 } 7935 goto udp_fanout; 7936 } 7937 case IPPROTO_ICMPV6: { 7938 uint16_t *up; 7939 uint32_t sum; 7940 uint_t hdr_len = pkt_len - remlen; 7941 7942 if (hada_mp != NULL) { 7943 ip0dbg(("icmp hada drop\n")); 7944 goto hada_drop; 7945 } 7946 7947 up = (uint16_t *)&ip6h->ip6_src; 7948 sum = htons(IPPROTO_ICMPV6 + remlen) + 7949 up[0] + up[1] + up[2] + up[3] + 7950 up[4] + up[5] + up[6] + up[7] + 7951 up[8] + up[9] + up[10] + up[11] + 7952 up[12] + up[13] + up[14] + up[15]; 7953 sum = (sum & 0xffff) + (sum >> 16); 7954 sum = IP_CSUM(mp, hdr_len, sum); 7955 if (sum != 0) { 7956 /* IPv6 ICMP checksum failed */ 7957 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7958 "failed %x\n", 7959 sum)); 7960 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7961 BUMP_MIB(ill->ill_icmp6_mib, 7962 ipv6IfIcmpInErrors); 7963 freemsg(first_mp); 7964 return; 7965 } 7966 7967 icmp_fanout: 7968 /* Check variable for testing applications */ 7969 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7970 freemsg(first_mp); 7971 return; 7972 } 7973 /* 7974 * Assume that there is always at least one conn for 7975 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7976 * where there is no conn. 7977 */ 7978 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7979 ASSERT(!IS_LOOPBACK((ill))); 7980 /* 7981 * In the multicast case, applications may have 7982 * joined the group from different zones, so we 7983 * need to deliver the packet to each of them. 7984 * Loop through the multicast memberships 7985 * structures (ilm) on the receive ill and send 7986 * a copy of the packet up each matching one. 
7987 */ 7988 ILM_WALKER_HOLD(ill); 7989 for (ilm = ill->ill_ilm; ilm != NULL; 7990 ilm = ilm->ilm_next) { 7991 if (ilm->ilm_flags & ILM_DELETED) 7992 continue; 7993 if (!IN6_ARE_ADDR_EQUAL( 7994 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7995 continue; 7996 if (!ipif_lookup_zoneid(ill, 7997 ilm->ilm_zoneid, IPIF_UP, NULL)) 7998 continue; 7999 8000 first_mp1 = ip_copymsg(first_mp); 8001 if (first_mp1 == NULL) 8002 continue; 8003 icmp_inbound_v6(q, first_mp1, ill, 8004 hdr_len, mctl_present, 0, 8005 ilm->ilm_zoneid, dl_mp); 8006 } 8007 ILM_WALKER_RELE(ill); 8008 } else { 8009 first_mp1 = ip_copymsg(first_mp); 8010 if (first_mp1 != NULL) 8011 icmp_inbound_v6(q, first_mp1, ill, 8012 hdr_len, mctl_present, 0, zoneid, 8013 dl_mp); 8014 } 8015 } 8016 /* FALLTHRU */ 8017 default: { 8018 /* 8019 * Handle protocols with which IPv6 is less intimate. 8020 */ 8021 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 8022 8023 if (hada_mp != NULL) { 8024 ip0dbg(("default hada drop\n")); 8025 goto hada_drop; 8026 } 8027 8028 /* 8029 * Enable sending ICMP for "Unknown" nexthdr 8030 * case. i.e. where we did not FALLTHRU from 8031 * IPPROTO_ICMPV6 processing case above. 8032 * If we did FALLTHRU, then the packet has already been 8033 * processed for IPPF, don't process it again in 8034 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8035 * flags 8036 */ 8037 if (nexthdr != IPPROTO_ICMPV6) 8038 proto_flags |= IP_FF_SEND_ICMP; 8039 else 8040 proto_flags |= IP6_NO_IPPOLICY; 8041 8042 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8043 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8044 mctl_present, zoneid); 8045 return; 8046 } 8047 8048 case IPPROTO_DSTOPTS: { 8049 uint_t ehdrlen; 8050 uint8_t *optptr; 8051 ip6_dest_t *desthdr; 8052 8053 /* Check if AH is present. 
*/ 8054 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8055 hada_mp, zoneid)) { 8056 ip0dbg(("dst early hada drop\n")); 8057 return; 8058 } 8059 8060 /* 8061 * Reinitialize pointers, as ipsec_early_ah_v6() does 8062 * complete pullups. We don't have to do more pullups 8063 * as a result. 8064 */ 8065 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8066 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8067 ip6h = (ip6_t *)mp->b_rptr; 8068 8069 if (remlen < MIN_EHDR_LEN) 8070 goto pkt_too_short; 8071 8072 desthdr = (ip6_dest_t *)whereptr; 8073 nexthdr = desthdr->ip6d_nxt; 8074 prev_nexthdr_offset = (uint_t)(whereptr - 8075 (uint8_t *)ip6h); 8076 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8077 if (remlen < ehdrlen) 8078 goto pkt_too_short; 8079 optptr = whereptr + 2; 8080 /* 8081 * Note: XXX This code does not seem to make 8082 * distinction between Destination Options Header 8083 * being before/after Routing Header which can 8084 * happen if we are at the end of source route. 8085 * This may become significant in future. 8086 * (No real significant Destination Options are 8087 * defined/implemented yet ). 8088 */ 8089 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8090 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 8091 case -1: 8092 /* 8093 * Packet has been consumed and any needed 8094 * ICMP errors sent. 
8095 */ 8096 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8097 freemsg(hada_mp); 8098 return; 8099 case 0: 8100 /* No action needed continue */ 8101 break; 8102 case 1: 8103 /* 8104 * Unnexpected return value 8105 * (Router alert is a Hop-by-Hop option) 8106 */ 8107 #ifdef DEBUG 8108 panic("ip_rput_data_v6: router " 8109 "alert hbh opt indication in dest opt"); 8110 /*NOTREACHED*/ 8111 #else 8112 freemsg(hada_mp); 8113 freemsg(first_mp); 8114 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8115 return; 8116 #endif 8117 } 8118 used = ehdrlen; 8119 break; 8120 } 8121 case IPPROTO_FRAGMENT: { 8122 ip6_frag_t *fraghdr; 8123 size_t no_frag_hdr_len; 8124 8125 if (hada_mp != NULL) { 8126 ip0dbg(("frag hada drop\n")); 8127 goto hada_drop; 8128 } 8129 8130 ASSERT(first_mp == mp); 8131 if (remlen < sizeof (ip6_frag_t)) 8132 goto pkt_too_short; 8133 8134 if (mp->b_cont != NULL && 8135 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8136 if (!pullupmsg(mp, 8137 pkt_len - remlen + sizeof (ip6_frag_t))) { 8138 BUMP_MIB(ill->ill_ip_mib, 8139 ipIfStatsInDiscards); 8140 freemsg(mp); 8141 return; 8142 } 8143 hck_flags = 0; 8144 ip6h = (ip6_t *)mp->b_rptr; 8145 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8146 } 8147 8148 fraghdr = (ip6_frag_t *)whereptr; 8149 used = (uint_t)sizeof (ip6_frag_t); 8150 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8151 8152 /* 8153 * Invoke the CGTP (multirouting) filtering module to 8154 * process the incoming packet. Packets identified as 8155 * duplicates must be discarded. Filtering is active 8156 * only if the the ip_cgtp_filter ndd variable is 8157 * non-zero. 
8158 */ 8159 if (ipst->ips_ip_cgtp_filter && 8160 ipst->ips_ip_cgtp_filter_ops != NULL) { 8161 int cgtp_flt_pkt; 8162 netstackid_t stackid; 8163 8164 stackid = ipst->ips_netstack->netstack_stackid; 8165 8166 cgtp_flt_pkt = 8167 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 8168 stackid, inill->ill_phyint->phyint_ifindex, 8169 ip6h, fraghdr); 8170 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8171 freemsg(mp); 8172 return; 8173 } 8174 } 8175 8176 /* Restore the flags */ 8177 DB_CKSUMFLAGS(mp) = hck_flags; 8178 8179 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8180 remlen - used, &prev_nexthdr_offset, 8181 &reass_sum, &reass_hck_flags); 8182 if (mp == NULL) { 8183 /* Reassembly is still pending */ 8184 return; 8185 } 8186 /* The first mblk are the headers before the frag hdr */ 8187 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8188 8189 first_mp = mp; /* mp has most likely changed! */ 8190 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8191 ip6h = (ip6_t *)mp->b_rptr; 8192 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8193 whereptr = mp->b_rptr + no_frag_hdr_len; 8194 remlen = ntohs(ip6h->ip6_plen) + 8195 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8196 pkt_len = msgdsize(mp); 8197 used = 0; 8198 break; 8199 } 8200 case IPPROTO_HOPOPTS: { 8201 if (hada_mp != NULL) { 8202 ip0dbg(("hop hada drop\n")); 8203 goto hada_drop; 8204 } 8205 /* 8206 * Illegal header sequence. 8207 * (Hop-by-hop headers are processed above 8208 * and required to immediately follow IPv6 header) 8209 */ 8210 icmp_param_problem_v6(WR(q), first_mp, 8211 ICMP6_PARAMPROB_NEXTHEADER, 8212 prev_nexthdr_offset, 8213 B_FALSE, B_FALSE, zoneid, ipst); 8214 return; 8215 } 8216 case IPPROTO_ROUTING: { 8217 uint_t ehdrlen; 8218 ip6_rthdr_t *rthdr; 8219 8220 /* Check if AH is present. 
*/ 8221 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8222 hada_mp, zoneid)) { 8223 ip0dbg(("routing hada drop\n")); 8224 return; 8225 } 8226 8227 /* 8228 * Reinitialize pointers, as ipsec_early_ah_v6() does 8229 * complete pullups. We don't have to do more pullups 8230 * as a result. 8231 */ 8232 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8233 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8234 ip6h = (ip6_t *)mp->b_rptr; 8235 8236 if (remlen < MIN_EHDR_LEN) 8237 goto pkt_too_short; 8238 rthdr = (ip6_rthdr_t *)whereptr; 8239 nexthdr = rthdr->ip6r_nxt; 8240 prev_nexthdr_offset = (uint_t)(whereptr - 8241 (uint8_t *)ip6h); 8242 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8243 if (remlen < ehdrlen) 8244 goto pkt_too_short; 8245 if (rthdr->ip6r_segleft != 0) { 8246 /* Not end of source route */ 8247 if (ll_multicast) { 8248 BUMP_MIB(ill->ill_ip_mib, 8249 ipIfStatsForwProhibits); 8250 freemsg(hada_mp); 8251 freemsg(mp); 8252 return; 8253 } 8254 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8255 flags, hada_mp, dl_mp); 8256 return; 8257 } 8258 used = ehdrlen; 8259 break; 8260 } 8261 case IPPROTO_AH: 8262 case IPPROTO_ESP: { 8263 /* 8264 * Fast path for AH/ESP. If this is the first time 8265 * we are sending a datagram to AH/ESP, allocate 8266 * a IPSEC_IN message and prepend it. Otherwise, 8267 * just fanout. 8268 */ 8269 8270 ipsec_in_t *ii; 8271 int ipsec_rc; 8272 ipsec_stack_t *ipss; 8273 8274 ipss = ipst->ips_netstack->netstack_ipsec; 8275 if (!mctl_present) { 8276 ASSERT(first_mp == mp); 8277 first_mp = ipsec_in_alloc(B_FALSE, 8278 ipst->ips_netstack); 8279 if (first_mp == NULL) { 8280 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8281 "allocation failure.\n")); 8282 BUMP_MIB(ill->ill_ip_mib, 8283 ipIfStatsInDiscards); 8284 freemsg(mp); 8285 return; 8286 } 8287 /* 8288 * Store the ill_index so that when we come back 8289 * from IPSEC we ride on the same queue. 
8290 */ 8291 ii = (ipsec_in_t *)first_mp->b_rptr; 8292 ii->ipsec_in_ill_index = 8293 ill->ill_phyint->phyint_ifindex; 8294 ii->ipsec_in_rill_index = 8295 ii->ipsec_in_ill_index; 8296 first_mp->b_cont = mp; 8297 /* 8298 * Cache hardware acceleration info. 8299 */ 8300 if (hada_mp != NULL) { 8301 IPSECHW_DEBUG(IPSECHW_PKT, 8302 ("ip_rput_data_v6: " 8303 "caching data attr.\n")); 8304 ii->ipsec_in_accelerated = B_TRUE; 8305 ii->ipsec_in_da = hada_mp; 8306 hada_mp = NULL; 8307 } 8308 } else { 8309 ii = (ipsec_in_t *)first_mp->b_rptr; 8310 } 8311 8312 if (!ipsec_loaded(ipss)) { 8313 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8314 zoneid, ipst); 8315 return; 8316 } 8317 8318 /* select inbound SA and have IPsec process the pkt */ 8319 if (nexthdr == IPPROTO_ESP) { 8320 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8321 ipst->ips_netstack); 8322 if (esph == NULL) 8323 return; 8324 ASSERT(ii->ipsec_in_esp_sa != NULL); 8325 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8326 NULL); 8327 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8328 first_mp, esph); 8329 } else { 8330 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8331 ipst->ips_netstack); 8332 if (ah == NULL) 8333 return; 8334 ASSERT(ii->ipsec_in_ah_sa != NULL); 8335 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8336 NULL); 8337 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8338 first_mp, ah); 8339 } 8340 8341 switch (ipsec_rc) { 8342 case IPSEC_STATUS_SUCCESS: 8343 break; 8344 case IPSEC_STATUS_FAILED: 8345 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8346 /* FALLTHRU */ 8347 case IPSEC_STATUS_PENDING: 8348 return; 8349 } 8350 /* we're done with IPsec processing, send it up */ 8351 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8352 return; 8353 } 8354 case IPPROTO_NONE: 8355 /* All processing is done. Count as "delivered". 
*/ 8356 freemsg(hada_mp); 8357 freemsg(first_mp); 8358 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8359 return; 8360 } 8361 whereptr += used; 8362 ASSERT(remlen >= used); 8363 remlen -= used; 8364 } 8365 /* NOTREACHED */ 8366 8367 pkt_too_short: 8368 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8369 ip6_len, pkt_len, remlen)); 8370 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8371 freemsg(hada_mp); 8372 freemsg(first_mp); 8373 return; 8374 udp_fanout: 8375 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8376 connp = NULL; 8377 } else { 8378 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8379 ipst); 8380 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8381 CONN_DEC_REF(connp); 8382 connp = NULL; 8383 } 8384 } 8385 8386 if (connp == NULL) { 8387 uint32_t ports; 8388 8389 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8390 UDP_PORTS_OFFSET); 8391 IP6_STAT(ipst, ip6_udp_slow_path); 8392 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8393 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8394 zoneid); 8395 return; 8396 } 8397 8398 if (CONN_UDP_FLOWCTLD(connp)) { 8399 freemsg(first_mp); 8400 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8401 CONN_DEC_REF(connp); 8402 return; 8403 } 8404 8405 /* Initiate IPPF processing */ 8406 if (IP6_IN_IPP(flags, ipst)) { 8407 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8408 if (mp == NULL) { 8409 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8410 CONN_DEC_REF(connp); 8411 return; 8412 } 8413 } 8414 8415 if (connp->conn_ip_recvpktinfo || 8416 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8417 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8418 if (mp == NULL) { 8419 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8420 CONN_DEC_REF(connp); 8421 return; 8422 } 8423 } 8424 8425 IP6_STAT(ipst, ip6_udp_fast_path); 8426 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8427 8428 /* Send it upstream */ 8429 (connp->conn_recv)(connp, mp, NULL); 8430 8431 
CONN_DEC_REF(connp); 8432 freemsg(hada_mp); 8433 return; 8434 8435 hada_drop: 8436 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8437 /* IPsec kstats: bump counter here */ 8438 freemsg(hada_mp); 8439 freemsg(first_mp); 8440 } 8441 8442 /* 8443 * Reassemble fragment. 8444 * When it returns a completed message the first mblk will only contain 8445 * the headers prior to the fragment header. 8446 * 8447 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8448 * of the preceding header. This is needed to patch the previous header's 8449 * nexthdr field when reassembly completes. 8450 */ 8451 static mblk_t * 8452 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8453 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8454 uint32_t *cksum_val, uint16_t *cksum_flags) 8455 { 8456 ill_t *ill = (ill_t *)q->q_ptr; 8457 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8458 uint16_t offset; 8459 boolean_t more_frags; 8460 uint8_t nexthdr = fraghdr->ip6f_nxt; 8461 in6_addr_t *v6dst_ptr; 8462 in6_addr_t *v6src_ptr; 8463 uint_t end; 8464 uint_t hdr_length; 8465 size_t count; 8466 ipf_t *ipf; 8467 ipf_t **ipfp; 8468 ipfb_t *ipfb; 8469 mblk_t *mp1; 8470 uint8_t ecn_info = 0; 8471 size_t msg_len; 8472 mblk_t *tail_mp; 8473 mblk_t *t_mp; 8474 boolean_t pruned = B_FALSE; 8475 uint32_t sum_val; 8476 uint16_t sum_flags; 8477 ip_stack_t *ipst = ill->ill_ipst; 8478 8479 if (cksum_val != NULL) 8480 *cksum_val = 0; 8481 if (cksum_flags != NULL) 8482 *cksum_flags = 0; 8483 8484 /* 8485 * We utilize hardware computed checksum info only for UDP since 8486 * IP fragmentation is a normal occurence for the protocol. In 8487 * addition, checksum offload support for IP fragments carrying 8488 * UDP payload is commonly implemented across network adapters. 
8489 */ 8490 ASSERT(ill != NULL); 8491 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8492 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8493 mblk_t *mp1 = mp->b_cont; 8494 int32_t len; 8495 8496 /* Record checksum information from the packet */ 8497 sum_val = (uint32_t)DB_CKSUM16(mp); 8498 sum_flags = DB_CKSUMFLAGS(mp); 8499 8500 /* fragmented payload offset from beginning of mblk */ 8501 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8502 8503 if ((sum_flags & HCK_PARTIALCKSUM) && 8504 (mp1 == NULL || mp1->b_cont == NULL) && 8505 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8506 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8507 uint32_t adj; 8508 /* 8509 * Partial checksum has been calculated by hardware 8510 * and attached to the packet; in addition, any 8511 * prepended extraneous data is even byte aligned. 8512 * If any such data exists, we adjust the checksum; 8513 * this would also handle any postpended data. 8514 */ 8515 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8516 mp, mp1, len, adj); 8517 8518 /* One's complement subtract extraneous checksum */ 8519 if (adj >= sum_val) 8520 sum_val = ~(adj - sum_val) & 0xFFFF; 8521 else 8522 sum_val -= adj; 8523 } 8524 } else { 8525 sum_val = 0; 8526 sum_flags = 0; 8527 } 8528 8529 /* Clear hardware checksumming flag */ 8530 DB_CKSUMFLAGS(mp) = 0; 8531 8532 /* 8533 * Note: Fragment offset in header is in 8-octet units. 8534 * Clearing least significant 3 bits not only extracts 8535 * it but also gets it in units of octets. 8536 */ 8537 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8538 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8539 8540 /* 8541 * Is the more frags flag on and the payload length not a multiple 8542 * of eight? 
8543 */ 8544 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8545 zoneid_t zoneid; 8546 8547 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8548 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8549 if (zoneid == ALL_ZONES) { 8550 freemsg(mp); 8551 return (NULL); 8552 } 8553 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8554 (uint32_t)((char *)&ip6h->ip6_plen - 8555 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8556 return (NULL); 8557 } 8558 8559 v6src_ptr = &ip6h->ip6_src; 8560 v6dst_ptr = &ip6h->ip6_dst; 8561 end = remlen; 8562 8563 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8564 end += offset; 8565 8566 /* 8567 * Would fragment cause reassembled packet to have a payload length 8568 * greater than IP_MAXPACKET - the max payload size? 8569 */ 8570 if (end > IP_MAXPACKET) { 8571 zoneid_t zoneid; 8572 8573 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8574 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8575 if (zoneid == ALL_ZONES) { 8576 freemsg(mp); 8577 return (NULL); 8578 } 8579 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8580 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8581 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8582 return (NULL); 8583 } 8584 8585 /* 8586 * This packet just has one fragment. Reassembly not 8587 * needed. 8588 */ 8589 if (!more_frags && offset == 0) { 8590 goto reass_done; 8591 } 8592 8593 /* 8594 * Drop the fragmented as early as possible, if 8595 * we don't have resource(s) to re-assemble. 8596 */ 8597 if (ipst->ips_ip_reass_queue_bytes == 0) { 8598 freemsg(mp); 8599 return (NULL); 8600 } 8601 8602 /* Record the ECN field info. */ 8603 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8604 /* 8605 * If this is not the first fragment, dump the unfragmentable 8606 * portion of the packet. 8607 */ 8608 if (offset) 8609 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8610 8611 /* 8612 * Fragmentation reassembly. 
Each ILL has a hash table for 8613 * queueing packets undergoing reassembly for all IPIFs 8614 * associated with the ILL. The hash is based on the packet 8615 * IP ident field. The ILL frag hash table was allocated 8616 * as a timer block at the time the ILL was created. Whenever 8617 * there is anything on the reassembly queue, the timer will 8618 * be running. 8619 */ 8620 msg_len = MBLKSIZE(mp); 8621 tail_mp = mp; 8622 while (tail_mp->b_cont != NULL) { 8623 tail_mp = tail_mp->b_cont; 8624 msg_len += MBLKSIZE(tail_mp); 8625 } 8626 /* 8627 * If the reassembly list for this ILL will get too big 8628 * prune it. 8629 */ 8630 8631 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8632 ipst->ips_ip_reass_queue_bytes) { 8633 ill_frag_prune(ill, 8634 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8635 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8636 pruned = B_TRUE; 8637 } 8638 8639 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8640 mutex_enter(&ipfb->ipfb_lock); 8641 8642 ipfp = &ipfb->ipfb_ipf; 8643 /* Try to find an existing fragment queue for this packet. */ 8644 for (;;) { 8645 ipf = ipfp[0]; 8646 if (ipf) { 8647 /* 8648 * It has to match on ident, source address, and 8649 * dest address. 8650 */ 8651 if (ipf->ipf_ident == ident && 8652 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8653 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8654 8655 /* 8656 * If we have received too many 8657 * duplicate fragments for this packet 8658 * free it. 8659 */ 8660 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8661 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8662 freemsg(mp); 8663 mutex_exit(&ipfb->ipfb_lock); 8664 return (NULL); 8665 } 8666 8667 break; 8668 } 8669 ipfp = &ipf->ipf_hash_next; 8670 continue; 8671 } 8672 8673 8674 /* 8675 * If we pruned the list, do we want to store this new 8676 * fragment?. We apply an optimization here based on the 8677 * fact that most fragments will be received in order. 
8678 * So if the offset of this incoming fragment is zero, 8679 * it is the first fragment of a new packet. We will 8680 * keep it. Otherwise drop the fragment, as we have 8681 * probably pruned the packet already (since the 8682 * packet cannot be found). 8683 */ 8684 8685 if (pruned && offset != 0) { 8686 mutex_exit(&ipfb->ipfb_lock); 8687 freemsg(mp); 8688 return (NULL); 8689 } 8690 8691 /* New guy. Allocate a frag message. */ 8692 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8693 if (!mp1) { 8694 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8695 freemsg(mp); 8696 partial_reass_done: 8697 mutex_exit(&ipfb->ipfb_lock); 8698 return (NULL); 8699 } 8700 8701 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8702 /* 8703 * Too many fragmented packets in this hash bucket. 8704 * Free the oldest. 8705 */ 8706 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8707 } 8708 8709 mp1->b_cont = mp; 8710 8711 /* Initialize the fragment header. */ 8712 ipf = (ipf_t *)mp1->b_rptr; 8713 ipf->ipf_mp = mp1; 8714 ipf->ipf_ptphn = ipfp; 8715 ipfp[0] = ipf; 8716 ipf->ipf_hash_next = NULL; 8717 ipf->ipf_ident = ident; 8718 ipf->ipf_v6src = *v6src_ptr; 8719 ipf->ipf_v6dst = *v6dst_ptr; 8720 /* Record reassembly start time. 
*/ 8721 ipf->ipf_timestamp = gethrestime_sec(); 8722 /* Record ipf generation and account for frag header */ 8723 ipf->ipf_gen = ill->ill_ipf_gen++; 8724 ipf->ipf_count = MBLKSIZE(mp1); 8725 ipf->ipf_protocol = nexthdr; 8726 ipf->ipf_nf_hdr_len = 0; 8727 ipf->ipf_prev_nexthdr_offset = 0; 8728 ipf->ipf_last_frag_seen = B_FALSE; 8729 ipf->ipf_ecn = ecn_info; 8730 ipf->ipf_num_dups = 0; 8731 ipfb->ipfb_frag_pkts++; 8732 ipf->ipf_checksum = 0; 8733 ipf->ipf_checksum_flags = 0; 8734 8735 /* Store checksum value in fragment header */ 8736 if (sum_flags != 0) { 8737 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8738 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8739 ipf->ipf_checksum = sum_val; 8740 ipf->ipf_checksum_flags = sum_flags; 8741 } 8742 8743 /* 8744 * We handle reassembly two ways. In the easy case, 8745 * where all the fragments show up in order, we do 8746 * minimal bookkeeping, and just clip new pieces on 8747 * the end. If we ever see a hole, then we go off 8748 * to ip_reassemble which has to mark the pieces and 8749 * keep track of the number of holes, etc. Obviously, 8750 * the point of having both mechanisms is so we can 8751 * handle the easy case as efficiently as possible. 8752 */ 8753 if (offset == 0) { 8754 /* Easy case, in-order reassembly so far. */ 8755 /* Update the byte count */ 8756 ipf->ipf_count += msg_len; 8757 ipf->ipf_tail_mp = tail_mp; 8758 /* 8759 * Keep track of next expected offset in 8760 * ipf_end. 8761 */ 8762 ipf->ipf_end = end; 8763 ipf->ipf_nf_hdr_len = hdr_length; 8764 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8765 } else { 8766 /* Hard case, hole at the beginning. */ 8767 ipf->ipf_tail_mp = NULL; 8768 /* 8769 * ipf_end == 0 means that we have given up 8770 * on easy reassembly. 8771 */ 8772 ipf->ipf_end = 0; 8773 8774 /* Forget checksum offload from now on */ 8775 ipf->ipf_checksum_flags = 0; 8776 8777 /* 8778 * ipf_hole_cnt is set by ip_reassemble. 8779 * ipf_count is updated by ip_reassemble. 
8780 * No need to check for return value here 8781 * as we don't expect reassembly to complete or 8782 * fail for the first fragment itself. 8783 */ 8784 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8785 msg_len); 8786 } 8787 /* Update per ipfb and ill byte counts */ 8788 ipfb->ipfb_count += ipf->ipf_count; 8789 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8790 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8791 /* If the frag timer wasn't already going, start it. */ 8792 mutex_enter(&ill->ill_lock); 8793 ill_frag_timer_start(ill); 8794 mutex_exit(&ill->ill_lock); 8795 goto partial_reass_done; 8796 } 8797 8798 /* 8799 * If the packet's flag has changed (it could be coming up 8800 * from an interface different than the previous, therefore 8801 * possibly different checksum capability), then forget about 8802 * any stored checksum states. Otherwise add the value to 8803 * the existing one stored in the fragment header. 8804 */ 8805 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8806 sum_val += ipf->ipf_checksum; 8807 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8808 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8809 ipf->ipf_checksum = sum_val; 8810 } else if (ipf->ipf_checksum_flags != 0) { 8811 /* Forget checksum offload from now on */ 8812 ipf->ipf_checksum_flags = 0; 8813 } 8814 8815 /* 8816 * We have a new piece of a datagram which is already being 8817 * reassembled. Update the ECN info if all IP fragments 8818 * are ECN capable. If there is one which is not, clear 8819 * all the info. If there is at least one which has CE 8820 * code point, IP needs to report that up to transport. 
8821 */ 8822 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8823 if (ecn_info == IPH_ECN_CE) 8824 ipf->ipf_ecn = IPH_ECN_CE; 8825 } else { 8826 ipf->ipf_ecn = IPH_ECN_NECT; 8827 } 8828 8829 if (offset && ipf->ipf_end == offset) { 8830 /* The new fragment fits at the end */ 8831 ipf->ipf_tail_mp->b_cont = mp; 8832 /* Update the byte count */ 8833 ipf->ipf_count += msg_len; 8834 /* Update per ipfb and ill byte counts */ 8835 ipfb->ipfb_count += msg_len; 8836 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8837 atomic_add_32(&ill->ill_frag_count, msg_len); 8838 if (more_frags) { 8839 /* More to come. */ 8840 ipf->ipf_end = end; 8841 ipf->ipf_tail_mp = tail_mp; 8842 goto partial_reass_done; 8843 } 8844 } else { 8845 /* 8846 * Go do the hard cases. 8847 * Call ip_reassemble(). 8848 */ 8849 int ret; 8850 8851 if (offset == 0) { 8852 if (ipf->ipf_prev_nexthdr_offset == 0) { 8853 ipf->ipf_nf_hdr_len = hdr_length; 8854 ipf->ipf_prev_nexthdr_offset = 8855 *prev_nexthdr_offset; 8856 } 8857 } 8858 /* Save current byte count */ 8859 count = ipf->ipf_count; 8860 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8861 8862 /* Count of bytes added and subtracted (freeb()ed) */ 8863 count = ipf->ipf_count - count; 8864 if (count) { 8865 /* Update per ipfb and ill byte counts */ 8866 ipfb->ipfb_count += count; 8867 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8868 atomic_add_32(&ill->ill_frag_count, count); 8869 } 8870 if (ret == IP_REASS_PARTIAL) { 8871 goto partial_reass_done; 8872 } else if (ret == IP_REASS_FAILED) { 8873 /* Reassembly failed. Free up all resources */ 8874 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8875 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8876 IP_REASS_SET_START(t_mp, 0); 8877 IP_REASS_SET_END(t_mp, 0); 8878 } 8879 freemsg(mp); 8880 goto partial_reass_done; 8881 } 8882 8883 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8884 } 8885 /* 8886 * We have completed reassembly. 
Unhook the frag header from 8887 * the reassembly list. 8888 * 8889 * Grab the unfragmentable header length next header value out 8890 * of the first fragment 8891 */ 8892 ASSERT(ipf->ipf_nf_hdr_len != 0); 8893 hdr_length = ipf->ipf_nf_hdr_len; 8894 8895 /* 8896 * Before we free the frag header, record the ECN info 8897 * to report back to the transport. 8898 */ 8899 ecn_info = ipf->ipf_ecn; 8900 8901 /* 8902 * Store the nextheader field in the header preceding the fragment 8903 * header 8904 */ 8905 nexthdr = ipf->ipf_protocol; 8906 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8907 ipfp = ipf->ipf_ptphn; 8908 8909 /* We need to supply these to caller */ 8910 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8911 sum_val = ipf->ipf_checksum; 8912 else 8913 sum_val = 0; 8914 8915 mp1 = ipf->ipf_mp; 8916 count = ipf->ipf_count; 8917 ipf = ipf->ipf_hash_next; 8918 if (ipf) 8919 ipf->ipf_ptphn = ipfp; 8920 ipfp[0] = ipf; 8921 atomic_add_32(&ill->ill_frag_count, -count); 8922 ASSERT(ipfb->ipfb_count >= count); 8923 ipfb->ipfb_count -= count; 8924 ipfb->ipfb_frag_pkts--; 8925 mutex_exit(&ipfb->ipfb_lock); 8926 /* Ditch the frag header. */ 8927 mp = mp1->b_cont; 8928 freeb(mp1); 8929 8930 /* 8931 * Make sure the packet is good by doing some sanity 8932 * check. If bad we can silentely drop the packet. 8933 */ 8934 reass_done: 8935 if (hdr_length < sizeof (ip6_frag_t)) { 8936 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8937 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8938 freemsg(mp); 8939 return (NULL); 8940 } 8941 8942 /* 8943 * Remove the fragment header from the initial header by 8944 * splitting the mblk into the non-fragmentable header and 8945 * everthing after the fragment extension header. This has the 8946 * side effect of putting all the headers that need destination 8947 * processing into the b_cont block-- on return this fact is 8948 * used in order to avoid having to look at the extensions 8949 * already processed. 
8950 * 8951 * Note that this code assumes that the unfragmentable portion 8952 * of the header is in the first mblk and increments 8953 * the read pointer past it. If this assumption is broken 8954 * this code fails badly. 8955 */ 8956 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8957 mblk_t *nmp; 8958 8959 if (!(nmp = dupb(mp))) { 8960 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8961 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8962 freemsg(mp); 8963 return (NULL); 8964 } 8965 nmp->b_cont = mp->b_cont; 8966 mp->b_cont = nmp; 8967 nmp->b_rptr += hdr_length; 8968 } 8969 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8970 8971 ip6h = (ip6_t *)mp->b_rptr; 8972 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8973 8974 /* Restore original IP length in header. */ 8975 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8976 /* Record the ECN info. */ 8977 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8978 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8979 8980 /* Reassembly is successful; return checksum information if needed */ 8981 if (cksum_val != NULL) 8982 *cksum_val = sum_val; 8983 if (cksum_flags != NULL) 8984 *cksum_flags = sum_flags; 8985 8986 return (mp); 8987 } 8988 8989 /* 8990 * Walk through the options to see if there is a routing header. 8991 * If present get the destination which is the last address of 8992 * the option. 
8993 */ 8994 in6_addr_t 8995 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8996 { 8997 uint8_t nexthdr; 8998 uint8_t *whereptr; 8999 ip6_hbh_t *hbhhdr; 9000 ip6_dest_t *dsthdr; 9001 ip6_rthdr0_t *rthdr; 9002 ip6_frag_t *fraghdr; 9003 int ehdrlen; 9004 int left; 9005 in6_addr_t *ap, rv; 9006 9007 if (is_fragment != NULL) 9008 *is_fragment = B_FALSE; 9009 9010 rv = ip6h->ip6_dst; 9011 9012 nexthdr = ip6h->ip6_nxt; 9013 whereptr = (uint8_t *)&ip6h[1]; 9014 for (;;) { 9015 9016 ASSERT(nexthdr != IPPROTO_RAW); 9017 switch (nexthdr) { 9018 case IPPROTO_HOPOPTS: 9019 hbhhdr = (ip6_hbh_t *)whereptr; 9020 nexthdr = hbhhdr->ip6h_nxt; 9021 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9022 break; 9023 case IPPROTO_DSTOPTS: 9024 dsthdr = (ip6_dest_t *)whereptr; 9025 nexthdr = dsthdr->ip6d_nxt; 9026 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9027 break; 9028 case IPPROTO_ROUTING: 9029 rthdr = (ip6_rthdr0_t *)whereptr; 9030 nexthdr = rthdr->ip6r0_nxt; 9031 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9032 9033 left = rthdr->ip6r0_segleft; 9034 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9035 rv = *(ap + left - 1); 9036 /* 9037 * If the caller doesn't care whether the packet 9038 * is a fragment or not, we can stop here since 9039 * we have our destination. 9040 */ 9041 if (is_fragment == NULL) 9042 goto done; 9043 break; 9044 case IPPROTO_FRAGMENT: 9045 fraghdr = (ip6_frag_t *)whereptr; 9046 nexthdr = fraghdr->ip6f_nxt; 9047 ehdrlen = sizeof (ip6_frag_t); 9048 if (is_fragment != NULL) 9049 *is_fragment = B_TRUE; 9050 goto done; 9051 default : 9052 goto done; 9053 } 9054 whereptr += ehdrlen; 9055 } 9056 9057 done: 9058 return (rv); 9059 } 9060 9061 /* 9062 * ip_source_routed_v6: 9063 * This function is called by redirect code in ip_rput_data_v6 to 9064 * know whether this packet is source routed through this node i.e 9065 * whether this node (router) is part of the journey. 
This 9066 * function is called under two cases : 9067 * 9068 * case 1 : Routing header was processed by this node and 9069 * ip_process_rthdr replaced ip6_dst with the next hop 9070 * and we are forwarding the packet to the next hop. 9071 * 9072 * case 2 : Routing header was not processed by this node and we 9073 * are just forwarding the packet. 9074 * 9075 * For case (1) we don't want to send redirects. For case(2) we 9076 * want to send redirects. 9077 */ 9078 static boolean_t 9079 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 9080 { 9081 uint8_t nexthdr; 9082 in6_addr_t *addrptr; 9083 ip6_rthdr0_t *rthdr; 9084 uint8_t numaddr; 9085 ip6_hbh_t *hbhhdr; 9086 uint_t ehdrlen; 9087 uint8_t *byteptr; 9088 9089 ip2dbg(("ip_source_routed_v6\n")); 9090 nexthdr = ip6h->ip6_nxt; 9091 ehdrlen = IPV6_HDR_LEN; 9092 9093 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9094 while (nexthdr == IPPROTO_HOPOPTS || 9095 nexthdr == IPPROTO_DSTOPTS) { 9096 byteptr = (uint8_t *)ip6h + ehdrlen; 9097 /* 9098 * Check if we have already processed 9099 * packets or we are just a forwarding 9100 * router which only pulled up msgs up 9101 * to IPV6HDR and one HBH ext header 9102 */ 9103 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9104 ip2dbg(("ip_source_routed_v6: Extension" 9105 " headers not processed\n")); 9106 return (B_FALSE); 9107 } 9108 hbhhdr = (ip6_hbh_t *)byteptr; 9109 nexthdr = hbhhdr->ip6h_nxt; 9110 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9111 } 9112 switch (nexthdr) { 9113 case IPPROTO_ROUTING: 9114 byteptr = (uint8_t *)ip6h + ehdrlen; 9115 /* 9116 * If for some reason, we haven't pulled up 9117 * the routing hdr data mblk, then we must 9118 * not have processed it at all. So for sure 9119 * we are not part of the source routed journey. 
9120 */ 9121 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9122 ip2dbg(("ip_source_routed_v6: Routing" 9123 " header not processed\n")); 9124 return (B_FALSE); 9125 } 9126 rthdr = (ip6_rthdr0_t *)byteptr; 9127 /* 9128 * Either we are an intermediate router or the 9129 * last hop before destination and we have 9130 * already processed the routing header. 9131 * If segment_left is greater than or equal to zero, 9132 * then we must be the (numaddr - segleft) entry 9133 * of the routing header. Although ip6r0_segleft 9134 * is a unit8_t variable, we still check for zero 9135 * or greater value, if in case the data type 9136 * is changed someday in future. 9137 */ 9138 if (rthdr->ip6r0_segleft > 0 || 9139 rthdr->ip6r0_segleft == 0) { 9140 ire_t *ire = NULL; 9141 9142 numaddr = rthdr->ip6r0_len / 2; 9143 addrptr = (in6_addr_t *)((char *)rthdr + 9144 sizeof (*rthdr)); 9145 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9146 if (addrptr != NULL) { 9147 ire = ire_ctable_lookup_v6(addrptr, NULL, 9148 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9149 MATCH_IRE_TYPE, 9150 ipst); 9151 if (ire != NULL) { 9152 ire_refrele(ire); 9153 return (B_TRUE); 9154 } 9155 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9156 } 9157 } 9158 /* FALLTHRU */ 9159 default: 9160 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9161 return (B_FALSE); 9162 } 9163 } 9164 9165 /* 9166 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9167 * Assumes that the following set of headers appear in the first 9168 * mblk: 9169 * ip6i_t (if present) CAN also appear as a separate mblk. 9170 * ip6_t 9171 * Any extension headers 9172 * TCP/UDP/SCTP header (if present) 9173 * The routine can handle an ICMPv6 header that is not in the first mblk. 9174 * 9175 * The order to determine the outgoing interface is as follows: 9176 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9177 * 2. If conn_nofailover_ill is set then use that ill. 9178 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9179 * 4. If q is an ill queue and (link local or multicast destination) then 9180 * use that ill. 9181 * 5. If IPV6_BOUND_IF has been set use that ill. 9182 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9183 * look for the best IRE match for the unspecified group to determine 9184 * the ill. 9185 * 7. For unicast: Just do an IRE lookup for the best match. 9186 * 9187 * arg2 is always a queue_t *. 9188 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9189 * the zoneid. 9190 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9191 */ 9192 void 9193 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9194 { 9195 conn_t *connp = NULL; 9196 queue_t *q = (queue_t *)arg2; 9197 ire_t *ire = NULL; 9198 ire_t *sctp_ire = NULL; 9199 ip6_t *ip6h; 9200 in6_addr_t *v6dstp; 9201 ill_t *ill = NULL; 9202 ipif_t *ipif; 9203 ip6i_t *ip6i; 9204 int cksum_request; /* -1 => normal. */ 9205 /* 1 => Skip TCP/UDP/SCTP checksum */ 9206 /* Otherwise contains insert offset for checksum */ 9207 int unspec_src; 9208 boolean_t do_outrequests; /* Increment OutRequests? 
*/ 9209 mib2_ipIfStatsEntry_t *mibptr; 9210 int match_flags = MATCH_IRE_ILL_GROUP; 9211 boolean_t attach_if = B_FALSE; 9212 mblk_t *first_mp; 9213 boolean_t mctl_present; 9214 ipsec_out_t *io; 9215 boolean_t drop_if_delayed = B_FALSE; 9216 boolean_t multirt_need_resolve = B_FALSE; 9217 mblk_t *copy_mp = NULL; 9218 int err = 0; 9219 int ip6i_flags = 0; 9220 zoneid_t zoneid; 9221 ill_t *saved_ill = NULL; 9222 boolean_t conn_lock_held; 9223 boolean_t need_decref = B_FALSE; 9224 ip_stack_t *ipst; 9225 9226 if (q->q_next != NULL) { 9227 ill = (ill_t *)q->q_ptr; 9228 ipst = ill->ill_ipst; 9229 } else { 9230 connp = (conn_t *)arg; 9231 ASSERT(connp != NULL); 9232 ipst = connp->conn_netstack->netstack_ip; 9233 } 9234 9235 /* 9236 * Highest bit in version field is Reachability Confirmation bit 9237 * used by NUD in ip_xmit_v6(). 9238 */ 9239 #ifdef _BIG_ENDIAN 9240 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9241 #else 9242 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9243 #endif 9244 9245 /* 9246 * M_CTL comes from 6 places 9247 * 9248 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9249 * both V4 and V6 datagrams. 9250 * 9251 * 2) AH/ESP sends down M_CTL after doing their job with both 9252 * V4 and V6 datagrams. 9253 * 9254 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9255 * attached. 9256 * 9257 * 4) Notifications from an external resolver (for XRESOLV ifs) 9258 * 9259 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9260 * IPsec hardware acceleration support. 9261 * 9262 * 6) TUN_HELLO. 9263 * 9264 * We need to handle (1)'s IPv6 case and (3) here. For the 9265 * IPv4 case in (1), and (2), IPSEC processing has already 9266 * started. The code in ip_wput() already knows how to handle 9267 * continuing IPSEC processing (for IPv4 and IPv6). All other 9268 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9269 * for handling. 
9270 */ 9271 first_mp = mp; 9272 mctl_present = B_FALSE; 9273 io = NULL; 9274 9275 /* Multidata transmit? */ 9276 if (DB_TYPE(mp) == M_MULTIDATA) { 9277 /* 9278 * We should never get here, since all Multidata messages 9279 * originating from tcp should have been directed over to 9280 * tcp_multisend() in the first place. 9281 */ 9282 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9283 freemsg(mp); 9284 return; 9285 } else if (DB_TYPE(mp) == M_CTL) { 9286 uint32_t mctltype = 0; 9287 uint32_t mlen = MBLKL(first_mp); 9288 9289 mp = mp->b_cont; 9290 mctl_present = B_TRUE; 9291 io = (ipsec_out_t *)first_mp->b_rptr; 9292 9293 /* 9294 * Validate this M_CTL message. The only three types of 9295 * M_CTL messages we expect to see in this code path are 9296 * ipsec_out_t or ipsec_in_t structures (allocated as 9297 * ipsec_info_t unions), or ipsec_ctl_t structures. 9298 * The ipsec_out_type and ipsec_in_type overlap in the two 9299 * data structures, and they are either set to IPSEC_OUT 9300 * or IPSEC_IN depending on which data structure it is. 9301 * ipsec_ctl_t is an IPSEC_CTL. 9302 * 9303 * All other M_CTL messages are sent to ip_wput_nondata() 9304 * for handling. 9305 */ 9306 if (mlen >= sizeof (io->ipsec_out_type)) 9307 mctltype = io->ipsec_out_type; 9308 9309 if ((mlen == sizeof (ipsec_ctl_t)) && 9310 (mctltype == IPSEC_CTL)) { 9311 ip_output(arg, first_mp, arg2, caller); 9312 return; 9313 } 9314 9315 if ((mlen < sizeof (ipsec_info_t)) || 9316 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9317 mp == NULL) { 9318 ip_wput_nondata(NULL, q, first_mp, NULL); 9319 return; 9320 } 9321 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9322 if (q->q_next == NULL) { 9323 ip6h = (ip6_t *)mp->b_rptr; 9324 /* 9325 * For a freshly-generated TCP dgram that needs IPV6 9326 * processing, don't call ip_wput immediately. We can 9327 * tell this by the ipsec_out_proc_begin. 
In-progress 9328 * IPSEC_OUT messages have proc_begin set to TRUE, 9329 * and we want to send all IPSEC_IN messages to 9330 * ip_wput() for IPsec processing or finishing. 9331 */ 9332 if (mctltype == IPSEC_IN || 9333 IPVER(ip6h) != IPV6_VERSION || 9334 io->ipsec_out_proc_begin) { 9335 mibptr = &ipst->ips_ip6_mib; 9336 goto notv6; 9337 } 9338 } 9339 } else if (DB_TYPE(mp) != M_DATA) { 9340 ip_wput_nondata(NULL, q, mp, NULL); 9341 return; 9342 } 9343 9344 ip6h = (ip6_t *)mp->b_rptr; 9345 9346 if (IPVER(ip6h) != IPV6_VERSION) { 9347 mibptr = &ipst->ips_ip6_mib; 9348 goto notv6; 9349 } 9350 9351 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9352 (connp == NULL || !connp->conn_ulp_labeled)) { 9353 if (connp != NULL) { 9354 ASSERT(CONN_CRED(connp) != NULL); 9355 err = tsol_check_label_v6(BEST_CRED(mp, connp), 9356 &mp, connp->conn_mac_exempt, ipst); 9357 } else if (DB_CRED(mp) != NULL) { 9358 err = tsol_check_label_v6(DB_CRED(mp), 9359 &mp, B_FALSE, ipst); 9360 } 9361 if (mctl_present) 9362 first_mp->b_cont = mp; 9363 else 9364 first_mp = mp; 9365 if (err != 0) { 9366 DTRACE_PROBE3( 9367 tsol_ip_log_drop_checklabel_ip6, char *, 9368 "conn(1), failed to check/update mp(2)", 9369 conn_t, connp, mblk_t, mp); 9370 freemsg(first_mp); 9371 return; 9372 } 9373 ip6h = (ip6_t *)mp->b_rptr; 9374 } 9375 if (q->q_next != NULL) { 9376 /* 9377 * We don't know if this ill will be used for IPv6 9378 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9379 * ipif_set_values() sets the ill_isv6 flag to true if 9380 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9381 * just drop the packet. 
9382 */ 9383 if (!ill->ill_isv6) { 9384 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9385 "ILLF_IPV6 was set\n")); 9386 freemsg(first_mp); 9387 return; 9388 } 9389 /* For uniformity do a refhold */ 9390 mutex_enter(&ill->ill_lock); 9391 if (!ILL_CAN_LOOKUP(ill)) { 9392 mutex_exit(&ill->ill_lock); 9393 freemsg(first_mp); 9394 return; 9395 } 9396 ill_refhold_locked(ill); 9397 mutex_exit(&ill->ill_lock); 9398 mibptr = ill->ill_ip_mib; 9399 9400 ASSERT(mibptr != NULL); 9401 unspec_src = 0; 9402 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9403 do_outrequests = B_FALSE; 9404 zoneid = (zoneid_t)(uintptr_t)arg; 9405 } else { 9406 ASSERT(connp != NULL); 9407 zoneid = connp->conn_zoneid; 9408 9409 /* is queue flow controlled? */ 9410 if ((q->q_first || connp->conn_draining) && 9411 (caller == IP_WPUT)) { 9412 /* 9413 * 1) TCP sends down M_CTL for detached connections. 9414 * 2) AH/ESP sends down M_CTL. 9415 * 9416 * We don't flow control either of the above. Only 9417 * UDP and others are flow controlled for which we 9418 * can't have a M_CTL. 9419 */ 9420 ASSERT(first_mp == mp); 9421 (void) putq(q, mp); 9422 return; 9423 } 9424 mibptr = &ipst->ips_ip6_mib; 9425 unspec_src = connp->conn_unspec_src; 9426 do_outrequests = B_TRUE; 9427 if (mp->b_flag & MSGHASREF) { 9428 mp->b_flag &= ~MSGHASREF; 9429 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9430 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9431 need_decref = B_TRUE; 9432 } 9433 9434 /* 9435 * If there is a policy, try to attach an ipsec_out in 9436 * the front. At the end, first_mp either points to a 9437 * M_DATA message or IPSEC_OUT message linked to a 9438 * M_DATA message. We have to do it now as we might 9439 * lose the "conn" if we go through ip_newroute. 9440 */ 9441 if (!mctl_present && 9442 (connp->conn_out_enforce_policy || 9443 connp->conn_latch != NULL)) { 9444 ASSERT(first_mp == mp); 9445 /* XXX Any better way to get the protocol fast ? 
*/ 9446 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9447 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9448 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9449 if (need_decref) 9450 CONN_DEC_REF(connp); 9451 return; 9452 } else { 9453 ASSERT(mp->b_datap->db_type == M_CTL); 9454 first_mp = mp; 9455 mp = mp->b_cont; 9456 mctl_present = B_TRUE; 9457 io = (ipsec_out_t *)first_mp->b_rptr; 9458 } 9459 } 9460 } 9461 9462 /* check for alignment and full IPv6 header */ 9463 if (!OK_32PTR((uchar_t *)ip6h) || 9464 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9465 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9466 if (do_outrequests) 9467 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9468 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9469 freemsg(first_mp); 9470 if (ill != NULL) 9471 ill_refrele(ill); 9472 if (need_decref) 9473 CONN_DEC_REF(connp); 9474 return; 9475 } 9476 v6dstp = &ip6h->ip6_dst; 9477 cksum_request = -1; 9478 ip6i = NULL; 9479 9480 /* 9481 * Once neighbor discovery has completed, ndp_process() will provide 9482 * locally generated packets for which processing can be reattempted. 9483 * In these cases, connp is NULL and the original zone is part of a 9484 * prepended ipsec_out_t. 9485 */ 9486 if (io != NULL) { 9487 /* 9488 * When coming from icmp_input_v6, the zoneid might not match 9489 * for the loopback case, because inside icmp_input_v6 the 9490 * queue_t is a conn queue from the sending side. 9491 */ 9492 zoneid = io->ipsec_out_zoneid; 9493 ASSERT(zoneid != ALL_ZONES); 9494 } 9495 9496 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9497 /* 9498 * This is an ip6i_t header followed by an ip6_hdr. 9499 * Check which fields are set. 9500 * 9501 * When the packet comes from a transport we should have 9502 * all needed headers in the first mblk. However, when 9503 * going through ip_newroute*_v6 the ip6i might be in 9504 * a separate mblk when we return here. 
In that case 9505 * we pullup everything to ensure that extension and transport 9506 * headers "stay" in the first mblk. 9507 */ 9508 ip6i = (ip6i_t *)ip6h; 9509 ip6i_flags = ip6i->ip6i_flags; 9510 9511 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9512 ((mp->b_wptr - (uchar_t *)ip6i) >= 9513 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9514 9515 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9516 if (!pullupmsg(mp, -1)) { 9517 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9518 if (do_outrequests) { 9519 BUMP_MIB(mibptr, 9520 ipIfStatsHCOutRequests); 9521 } 9522 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9523 freemsg(first_mp); 9524 if (ill != NULL) 9525 ill_refrele(ill); 9526 if (need_decref) 9527 CONN_DEC_REF(connp); 9528 return; 9529 } 9530 ip6h = (ip6_t *)mp->b_rptr; 9531 v6dstp = &ip6h->ip6_dst; 9532 ip6i = (ip6i_t *)ip6h; 9533 } 9534 ip6h = (ip6_t *)&ip6i[1]; 9535 9536 /* 9537 * Advance rptr past the ip6i_t to get ready for 9538 * transmitting the packet. However, if the packet gets 9539 * passed to ip_newroute*_v6 then rptr is moved back so 9540 * that the ip6i_t header can be inspected when the 9541 * packet comes back here after passing through 9542 * ire_add_then_send. 9543 */ 9544 mp->b_rptr = (uchar_t *)ip6h; 9545 9546 /* 9547 * IP6I_ATTACH_IF is set in this function when we had a 9548 * conn and it was either bound to the IPFF_NOFAILOVER address 9549 * or IPV6_BOUND_PIF was set. These options override other 9550 * options that set the ifindex. We come here with 9551 * IP6I_ATTACH_IF set when we can't find the ire and 9552 * ip_newroute_v6 is feeding the packet for second time. 
9553 */ 9554 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9555 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9556 ASSERT(ip6i->ip6i_ifindex != 0); 9557 if (ill != NULL) 9558 ill_refrele(ill); 9559 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9560 NULL, NULL, NULL, NULL, ipst); 9561 if (ill == NULL) { 9562 if (do_outrequests) { 9563 BUMP_MIB(mibptr, 9564 ipIfStatsHCOutRequests); 9565 } 9566 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9567 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9568 ip6i->ip6i_ifindex)); 9569 if (need_decref) 9570 CONN_DEC_REF(connp); 9571 freemsg(first_mp); 9572 return; 9573 } 9574 mibptr = ill->ill_ip_mib; 9575 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9576 /* 9577 * Preserve the index so that when we return 9578 * from IPSEC processing, we know where to 9579 * send the packet. 9580 */ 9581 if (mctl_present) { 9582 ASSERT(io != NULL); 9583 io->ipsec_out_ill_index = 9584 ip6i->ip6i_ifindex; 9585 } 9586 } 9587 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9588 /* 9589 * This is a multipathing probe packet that has 9590 * been delayed in ND resolution. Drop the 9591 * packet for the reasons mentioned in 9592 * nce_queue_mp() 9593 */ 9594 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9595 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9596 freemsg(first_mp); 9597 ill_refrele(ill); 9598 if (need_decref) 9599 CONN_DEC_REF(connp); 9600 return; 9601 } 9602 } 9603 } 9604 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9605 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9606 9607 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9608 if (secpolicy_net_rawaccess(cr) != 0) { 9609 /* 9610 * Use IPCL_ZONEID to honor SO_ALLZONES. 9611 */ 9612 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9613 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9614 NULL, connp != NULL ? 
9615 IPCL_ZONEID(connp) : zoneid, NULL, 9616 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9617 if (ire == NULL) { 9618 if (do_outrequests) 9619 BUMP_MIB(mibptr, 9620 ipIfStatsHCOutRequests); 9621 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9622 ip1dbg(("ip_wput_v6: bad source " 9623 "addr\n")); 9624 freemsg(first_mp); 9625 if (ill != NULL) 9626 ill_refrele(ill); 9627 if (need_decref) 9628 CONN_DEC_REF(connp); 9629 return; 9630 } 9631 ire_refrele(ire); 9632 } 9633 /* No need to verify again when using ip_newroute */ 9634 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9635 } 9636 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9637 /* 9638 * Make sure they match since ip_newroute*_v6 etc might 9639 * (unknown to them) inspect ip6i_nexthop when 9640 * they think they access ip6_dst. 9641 */ 9642 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9643 } 9644 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9645 cksum_request = 1; 9646 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9647 cksum_request = ip6i->ip6i_checksum_off; 9648 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9649 unspec_src = 1; 9650 9651 if (do_outrequests && ill != NULL) { 9652 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9653 do_outrequests = B_FALSE; 9654 } 9655 /* 9656 * Store ip6i_t info that we need after we come back 9657 * from IPSEC processing. 9658 */ 9659 if (mctl_present) { 9660 ASSERT(io != NULL); 9661 io->ipsec_out_unspec_src = unspec_src; 9662 } 9663 } 9664 if (connp != NULL && connp->conn_dontroute) 9665 ip6h->ip6_hops = 1; 9666 9667 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9668 goto ipv6multicast; 9669 9670 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. 
*/ 9671 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9672 ill_t *conn_outgoing_pill; 9673 9674 conn_outgoing_pill = conn_get_held_ill(connp, 9675 &connp->conn_outgoing_pill, &err); 9676 if (err == ILL_LOOKUP_FAILED) { 9677 if (ill != NULL) 9678 ill_refrele(ill); 9679 if (need_decref) 9680 CONN_DEC_REF(connp); 9681 freemsg(first_mp); 9682 return; 9683 } 9684 if (conn_outgoing_pill != NULL) { 9685 if (ill != NULL) 9686 ill_refrele(ill); 9687 ill = conn_outgoing_pill; 9688 attach_if = B_TRUE; 9689 match_flags = MATCH_IRE_ILL; 9690 mibptr = ill->ill_ip_mib; 9691 9692 /* 9693 * Check if we need an ire that will not be 9694 * looked up by anybody else i.e. HIDDEN. 9695 */ 9696 if (ill_is_probeonly(ill)) 9697 match_flags |= MATCH_IRE_MARK_HIDDEN; 9698 goto send_from_ill; 9699 } 9700 } 9701 9702 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9703 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9704 ill_t *conn_nofailover_ill; 9705 9706 conn_nofailover_ill = conn_get_held_ill(connp, 9707 &connp->conn_nofailover_ill, &err); 9708 if (err == ILL_LOOKUP_FAILED) { 9709 if (ill != NULL) 9710 ill_refrele(ill); 9711 if (need_decref) 9712 CONN_DEC_REF(connp); 9713 freemsg(first_mp); 9714 return; 9715 } 9716 if (conn_nofailover_ill != NULL) { 9717 if (ill != NULL) 9718 ill_refrele(ill); 9719 ill = conn_nofailover_ill; 9720 attach_if = B_TRUE; 9721 /* 9722 * Assumes that ipc_nofailover_ill is used only for 9723 * multipathing probe packets. These packets are better 9724 * dropped, if they are delayed in ND resolution, for 9725 * the reasons described in nce_queue_mp(). 9726 * IP6I_DROP_IFDELAYED will be set later on in this 9727 * function for this packet. 9728 */ 9729 drop_if_delayed = B_TRUE; 9730 match_flags = MATCH_IRE_ILL; 9731 mibptr = ill->ill_ip_mib; 9732 9733 /* 9734 * Check if we need an ire that will not be 9735 * looked up by anybody else i.e. HIDDEN. 
9736 */ 9737 if (ill_is_probeonly(ill)) 9738 match_flags |= MATCH_IRE_MARK_HIDDEN; 9739 goto send_from_ill; 9740 } 9741 } 9742 9743 /* 9744 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9745 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9746 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9747 */ 9748 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9749 ASSERT(ip6i->ip6i_ifindex != 0); 9750 attach_if = B_TRUE; 9751 ASSERT(ill != NULL); 9752 match_flags = MATCH_IRE_ILL; 9753 9754 /* 9755 * Check if we need an ire that will not be 9756 * looked up by anybody else i.e. HIDDEN. 9757 */ 9758 if (ill_is_probeonly(ill)) 9759 match_flags |= MATCH_IRE_MARK_HIDDEN; 9760 goto send_from_ill; 9761 } 9762 9763 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9764 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9765 ASSERT(ill != NULL); 9766 goto send_from_ill; 9767 } 9768 9769 /* 9770 * 4. If q is an ill queue and (link local or multicast destination) 9771 * then use that ill. 9772 */ 9773 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9774 goto send_from_ill; 9775 } 9776 9777 /* 5. If IPV6_BOUND_IF has been set use that ill. */ 9778 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9779 ill_t *conn_outgoing_ill; 9780 9781 conn_outgoing_ill = conn_get_held_ill(connp, 9782 &connp->conn_outgoing_ill, &err); 9783 if (err == ILL_LOOKUP_FAILED) { 9784 if (ill != NULL) 9785 ill_refrele(ill); 9786 if (need_decref) 9787 CONN_DEC_REF(connp); 9788 freemsg(first_mp); 9789 return; 9790 } 9791 if (ill != NULL) 9792 ill_refrele(ill); 9793 ill = conn_outgoing_ill; 9794 mibptr = ill->ill_ip_mib; 9795 goto send_from_ill; 9796 } 9797 9798 /* 9799 * 6. For unicast: Just do an IRE lookup for the best match. 9800 * If we get here for a link-local address it is rather random 9801 * what interface we pick on a multihomed host. 
9802 * *If* there is an IRE_CACHE (and the link-local address 9803 * isn't duplicated on multi links) this will find the IRE_CACHE. 9804 * Otherwise it will use one of the matching IRE_INTERFACE routes 9805 * for the link-local prefix. Hence, applications 9806 * *should* be encouraged to specify an outgoing interface when sending 9807 * to a link local address. 9808 */ 9809 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9810 !connp->conn_fully_bound)) { 9811 /* 9812 * We cache IRE_CACHEs to avoid lookups. We don't do 9813 * this for the tcp global queue and listen end point 9814 * as it does not really have a real destination to 9815 * talk to. 9816 */ 9817 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), 9818 ipst); 9819 } else { 9820 /* 9821 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9822 * grab a lock here to check for CONDEMNED as it is okay 9823 * to send a packet or two with the IRE_CACHE that is going 9824 * away. 9825 */ 9826 mutex_enter(&connp->conn_lock); 9827 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9828 if (ire != NULL && 9829 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9830 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9831 9832 IRE_REFHOLD(ire); 9833 mutex_exit(&connp->conn_lock); 9834 9835 } else { 9836 boolean_t cached = B_FALSE; 9837 9838 connp->conn_ire_cache = NULL; 9839 mutex_exit(&connp->conn_lock); 9840 /* Release the old ire */ 9841 if (ire != NULL && sctp_ire == NULL) 9842 IRE_REFRELE_NOTR(ire); 9843 9844 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9845 MBLK_GETLABEL(mp), ipst); 9846 if (ire != NULL) { 9847 IRE_REFHOLD_NOTR(ire); 9848 9849 mutex_enter(&connp->conn_lock); 9850 if (CONN_CACHE_IRE(connp) && 9851 (connp->conn_ire_cache == NULL)) { 9852 rw_enter(&ire->ire_bucket->irb_lock, 9853 RW_READER); 9854 if (!(ire->ire_marks & 9855 IRE_MARK_CONDEMNED)) { 9856 connp->conn_ire_cache = ire; 9857 cached = B_TRUE; 9858 } 9859 rw_exit(&ire->ire_bucket->irb_lock); 9860 } 9861 mutex_exit(&connp->conn_lock); 9862 9863 /* 9864 * We can continue to use the ire but since it 9865 * was not cached, we should drop the extra 9866 * reference. 9867 */ 9868 if (!cached) 9869 IRE_REFRELE_NOTR(ire); 9870 } 9871 } 9872 } 9873 9874 if (ire != NULL) { 9875 if (do_outrequests) { 9876 /* Handle IRE_LOCAL's that might appear here */ 9877 if (ire->ire_type == IRE_CACHE) { 9878 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9879 ill_ip_mib; 9880 } else { 9881 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9882 } 9883 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9884 } 9885 ASSERT(!attach_if); 9886 9887 /* 9888 * Check if the ire has the RTF_MULTIRT flag, inherited 9889 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9890 */ 9891 if (ire->ire_flags & RTF_MULTIRT) { 9892 /* 9893 * Force hop limit of multirouted packets if required. 9894 * The hop limit of such packets is bounded by the 9895 * ip_multirt_ttl ndd variable. 9896 * NDP packets must have a hop limit of 255; don't 9897 * change the hop limit in that case. 
9898 */ 9899 if ((ipst->ips_ip_multirt_ttl > 0) && 9900 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9901 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9902 if (ip_debug > 3) { 9903 ip2dbg(("ip_wput_v6: forcing multirt " 9904 "hop limit to %d (was %d) ", 9905 ipst->ips_ip_multirt_ttl, 9906 ip6h->ip6_hops)); 9907 pr_addr_dbg("v6dst %s\n", AF_INET6, 9908 &ire->ire_addr_v6); 9909 } 9910 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9911 } 9912 9913 /* 9914 * We look at this point if there are pending 9915 * unresolved routes. ire_multirt_need_resolve_v6() 9916 * checks in O(n) that all IRE_OFFSUBNET ire 9917 * entries for the packet's destination and 9918 * flagged RTF_MULTIRT are currently resolved. 9919 * If some remain unresolved, we do a copy 9920 * of the current message. It will be used 9921 * to initiate additional route resolutions. 9922 */ 9923 multirt_need_resolve = 9924 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9925 MBLK_GETLABEL(first_mp), ipst); 9926 ip2dbg(("ip_wput_v6: ire %p, " 9927 "multirt_need_resolve %d, first_mp %p\n", 9928 (void *)ire, multirt_need_resolve, 9929 (void *)first_mp)); 9930 if (multirt_need_resolve) { 9931 copy_mp = copymsg(first_mp); 9932 if (copy_mp != NULL) { 9933 MULTIRT_DEBUG_TAG(copy_mp); 9934 } 9935 } 9936 } 9937 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9938 connp, caller, 0, ip6i_flags, zoneid); 9939 if (need_decref) { 9940 CONN_DEC_REF(connp); 9941 connp = NULL; 9942 } 9943 IRE_REFRELE(ire); 9944 9945 /* 9946 * Try to resolve another multiroute if 9947 * ire_multirt_need_resolve_v6() deemed it necessary. 9948 * copy_mp will be consumed (sent or freed) by 9949 * ip_newroute_v6(). 
9950 */ 9951 if (copy_mp != NULL) { 9952 if (mctl_present) { 9953 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9954 } else { 9955 ip6h = (ip6_t *)copy_mp->b_rptr; 9956 } 9957 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9958 &ip6h->ip6_src, NULL, zoneid, ipst); 9959 } 9960 if (ill != NULL) 9961 ill_refrele(ill); 9962 return; 9963 } 9964 9965 /* 9966 * No full IRE for this destination. Send it to 9967 * ip_newroute_v6 to see if anything else matches. 9968 * Mark this packet as having originated on this 9969 * machine. 9970 * Update rptr if there was an ip6i_t header. 9971 */ 9972 mp->b_prev = NULL; 9973 mp->b_next = NULL; 9974 if (ip6i != NULL) 9975 mp->b_rptr -= sizeof (ip6i_t); 9976 9977 if (unspec_src) { 9978 if (ip6i == NULL) { 9979 /* 9980 * Add ip6i_t header to carry unspec_src 9981 * until the packet comes back in ip_wput_v6. 9982 */ 9983 mp = ip_add_info_v6(mp, NULL, v6dstp); 9984 if (mp == NULL) { 9985 if (do_outrequests) 9986 BUMP_MIB(mibptr, 9987 ipIfStatsHCOutRequests); 9988 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9989 if (mctl_present) 9990 freeb(first_mp); 9991 if (ill != NULL) 9992 ill_refrele(ill); 9993 if (need_decref) 9994 CONN_DEC_REF(connp); 9995 return; 9996 } 9997 ip6i = (ip6i_t *)mp->b_rptr; 9998 9999 if (mctl_present) { 10000 ASSERT(first_mp != mp); 10001 first_mp->b_cont = mp; 10002 } else { 10003 first_mp = mp; 10004 } 10005 10006 if ((mp->b_wptr - (uchar_t *)ip6i) == 10007 sizeof (ip6i_t)) { 10008 /* 10009 * ndp_resolver called from ip_newroute_v6 10010 * expects pulled up message. 
10011 */ 10012 if (!pullupmsg(mp, -1)) { 10013 ip1dbg(("ip_wput_v6: pullupmsg" 10014 " failed\n")); 10015 if (do_outrequests) { 10016 BUMP_MIB(mibptr, 10017 ipIfStatsHCOutRequests); 10018 } 10019 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10020 freemsg(first_mp); 10021 if (ill != NULL) 10022 ill_refrele(ill); 10023 if (need_decref) 10024 CONN_DEC_REF(connp); 10025 return; 10026 } 10027 ip6i = (ip6i_t *)mp->b_rptr; 10028 } 10029 ip6h = (ip6_t *)&ip6i[1]; 10030 v6dstp = &ip6h->ip6_dst; 10031 } 10032 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10033 if (mctl_present) { 10034 ASSERT(io != NULL); 10035 io->ipsec_out_unspec_src = unspec_src; 10036 } 10037 } 10038 if (do_outrequests) 10039 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10040 if (need_decref) 10041 CONN_DEC_REF(connp); 10042 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 10043 if (ill != NULL) 10044 ill_refrele(ill); 10045 return; 10046 10047 10048 /* 10049 * Handle multicast packets with or without an conn. 10050 * Assumes that the transports set ip6_hops taking 10051 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10052 * into account. 10053 */ 10054 ipv6multicast: 10055 ip2dbg(("ip_wput_v6: multicast\n")); 10056 10057 /* 10058 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10059 * 2. If conn_nofailover_ill is set then use that ill. 10060 * 10061 * Hold the conn_lock till we refhold the ill of interest that is 10062 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10063 * while holding any locks, postpone the refrele until after the 10064 * conn_lock is dropped. 
10065 */ 10066 if (connp != NULL) { 10067 mutex_enter(&connp->conn_lock); 10068 conn_lock_held = B_TRUE; 10069 } else { 10070 conn_lock_held = B_FALSE; 10071 } 10072 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10073 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10074 if (err == ILL_LOOKUP_FAILED) { 10075 ip1dbg(("ip_output_v6: multicast" 10076 " conn_outgoing_pill no ipif\n")); 10077 multicast_discard: 10078 ASSERT(saved_ill == NULL); 10079 if (conn_lock_held) 10080 mutex_exit(&connp->conn_lock); 10081 if (ill != NULL) 10082 ill_refrele(ill); 10083 freemsg(first_mp); 10084 if (do_outrequests) 10085 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10086 if (need_decref) 10087 CONN_DEC_REF(connp); 10088 return; 10089 } 10090 saved_ill = ill; 10091 ill = connp->conn_outgoing_pill; 10092 attach_if = B_TRUE; 10093 match_flags = MATCH_IRE_ILL; 10094 mibptr = ill->ill_ip_mib; 10095 10096 /* 10097 * Check if we need an ire that will not be 10098 * looked up by anybody else i.e. HIDDEN. 10099 */ 10100 if (ill_is_probeonly(ill)) 10101 match_flags |= MATCH_IRE_MARK_HIDDEN; 10102 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10103 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10104 if (err == ILL_LOOKUP_FAILED) { 10105 ip1dbg(("ip_output_v6: multicast" 10106 " conn_nofailover_ill no ipif\n")); 10107 goto multicast_discard; 10108 } 10109 saved_ill = ill; 10110 ill = connp->conn_nofailover_ill; 10111 attach_if = B_TRUE; 10112 match_flags = MATCH_IRE_ILL; 10113 10114 /* 10115 * Check if we need an ire that will not be 10116 * looked up by anybody else i.e. HIDDEN. 10117 */ 10118 if (ill_is_probeonly(ill)) 10119 match_flags |= MATCH_IRE_MARK_HIDDEN; 10120 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10121 /* 10122 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10123 * we should have an ip6i_t with IP6I_ATTACH_IF if 10124 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10125 * used on this endpoint. 10126 */ 10127 ASSERT(ip6i->ip6i_ifindex != 0); 10128 attach_if = B_TRUE; 10129 ASSERT(ill != NULL); 10130 match_flags = MATCH_IRE_ILL; 10131 10132 /* 10133 * Check if we need an ire that will not be 10134 * looked up by anybody else i.e. HIDDEN. 10135 */ 10136 if (ill_is_probeonly(ill)) 10137 match_flags |= MATCH_IRE_MARK_HIDDEN; 10138 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10139 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10140 10141 ASSERT(ill != NULL); 10142 } else if (ill != NULL) { 10143 /* 10144 * 4. If q is an ill queue and (link local or multicast 10145 * destination) then use that ill. 10146 * We don't need the ipif initialization here. 10147 * This useless assert below is just to prevent lint from 10148 * reporting a null body if statement. 10149 */ 10150 ASSERT(ill != NULL); 10151 } else if (connp != NULL) { 10152 /* 10153 * 5. If IPV6_BOUND_IF has been set use that ill. 10154 * 10155 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10156 * Otherwise look for the best IRE match for the unspecified 10157 * group to determine the ill. 10158 * 10159 * conn_multicast_ill is used for only IPv6 packets. 10160 * conn_multicast_ipif is used for only IPv4 packets. 10161 * Thus a PF_INET6 socket send both IPv4 and IPv6 10162 * multicast packets using different IP*_MULTICAST_IF 10163 * interfaces. 
10164 */ 10165 if (connp->conn_outgoing_ill != NULL) { 10166 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10167 if (err == ILL_LOOKUP_FAILED) { 10168 ip1dbg(("ip_output_v6: multicast" 10169 " conn_outgoing_ill no ipif\n")); 10170 goto multicast_discard; 10171 } 10172 ill = connp->conn_outgoing_ill; 10173 } else if (connp->conn_multicast_ill != NULL) { 10174 err = ill_check_and_refhold(connp->conn_multicast_ill); 10175 if (err == ILL_LOOKUP_FAILED) { 10176 ip1dbg(("ip_output_v6: multicast" 10177 " conn_multicast_ill no ipif\n")); 10178 goto multicast_discard; 10179 } 10180 ill = connp->conn_multicast_ill; 10181 } else { 10182 mutex_exit(&connp->conn_lock); 10183 conn_lock_held = B_FALSE; 10184 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 10185 if (ipif == NULL) { 10186 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10187 goto multicast_discard; 10188 } 10189 /* 10190 * We have a ref to this ipif, so we can safely 10191 * access ipif_ill. 10192 */ 10193 ill = ipif->ipif_ill; 10194 mutex_enter(&ill->ill_lock); 10195 if (!ILL_CAN_LOOKUP(ill)) { 10196 mutex_exit(&ill->ill_lock); 10197 ipif_refrele(ipif); 10198 ill = NULL; 10199 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10200 goto multicast_discard; 10201 } 10202 ill_refhold_locked(ill); 10203 mutex_exit(&ill->ill_lock); 10204 ipif_refrele(ipif); 10205 /* 10206 * Save binding until IPV6_MULTICAST_IF 10207 * changes it 10208 */ 10209 mutex_enter(&connp->conn_lock); 10210 connp->conn_multicast_ill = ill; 10211 connp->conn_orig_multicast_ifindex = 10212 ill->ill_phyint->phyint_ifindex; 10213 mutex_exit(&connp->conn_lock); 10214 } 10215 } 10216 if (conn_lock_held) 10217 mutex_exit(&connp->conn_lock); 10218 10219 if (saved_ill != NULL) 10220 ill_refrele(saved_ill); 10221 10222 ASSERT(ill != NULL); 10223 /* 10224 * For multicast loopback interfaces replace the multicast address 10225 * with a unicast address for the ire lookup. 
10226 */ 10227 if (IS_LOOPBACK(ill)) 10228 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10229 10230 mibptr = ill->ill_ip_mib; 10231 if (do_outrequests) { 10232 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10233 do_outrequests = B_FALSE; 10234 } 10235 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10236 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10237 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10238 10239 /* 10240 * As we may lose the conn by the time we reach ip_wput_ire_v6 10241 * we copy conn_multicast_loop and conn_dontroute on to an 10242 * ipsec_out. In case if this datagram goes out secure, 10243 * we need the ill_index also. Copy that also into the 10244 * ipsec_out. 10245 */ 10246 if (mctl_present) { 10247 io = (ipsec_out_t *)first_mp->b_rptr; 10248 ASSERT(first_mp->b_datap->db_type == M_CTL); 10249 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10250 } else { 10251 ASSERT(mp == first_mp); 10252 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 10253 NULL) { 10254 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10255 freemsg(mp); 10256 if (ill != NULL) 10257 ill_refrele(ill); 10258 if (need_decref) 10259 CONN_DEC_REF(connp); 10260 return; 10261 } 10262 io = (ipsec_out_t *)first_mp->b_rptr; 10263 /* This is not a secure packet */ 10264 io->ipsec_out_secure = B_FALSE; 10265 io->ipsec_out_use_global_policy = B_TRUE; 10266 io->ipsec_out_zoneid = 10267 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10268 first_mp->b_cont = mp; 10269 mctl_present = B_TRUE; 10270 } 10271 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10272 io->ipsec_out_unspec_src = unspec_src; 10273 if (connp != NULL) 10274 io->ipsec_out_dontroute = connp->conn_dontroute; 10275 10276 send_from_ill: 10277 ASSERT(ill != NULL); 10278 ASSERT(mibptr == ill->ill_ip_mib); 10279 if (do_outrequests) { 10280 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10281 do_outrequests = B_FALSE; 10282 } 10283 10284 if (io != NULL) 10285 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10286 10287 /* 10288 * When a specific ill is specified (using IPV6_PKTINFO, 10289 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10290 * on routing entries (ftable and ctable) that have a matching 10291 * ire->ire_ipif->ipif_ill. Thus this can only be used 10292 * for destinations that are on-link for the specific ill 10293 * and that can appear on multiple links. Thus it is useful 10294 * for multicast destinations, link-local destinations, and 10295 * at some point perhaps for site-local destinations (if the 10296 * node sits at a site boundary). 10297 * We create the cache entries in the regular ctable since 10298 * it can not "confuse" things for other destinations. 10299 * table. 10300 * 10301 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10302 * It is used only when ire_cache_lookup is used above. 10303 */ 10304 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10305 zoneid, MBLK_GETLABEL(mp), match_flags, ipst); 10306 if (ire != NULL) { 10307 /* 10308 * Check if the ire has the RTF_MULTIRT flag, inherited 10309 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10310 */ 10311 if (ire->ire_flags & RTF_MULTIRT) { 10312 /* 10313 * Force hop limit of multirouted packets if required. 10314 * The hop limit of such packets is bounded by the 10315 * ip_multirt_ttl ndd variable. 
10316 * NDP packets must have a hop limit of 255; don't 10317 * change the hop limit in that case. 10318 */ 10319 if ((ipst->ips_ip_multirt_ttl > 0) && 10320 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10321 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10322 if (ip_debug > 3) { 10323 ip2dbg(("ip_wput_v6: forcing multirt " 10324 "hop limit to %d (was %d) ", 10325 ipst->ips_ip_multirt_ttl, 10326 ip6h->ip6_hops)); 10327 pr_addr_dbg("v6dst %s\n", AF_INET6, 10328 &ire->ire_addr_v6); 10329 } 10330 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10331 } 10332 10333 /* 10334 * We look at this point if there are pending 10335 * unresolved routes. ire_multirt_need_resolve_v6() 10336 * checks in O(n) that all IRE_OFFSUBNET ire 10337 * entries for the packet's destination and 10338 * flagged RTF_MULTIRT are currently resolved. 10339 * If some remain unresolved, we make a copy 10340 * of the current message. It will be used 10341 * to initiate additional route resolutions. 10342 */ 10343 multirt_need_resolve = 10344 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10345 MBLK_GETLABEL(first_mp), ipst); 10346 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10347 "multirt_need_resolve %d, first_mp %p\n", 10348 (void *)ire, multirt_need_resolve, 10349 (void *)first_mp)); 10350 if (multirt_need_resolve) { 10351 copy_mp = copymsg(first_mp); 10352 if (copy_mp != NULL) { 10353 MULTIRT_DEBUG_TAG(copy_mp); 10354 } 10355 } 10356 } 10357 10358 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10359 ill->ill_name, (void *)ire, 10360 ill->ill_phyint->phyint_ifindex)); 10361 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10362 connp, caller, 10363 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10364 ip6i_flags, zoneid); 10365 ire_refrele(ire); 10366 if (need_decref) { 10367 CONN_DEC_REF(connp); 10368 connp = NULL; 10369 } 10370 10371 /* 10372 * Try to resolve another multiroute if 10373 * ire_multirt_need_resolve_v6() deemed it necessary. 
10374 * copy_mp will be consumed (sent or freed) by 10375 * ip_newroute_[ipif_]v6(). 10376 */ 10377 if (copy_mp != NULL) { 10378 if (mctl_present) { 10379 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10380 } else { 10381 ip6h = (ip6_t *)copy_mp->b_rptr; 10382 } 10383 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10384 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10385 zoneid, ipst); 10386 if (ipif == NULL) { 10387 ip1dbg(("ip_wput_v6: No ipif for " 10388 "multicast\n")); 10389 MULTIRT_DEBUG_UNTAG(copy_mp); 10390 freemsg(copy_mp); 10391 return; 10392 } 10393 ip_newroute_ipif_v6(q, copy_mp, ipif, 10394 ip6h->ip6_dst, unspec_src, zoneid); 10395 ipif_refrele(ipif); 10396 } else { 10397 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10398 &ip6h->ip6_src, ill, zoneid, ipst); 10399 } 10400 } 10401 ill_refrele(ill); 10402 return; 10403 } 10404 if (need_decref) { 10405 CONN_DEC_REF(connp); 10406 connp = NULL; 10407 } 10408 10409 /* Update rptr if there was an ip6i_t header. */ 10410 if (ip6i != NULL) 10411 mp->b_rptr -= sizeof (ip6i_t); 10412 if (unspec_src || attach_if) { 10413 if (ip6i == NULL) { 10414 /* 10415 * Add ip6i_t header to carry unspec_src 10416 * or attach_if until the packet comes back in 10417 * ip_wput_v6. 10418 */ 10419 if (mctl_present) { 10420 first_mp->b_cont = 10421 ip_add_info_v6(mp, NULL, v6dstp); 10422 mp = first_mp->b_cont; 10423 if (mp == NULL) 10424 freeb(first_mp); 10425 } else { 10426 first_mp = mp = ip_add_info_v6(mp, NULL, 10427 v6dstp); 10428 } 10429 if (mp == NULL) { 10430 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10431 ill_refrele(ill); 10432 return; 10433 } 10434 ip6i = (ip6i_t *)mp->b_rptr; 10435 if ((mp->b_wptr - (uchar_t *)ip6i) == 10436 sizeof (ip6i_t)) { 10437 /* 10438 * ndp_resolver called from ip_newroute_v6 10439 * expects a pulled up message. 
10440 */ 10441 if (!pullupmsg(mp, -1)) { 10442 ip1dbg(("ip_wput_v6: pullupmsg" 10443 " failed\n")); 10444 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10445 freemsg(first_mp); 10446 return; 10447 } 10448 ip6i = (ip6i_t *)mp->b_rptr; 10449 } 10450 ip6h = (ip6_t *)&ip6i[1]; 10451 v6dstp = &ip6h->ip6_dst; 10452 } 10453 if (unspec_src) 10454 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10455 if (attach_if) { 10456 /* 10457 * Bind to nofailover/BOUND_PIF overrides ifindex. 10458 */ 10459 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10460 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10461 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10462 if (drop_if_delayed) { 10463 /* This is a multipathing probe packet */ 10464 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10465 } 10466 } 10467 if (mctl_present) { 10468 ASSERT(io != NULL); 10469 io->ipsec_out_unspec_src = unspec_src; 10470 } 10471 } 10472 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10473 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10474 unspec_src, zoneid); 10475 } else { 10476 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10477 zoneid, ipst); 10478 } 10479 ill_refrele(ill); 10480 return; 10481 10482 notv6: 10483 /* FIXME?: assume the caller calls the right version of ip_output? */ 10484 if (q->q_next == NULL) { 10485 connp = Q_TO_CONN(q); 10486 10487 /* 10488 * We can change conn_send for all types of conn, even 10489 * though only TCP uses it right now. 10490 * FIXME: sctp could use conn_send but doesn't currently. 10491 */ 10492 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10493 } 10494 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10495 (void) ip_output(arg, first_mp, arg2, caller); 10496 if (ill != NULL) 10497 ill_refrele(ill); 10498 } 10499 10500 /* 10501 * If this is a conn_t queue, then we pass in the conn. This includes the 10502 * zoneid. 10503 * Otherwise, this is a message for an ill_t queue, 10504 * in which case we use the global zoneid since those are all part of 10505 * the global zone. 
10506 */ 10507 void 10508 ip_wput_v6(queue_t *q, mblk_t *mp) 10509 { 10510 if (CONN_Q(q)) 10511 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10512 else 10513 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10514 } 10515 10516 static void 10517 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10518 { 10519 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10520 io->ipsec_out_attach_if = B_TRUE; 10521 io->ipsec_out_ill_index = attach_index; 10522 } 10523 10524 /* 10525 * NULL send-to queue - packet is to be delivered locally. 10526 */ 10527 void 10528 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10529 ire_t *ire, int fanout_flags, zoneid_t zoneid) 10530 { 10531 uint32_t ports; 10532 mblk_t *mp = first_mp, *first_mp1; 10533 boolean_t mctl_present; 10534 uint8_t nexthdr; 10535 uint16_t hdr_length; 10536 ipsec_out_t *io; 10537 mib2_ipIfStatsEntry_t *mibptr; 10538 ilm_t *ilm; 10539 uint_t nexthdr_offset; 10540 ip_stack_t *ipst = ill->ill_ipst; 10541 10542 if (DB_TYPE(mp) == M_CTL) { 10543 io = (ipsec_out_t *)mp->b_rptr; 10544 if (!io->ipsec_out_secure) { 10545 mp = mp->b_cont; 10546 freeb(first_mp); 10547 first_mp = mp; 10548 mctl_present = B_FALSE; 10549 } else { 10550 mctl_present = B_TRUE; 10551 mp = first_mp->b_cont; 10552 ipsec_out_to_in(first_mp); 10553 } 10554 } else { 10555 mctl_present = B_FALSE; 10556 } 10557 10558 /* 10559 * Remove reachability confirmation bit from version field 10560 * before passing the packet on to any firewall hooks or 10561 * looping back the packet. 
10562 */ 10563 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10564 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10565 10566 DTRACE_PROBE4(ip6__loopback__in__start, 10567 ill_t *, ill, ill_t *, NULL, 10568 ip6_t *, ip6h, mblk_t *, first_mp); 10569 10570 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10571 ipst->ips_ipv6firewall_loopback_in, 10572 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10573 10574 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10575 10576 if (first_mp == NULL) 10577 return; 10578 10579 if (ipst->ips_ipobs_enabled) { 10580 zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; 10581 zoneid_t stackzoneid = netstackid_to_zoneid( 10582 ipst->ips_netstack->netstack_stackid); 10583 10584 szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; 10585 /* 10586 * ::1 is special, as we cannot lookup its zoneid by 10587 * address. For this case, restrict the lookup to the 10588 * source zone. 10589 */ 10590 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) 10591 lookup_zoneid = zoneid; 10592 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 10593 lookup_zoneid); 10594 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 10595 IPV6_VERSION, 0, ipst); 10596 } 10597 10598 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10599 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10600 int, 1); 10601 10602 nexthdr = ip6h->ip6_nxt; 10603 mibptr = ill->ill_ip_mib; 10604 10605 /* Fastpath */ 10606 switch (nexthdr) { 10607 case IPPROTO_TCP: 10608 case IPPROTO_UDP: 10609 case IPPROTO_ICMPV6: 10610 case IPPROTO_SCTP: 10611 hdr_length = IPV6_HDR_LEN; 10612 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10613 (uchar_t *)ip6h); 10614 break; 10615 default: { 10616 uint8_t *nexthdrp; 10617 10618 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10619 &hdr_length, &nexthdrp)) { 10620 /* Malformed packet */ 10621 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10622 freemsg(first_mp); 10623 return; 10624 } 10625 nexthdr = *nexthdrp; 10626 nexthdr_offset = nexthdrp - 
(uint8_t *)ip6h; 10627 break; 10628 } 10629 } 10630 10631 UPDATE_OB_PKT_COUNT(ire); 10632 ire->ire_last_used_time = lbolt; 10633 10634 switch (nexthdr) { 10635 case IPPROTO_TCP: 10636 if (DB_TYPE(mp) == M_DATA) { 10637 /* 10638 * M_DATA mblk, so init mblk (chain) for 10639 * no struio(). 10640 */ 10641 mblk_t *mp1 = mp; 10642 10643 do { 10644 mp1->b_datap->db_struioflag = 0; 10645 } while ((mp1 = mp1->b_cont) != NULL); 10646 } 10647 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10648 TCP_PORTS_OFFSET); 10649 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10650 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10651 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10652 hdr_length, mctl_present, ire->ire_zoneid); 10653 return; 10654 10655 case IPPROTO_UDP: 10656 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10657 UDP_PORTS_OFFSET); 10658 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10659 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10660 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10661 return; 10662 10663 case IPPROTO_SCTP: 10664 { 10665 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10666 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10667 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10668 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10669 return; 10670 } 10671 case IPPROTO_ICMPV6: { 10672 icmp6_t *icmp6; 10673 10674 /* check for full IPv6+ICMPv6 header */ 10675 if ((mp->b_wptr - mp->b_rptr) < 10676 (hdr_length + ICMP6_MINLEN)) { 10677 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10678 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10679 " failed\n")); 10680 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10681 freemsg(first_mp); 10682 return; 10683 } 10684 ip6h = (ip6_t *)mp->b_rptr; 10685 } 10686 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10687 10688 /* Update output mib stats */ 10689 icmp_update_out_mib_v6(ill, icmp6); 10690 10691 /* Check variable for testing applications */ 10692 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10693 
freemsg(first_mp); 10694 return; 10695 } 10696 /* 10697 * Assume that there is always at least one conn for 10698 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10699 * where there is no conn. 10700 */ 10701 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10702 !IS_LOOPBACK(ill)) { 10703 /* 10704 * In the multicast case, applications may have 10705 * joined the group from different zones, so we 10706 * need to deliver the packet to each of them. 10707 * Loop through the multicast memberships 10708 * structures (ilm) on the receive ill and send 10709 * a copy of the packet up each matching one. 10710 * However, we don't do this for multicasts sent 10711 * on the loopback interface (PHYI_LOOPBACK flag 10712 * set) as they must stay in the sender's zone. 10713 */ 10714 ILM_WALKER_HOLD(ill); 10715 for (ilm = ill->ill_ilm; ilm != NULL; 10716 ilm = ilm->ilm_next) { 10717 if (ilm->ilm_flags & ILM_DELETED) 10718 continue; 10719 if (!IN6_ARE_ADDR_EQUAL( 10720 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10721 continue; 10722 if ((fanout_flags & 10723 IP_FF_NO_MCAST_LOOP) && 10724 ilm->ilm_zoneid == ire->ire_zoneid) 10725 continue; 10726 if (!ipif_lookup_zoneid(ill, 10727 ilm->ilm_zoneid, IPIF_UP, NULL)) 10728 continue; 10729 10730 first_mp1 = ip_copymsg(first_mp); 10731 if (first_mp1 == NULL) 10732 continue; 10733 icmp_inbound_v6(q, first_mp1, ill, 10734 hdr_length, mctl_present, 10735 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10736 NULL); 10737 } 10738 ILM_WALKER_RELE(ill); 10739 } else { 10740 first_mp1 = ip_copymsg(first_mp); 10741 if (first_mp1 != NULL) 10742 icmp_inbound_v6(q, first_mp1, ill, 10743 hdr_length, mctl_present, 10744 IP6_NO_IPPOLICY, ire->ire_zoneid, 10745 NULL); 10746 } 10747 } 10748 /* FALLTHRU */ 10749 default: { 10750 /* 10751 * Handle protocols with which IPv6 is less intimate. 10752 */ 10753 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10754 10755 /* 10756 * Enable sending ICMP for "Unknown" nexthdr 10757 * case. i.e. 
where we did not FALLTHRU from 10758 * IPPROTO_ICMPV6 processing case above. 10759 */ 10760 if (nexthdr != IPPROTO_ICMPV6) 10761 fanout_flags |= IP_FF_SEND_ICMP; 10762 /* 10763 * Note: There can be more than one stream bound 10764 * to a particular protocol. When this is the case, 10765 * each one gets a copy of any incoming packets. 10766 */ 10767 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10768 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10769 mctl_present, ire->ire_zoneid); 10770 return; 10771 } 10772 } 10773 } 10774 10775 /* 10776 * Send packet using IRE. 10777 * Checksumming is controlled by cksum_request: 10778 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10779 * 1 => Skip TCP/UDP/SCTP checksum 10780 * Otherwise => checksum_request contains insert offset for checksum 10781 * 10782 * Assumes that the following set of headers appear in the first 10783 * mblk: 10784 * ip6_t 10785 * Any extension headers 10786 * TCP/UDP/SCTP header (if present) 10787 * The routine can handle an ICMPv6 header that is not in the first mblk. 10788 * 10789 * NOTE : This function does not ire_refrele the ire passed in as the 10790 * argument unlike ip_wput_ire where the REFRELE is done. 10791 * Refer to ip_wput_ire for more on this. 10792 */ 10793 static void 10794 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10795 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10796 zoneid_t zoneid) 10797 { 10798 ip6_t *ip6h; 10799 uint8_t nexthdr; 10800 uint16_t hdr_length; 10801 uint_t reachable = 0x0; 10802 ill_t *ill; 10803 mib2_ipIfStatsEntry_t *mibptr; 10804 mblk_t *first_mp; 10805 boolean_t mctl_present; 10806 ipsec_out_t *io; 10807 boolean_t conn_dontroute; /* conn value for multicast */ 10808 boolean_t conn_multicast_loop; /* conn value for multicast */ 10809 boolean_t multicast_forward; /* Should we forward ? 
*/ 10810 int max_frag; 10811 ip_stack_t *ipst = ire->ire_ipst; 10812 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10813 10814 ill = ire_to_ill(ire); 10815 first_mp = mp; 10816 multicast_forward = B_FALSE; 10817 10818 if (mp->b_datap->db_type != M_CTL) { 10819 ip6h = (ip6_t *)first_mp->b_rptr; 10820 } else { 10821 io = (ipsec_out_t *)first_mp->b_rptr; 10822 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10823 /* 10824 * Grab the zone id now because the M_CTL can be discarded by 10825 * ip_wput_ire_parse_ipsec_out() below. 10826 */ 10827 ASSERT(zoneid == io->ipsec_out_zoneid); 10828 ASSERT(zoneid != ALL_ZONES); 10829 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10830 /* 10831 * For the multicast case, ipsec_out carries conn_dontroute and 10832 * conn_multicast_loop as conn may not be available here. We 10833 * need this for multicast loopback and forwarding which is done 10834 * later in the code. 10835 */ 10836 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10837 conn_dontroute = io->ipsec_out_dontroute; 10838 conn_multicast_loop = io->ipsec_out_multicast_loop; 10839 /* 10840 * If conn_dontroute is not set or conn_multicast_loop 10841 * is set, we need to do forwarding/loopback. For 10842 * datagrams from ip_wput_multicast, conn_dontroute is 10843 * set to B_TRUE and conn_multicast_loop is set to 10844 * B_FALSE so that we neither do forwarding nor 10845 * loopback. 10846 */ 10847 if (!conn_dontroute || conn_multicast_loop) 10848 multicast_forward = B_TRUE; 10849 } 10850 } 10851 10852 /* 10853 * If the sender didn't supply the hop limit and there is a default 10854 * unicast hop limit associated with the output interface, we use 10855 * that if the packet is unicast. Interface specific unicast hop 10856 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10857 */ 10858 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10859 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10860 ip6h->ip6_hops = ill->ill_max_hops; 10861 } 10862 10863 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10864 ire->ire_zoneid != ALL_ZONES) { 10865 /* 10866 * When a zone sends a packet to another zone, we try to deliver 10867 * the packet under the same conditions as if the destination 10868 * was a real node on the network. To do so, we look for a 10869 * matching route in the forwarding table. 10870 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10871 * ip_newroute_v6() does. 10872 * Note that IRE_LOCAL are special, since they are used 10873 * when the zoneid doesn't match in some cases. This means that 10874 * we need to handle ipha_src differently since ire_src_addr 10875 * belongs to the receiving zone instead of the sending zone. 10876 * When ip_restrict_interzone_loopback is set, then 10877 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10878 * for loopback between zones when the logical "Ethernet" would 10879 * have looped them back. 
10880 */ 10881 ire_t *src_ire; 10882 10883 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10884 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10885 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10886 if (src_ire != NULL && 10887 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10888 (!ipst->ips_ip_restrict_interzone_loopback || 10889 ire_local_same_ill_group(ire, src_ire))) { 10890 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10891 !unspec_src) { 10892 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10893 } 10894 ire_refrele(src_ire); 10895 } else { 10896 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10897 if (src_ire != NULL) { 10898 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10899 ire_refrele(src_ire); 10900 freemsg(first_mp); 10901 return; 10902 } 10903 ire_refrele(src_ire); 10904 } 10905 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10906 /* Failed */ 10907 freemsg(first_mp); 10908 return; 10909 } 10910 icmp_unreachable_v6(q, first_mp, 10911 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10912 zoneid, ipst); 10913 return; 10914 } 10915 } 10916 10917 if (mp->b_datap->db_type == M_CTL || 10918 ipss->ipsec_outbound_v6_policy_present) { 10919 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10920 connp, unspec_src, zoneid); 10921 if (mp == NULL) { 10922 return; 10923 } 10924 } 10925 10926 first_mp = mp; 10927 if (mp->b_datap->db_type == M_CTL) { 10928 io = (ipsec_out_t *)mp->b_rptr; 10929 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10930 mp = mp->b_cont; 10931 mctl_present = B_TRUE; 10932 } else { 10933 mctl_present = B_FALSE; 10934 } 10935 10936 ip6h = (ip6_t *)mp->b_rptr; 10937 nexthdr = ip6h->ip6_nxt; 10938 mibptr = ill->ill_ip_mib; 10939 10940 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10941 ipif_t *ipif; 10942 10943 /* 10944 * Select the source address using ipif_select_source_v6. 
10945 */ 10946 if (attach_index != 0) { 10947 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10948 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10949 } else { 10950 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10951 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10952 } 10953 if (ipif == NULL) { 10954 if (ip_debug > 2) { 10955 /* ip1dbg */ 10956 pr_addr_dbg("ip_wput_ire_v6: no src for " 10957 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10958 printf("ip_wput_ire_v6: interface name %s\n", 10959 ill->ill_name); 10960 } 10961 freemsg(first_mp); 10962 return; 10963 } 10964 ip6h->ip6_src = ipif->ipif_v6src_addr; 10965 ipif_refrele(ipif); 10966 } 10967 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10968 if ((connp != NULL && connp->conn_multicast_loop) || 10969 !IS_LOOPBACK(ill)) { 10970 ilm_t *ilm; 10971 10972 ILM_WALKER_HOLD(ill); 10973 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10974 ILM_WALKER_RELE(ill); 10975 if (ilm != NULL) { 10976 mblk_t *nmp; 10977 int fanout_flags = 0; 10978 10979 if (connp != NULL && 10980 !connp->conn_multicast_loop) { 10981 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10982 } 10983 ip1dbg(("ip_wput_ire_v6: " 10984 "Loopback multicast\n")); 10985 nmp = ip_copymsg(first_mp); 10986 if (nmp != NULL) { 10987 ip6_t *nip6h; 10988 mblk_t *mp_ip6h; 10989 10990 if (mctl_present) { 10991 nip6h = (ip6_t *) 10992 nmp->b_cont->b_rptr; 10993 mp_ip6h = nmp->b_cont; 10994 } else { 10995 nip6h = (ip6_t *)nmp->b_rptr; 10996 mp_ip6h = nmp; 10997 } 10998 10999 DTRACE_PROBE4( 11000 ip6__loopback__out__start, 11001 ill_t *, NULL, 11002 ill_t *, ill, 11003 ip6_t *, nip6h, 11004 mblk_t *, nmp); 11005 11006 FW_HOOKS6( 11007 ipst->ips_ip6_loopback_out_event, 11008 ipst->ips_ipv6firewall_loopback_out, 11009 NULL, ill, nip6h, nmp, mp_ip6h, 11010 0, ipst); 11011 11012 DTRACE_PROBE1( 11013 ip6__loopback__out__end, 11014 mblk_t *, nmp); 11015 11016 /* 11017 * DTrace this as ip:::send. 
A blocked 11018 * packet will fire the send probe, but 11019 * not the receive probe. 11020 */ 11021 DTRACE_IP7(send, mblk_t *, nmp, 11022 conn_t *, NULL, void_ip_t *, nip6h, 11023 __dtrace_ipsr_ill_t *, ill, 11024 ipha_t *, NULL, ip6_t *, nip6h, 11025 int, 1); 11026 11027 if (nmp != NULL) { 11028 /* 11029 * Deliver locally and to 11030 * every local zone, except 11031 * the sending zone when 11032 * IPV6_MULTICAST_LOOP is 11033 * disabled. 11034 */ 11035 ip_wput_local_v6(RD(q), ill, 11036 nip6h, nmp, ire, 11037 fanout_flags, zoneid); 11038 } 11039 } else { 11040 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11041 ip1dbg(("ip_wput_ire_v6: " 11042 "copymsg failed\n")); 11043 } 11044 } 11045 } 11046 if (ip6h->ip6_hops == 0 || 11047 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 11048 IS_LOOPBACK(ill)) { 11049 /* 11050 * Local multicast or just loopback on loopback 11051 * interface. 11052 */ 11053 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 11054 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 11055 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11056 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11057 freemsg(first_mp); 11058 return; 11059 } 11060 } 11061 11062 if (ire->ire_stq != NULL) { 11063 uint32_t sum; 11064 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11065 ill_phyint->phyint_ifindex; 11066 queue_t *dev_q = ire->ire_stq->q_next; 11067 11068 /* 11069 * non-NULL send-to queue - packet is to be sent 11070 * out an interface. 11071 */ 11072 11073 /* Driver is flow-controlling? */ 11074 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11075 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 11076 /* 11077 * Queue packet if we have an conn to give back 11078 * pressure. We can't queue packets intended for 11079 * hardware acceleration since we've tossed that 11080 * state already. If the packet is being fed back 11081 * from ire_send_v6, we don't know the position in 11082 * the queue to enqueue the packet and we discard 11083 * the packet. 
11084 */ 11085 if (ipst->ips_ip_output_queue && connp != NULL && 11086 !mctl_present && caller != IRE_SEND) { 11087 if (caller == IP_WSRV) { 11088 connp->conn_did_putbq = 1; 11089 (void) putbq(connp->conn_wq, mp); 11090 conn_drain_insert(connp); 11091 /* 11092 * caller == IP_WSRV implies we are 11093 * the service thread, and the 11094 * queue is already noenabled. 11095 * The check for canput and 11096 * the putbq is not atomic. 11097 * So we need to check again. 11098 */ 11099 if (canput(dev_q)) 11100 connp->conn_did_putbq = 0; 11101 } else { 11102 (void) putq(connp->conn_wq, mp); 11103 } 11104 return; 11105 } 11106 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11107 freemsg(first_mp); 11108 return; 11109 } 11110 11111 /* 11112 * Look for reachability confirmations from the transport. 11113 */ 11114 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11115 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11116 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11117 if (mctl_present) 11118 io->ipsec_out_reachable = B_TRUE; 11119 } 11120 /* Fastpath */ 11121 switch (nexthdr) { 11122 case IPPROTO_TCP: 11123 case IPPROTO_UDP: 11124 case IPPROTO_ICMPV6: 11125 case IPPROTO_SCTP: 11126 hdr_length = IPV6_HDR_LEN; 11127 break; 11128 default: { 11129 uint8_t *nexthdrp; 11130 11131 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11132 &hdr_length, &nexthdrp)) { 11133 /* Malformed packet */ 11134 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11135 freemsg(first_mp); 11136 return; 11137 } 11138 nexthdr = *nexthdrp; 11139 break; 11140 } 11141 } 11142 11143 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11144 uint16_t *up; 11145 uint16_t *insp; 11146 11147 /* 11148 * The packet header is processed once for all, even 11149 * in the multirouting case. We disable hardware 11150 * checksum if the packet is multirouted, as it will be 11151 * replicated via several interfaces, and not all of 11152 * them may have this capability. 
11153 */ 11154 if (cksum_request == 1 && 11155 !(ire->ire_flags & RTF_MULTIRT)) { 11156 /* Skip the transport checksum */ 11157 goto cksum_done; 11158 } 11159 /* 11160 * Do user-configured raw checksum. 11161 * Compute checksum and insert at offset "cksum_request" 11162 */ 11163 11164 /* check for enough headers for checksum */ 11165 cksum_request += hdr_length; /* offset from rptr */ 11166 if ((mp->b_wptr - mp->b_rptr) < 11167 (cksum_request + sizeof (int16_t))) { 11168 if (!pullupmsg(mp, 11169 cksum_request + sizeof (int16_t))) { 11170 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11171 " failed\n")); 11172 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11173 freemsg(first_mp); 11174 return; 11175 } 11176 ip6h = (ip6_t *)mp->b_rptr; 11177 } 11178 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11179 ASSERT(((uintptr_t)insp & 0x1) == 0); 11180 up = (uint16_t *)&ip6h->ip6_src; 11181 /* 11182 * icmp has placed length and routing 11183 * header adjustment in *insp. 11184 */ 11185 sum = htons(nexthdr) + 11186 up[0] + up[1] + up[2] + up[3] + 11187 up[4] + up[5] + up[6] + up[7] + 11188 up[8] + up[9] + up[10] + up[11] + 11189 up[12] + up[13] + up[14] + up[15]; 11190 sum = (sum & 0xffff) + (sum >> 16); 11191 *insp = IP_CSUM(mp, hdr_length, sum); 11192 } else if (nexthdr == IPPROTO_TCP) { 11193 uint16_t *up; 11194 11195 /* 11196 * Check for full IPv6 header + enough TCP header 11197 * to get at the checksum field. 
11198 */ 11199 if ((mp->b_wptr - mp->b_rptr) < 11200 (hdr_length + TCP_CHECKSUM_OFFSET + 11201 TCP_CHECKSUM_SIZE)) { 11202 if (!pullupmsg(mp, hdr_length + 11203 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11204 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11205 " failed\n")); 11206 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11207 freemsg(first_mp); 11208 return; 11209 } 11210 ip6h = (ip6_t *)mp->b_rptr; 11211 } 11212 11213 up = (uint16_t *)&ip6h->ip6_src; 11214 /* 11215 * Note: The TCP module has stored the length value 11216 * into the tcp checksum field, so we don't 11217 * need to explicitly sum it in here. 11218 */ 11219 sum = up[0] + up[1] + up[2] + up[3] + 11220 up[4] + up[5] + up[6] + up[7] + 11221 up[8] + up[9] + up[10] + up[11] + 11222 up[12] + up[13] + up[14] + up[15]; 11223 11224 /* Fold the initial sum */ 11225 sum = (sum & 0xffff) + (sum >> 16); 11226 11227 up = (uint16_t *)(((uchar_t *)ip6h) + 11228 hdr_length + TCP_CHECKSUM_OFFSET); 11229 11230 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11231 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11232 ire->ire_max_frag, mctl_present, sum); 11233 11234 /* Software checksum? 
*/ 11235 if (DB_CKSUMFLAGS(mp) == 0) { 11236 IP6_STAT(ipst, ip6_out_sw_cksum); 11237 IP6_STAT_UPDATE(ipst, 11238 ip6_tcp_out_sw_cksum_bytes, 11239 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11240 hdr_length); 11241 } 11242 } else if (nexthdr == IPPROTO_UDP) { 11243 uint16_t *up; 11244 11245 /* 11246 * check for full IPv6 header + enough UDP header 11247 * to get at the UDP checksum field 11248 */ 11249 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11250 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11251 if (!pullupmsg(mp, hdr_length + 11252 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11253 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11254 " failed\n")); 11255 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11256 freemsg(first_mp); 11257 return; 11258 } 11259 ip6h = (ip6_t *)mp->b_rptr; 11260 } 11261 up = (uint16_t *)&ip6h->ip6_src; 11262 /* 11263 * Note: The UDP module has stored the length value 11264 * into the udp checksum field, so we don't 11265 * need to explicitly sum it in here. 11266 */ 11267 sum = up[0] + up[1] + up[2] + up[3] + 11268 up[4] + up[5] + up[6] + up[7] + 11269 up[8] + up[9] + up[10] + up[11] + 11270 up[12] + up[13] + up[14] + up[15]; 11271 11272 /* Fold the initial sum */ 11273 sum = (sum & 0xffff) + (sum >> 16); 11274 11275 up = (uint16_t *)(((uchar_t *)ip6h) + 11276 hdr_length + UDP_CHECKSUM_OFFSET); 11277 11278 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11279 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11280 ire->ire_max_frag, mctl_present, sum); 11281 11282 /* Software checksum? 
*/ 11283 if (DB_CKSUMFLAGS(mp) == 0) { 11284 IP6_STAT(ipst, ip6_out_sw_cksum); 11285 IP6_STAT_UPDATE(ipst, 11286 ip6_udp_out_sw_cksum_bytes, 11287 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11288 hdr_length); 11289 } 11290 } else if (nexthdr == IPPROTO_ICMPV6) { 11291 uint16_t *up; 11292 icmp6_t *icmp6; 11293 11294 /* check for full IPv6+ICMPv6 header */ 11295 if ((mp->b_wptr - mp->b_rptr) < 11296 (hdr_length + ICMP6_MINLEN)) { 11297 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11298 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11299 " failed\n")); 11300 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11301 freemsg(first_mp); 11302 return; 11303 } 11304 ip6h = (ip6_t *)mp->b_rptr; 11305 } 11306 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11307 up = (uint16_t *)&ip6h->ip6_src; 11308 /* 11309 * icmp has placed length and routing 11310 * header adjustment in icmp6_cksum. 11311 */ 11312 sum = htons(IPPROTO_ICMPV6) + 11313 up[0] + up[1] + up[2] + up[3] + 11314 up[4] + up[5] + up[6] + up[7] + 11315 up[8] + up[9] + up[10] + up[11] + 11316 up[12] + up[13] + up[14] + up[15]; 11317 sum = (sum & 0xffff) + (sum >> 16); 11318 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11319 11320 /* Update output mib stats */ 11321 icmp_update_out_mib_v6(ill, icmp6); 11322 } else if (nexthdr == IPPROTO_SCTP) { 11323 sctp_hdr_t *sctph; 11324 11325 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11326 if (!pullupmsg(mp, hdr_length + 11327 sizeof (*sctph))) { 11328 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11329 " failed\n")); 11330 BUMP_MIB(ill->ill_ip_mib, 11331 ipIfStatsOutDiscards); 11332 freemsg(mp); 11333 return; 11334 } 11335 ip6h = (ip6_t *)mp->b_rptr; 11336 } 11337 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11338 sctph->sh_chksum = 0; 11339 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11340 } 11341 11342 cksum_done: 11343 /* 11344 * We force the insertion of a fragment header using the 11345 * IPH_FRAG_HDR flag in two cases: 11346 * - after reception of an ICMPv6 "packet too 
big" message 11347 * with a MTU < 1280 (cf. RFC 2460 section 5) 11348 * - for multirouted IPv6 packets, so that the receiver can 11349 * discard duplicates according to their fragment identifier 11350 * 11351 * Two flags modifed from the API can modify this behavior. 11352 * The first is IPV6_USE_MIN_MTU. With this API the user 11353 * can specify how to manage PMTUD for unicast and multicast. 11354 * 11355 * IPV6_DONTFRAG disallows fragmentation. 11356 */ 11357 max_frag = ire->ire_max_frag; 11358 switch (IP6I_USE_MIN_MTU_API(flags)) { 11359 case IPV6_USE_MIN_MTU_DEFAULT: 11360 case IPV6_USE_MIN_MTU_UNICAST: 11361 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11362 max_frag = IPV6_MIN_MTU; 11363 } 11364 break; 11365 11366 case IPV6_USE_MIN_MTU_NEVER: 11367 max_frag = IPV6_MIN_MTU; 11368 break; 11369 } 11370 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11371 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11372 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11373 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11374 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11375 return; 11376 } 11377 11378 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11379 (mp->b_cont ? 
msgdsize(mp) : 11380 mp->b_wptr - (uchar_t *)ip6h)) { 11381 ip0dbg(("Packet length mismatch: %d, %ld\n", 11382 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11383 msgdsize(mp))); 11384 freemsg(first_mp); 11385 return; 11386 } 11387 /* Do IPSEC processing first */ 11388 if (mctl_present) { 11389 if (attach_index != 0) 11390 ipsec_out_attach_if(io, attach_index); 11391 ipsec_out_process(q, first_mp, ire, ill_index); 11392 return; 11393 } 11394 ASSERT(mp->b_prev == NULL); 11395 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11396 ntohs(ip6h->ip6_plen) + 11397 IPV6_HDR_LEN, max_frag)); 11398 ASSERT(mp == first_mp); 11399 /* Initiate IPPF processing */ 11400 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11401 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11402 if (mp == NULL) { 11403 return; 11404 } 11405 } 11406 ip_wput_frag_v6(mp, ire, reachable, connp, 11407 caller, max_frag); 11408 return; 11409 } 11410 /* Do IPSEC processing first */ 11411 if (mctl_present) { 11412 int extra_len = ipsec_out_extra_length(first_mp); 11413 11414 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11415 max_frag) { 11416 /* 11417 * IPsec headers will push the packet over the 11418 * MTU limit. Issue an ICMPv6 Packet Too Big 11419 * message for this packet if the upper-layer 11420 * that issued this packet will be able to 11421 * react to the icmp_pkt2big_v6() that we'll 11422 * generate. 11423 */ 11424 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11425 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11426 return; 11427 } 11428 if (attach_index != 0) 11429 ipsec_out_attach_if(io, attach_index); 11430 ipsec_out_process(q, first_mp, ire, ill_index); 11431 return; 11432 } 11433 /* 11434 * XXX multicast: add ip_mforward_v6() here. 11435 * Check conn_dontroute 11436 */ 11437 #ifdef lint 11438 /* 11439 * XXX The only purpose of this statement is to avoid lint 11440 * errors. See the above "XXX multicast". When that gets 11441 * fixed, remove this whole #ifdef lint section. 
11442 */ 11443 ip3dbg(("multicast forward is %s.\n", 11444 (multicast_forward ? "TRUE" : "FALSE"))); 11445 #endif 11446 11447 UPDATE_OB_PKT_COUNT(ire); 11448 ire->ire_last_used_time = lbolt; 11449 ASSERT(mp == first_mp); 11450 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11451 } else { 11452 /* 11453 * DTrace this as ip:::send. A blocked packet will fire the 11454 * send probe, but not the receive probe. 11455 */ 11456 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11457 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11458 NULL, ip6_t *, ip6h, int, 1); 11459 DTRACE_PROBE4(ip6__loopback__out__start, 11460 ill_t *, NULL, ill_t *, ill, 11461 ip6_t *, ip6h, mblk_t *, first_mp); 11462 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11463 ipst->ips_ipv6firewall_loopback_out, 11464 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11465 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11466 if (first_mp != NULL) { 11467 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, 11468 zoneid); 11469 } 11470 } 11471 } 11472 11473 /* 11474 * Outbound IPv6 fragmentation routine using MDT. 11475 */ 11476 static void 11477 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11478 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11479 { 11480 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11481 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11482 mblk_t *hdr_mp, *md_mp = NULL; 11483 int i1; 11484 multidata_t *mmd; 11485 unsigned char *hdr_ptr, *pld_ptr; 11486 ip_pdescinfo_t pdi; 11487 uint32_t ident; 11488 size_t len; 11489 uint16_t offset; 11490 queue_t *stq = ire->ire_stq; 11491 ill_t *ill = (ill_t *)stq->q_ptr; 11492 ip_stack_t *ipst = ill->ill_ipst; 11493 11494 ASSERT(DB_TYPE(mp) == M_DATA); 11495 ASSERT(MBLKL(mp) > unfragmentable_len); 11496 11497 /* 11498 * Move read ptr past unfragmentable portion, we don't want this part 11499 * of the data in our fragments. 
11500 */ 11501 mp->b_rptr += unfragmentable_len; 11502 11503 /* Calculate how many packets we will send out */ 11504 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11505 pkts = (i1 + max_chunk - 1) / max_chunk; 11506 ASSERT(pkts > 1); 11507 11508 /* Allocate a message block which will hold all the IP Headers. */ 11509 wroff = ipst->ips_ip_wroff_extra; 11510 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11511 11512 i1 = pkts * hdr_chunk_len; 11513 /* 11514 * Create the header buffer, Multidata and destination address 11515 * and SAP attribute that should be associated with it. 11516 */ 11517 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11518 ((hdr_mp->b_wptr += i1), 11519 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11520 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11521 freemsg(mp); 11522 if (md_mp == NULL) { 11523 freemsg(hdr_mp); 11524 } else { 11525 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11526 freemsg(md_mp); 11527 } 11528 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11529 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11530 return; 11531 } 11532 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11533 11534 /* 11535 * Add a payload buffer to the Multidata; this operation must not 11536 * fail, or otherwise our logic in this routine is broken. There 11537 * is no memory allocation done by the routine, so any returned 11538 * failure simply tells us that we've done something wrong. 11539 * 11540 * A failure tells us that either we're adding the same payload 11541 * buffer more than once, or we're trying to add more buffers than 11542 * allowed. None of the above cases should happen, and we panic 11543 * because either there's horrible heap corruption, and/or 11544 * programming mistake. 
11545 */ 11546 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11547 goto pbuf_panic; 11548 } 11549 11550 hdr_ptr = hdr_mp->b_rptr; 11551 pld_ptr = mp->b_rptr; 11552 11553 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11554 11555 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11556 11557 /* 11558 * len is the total length of the fragmentable data in this 11559 * datagram. For each fragment sent, we will decrement len 11560 * by the amount of fragmentable data sent in that fragment 11561 * until len reaches zero. 11562 */ 11563 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11564 11565 offset = 0; 11566 prev_nexthdr_offset += wroff; 11567 11568 while (len != 0) { 11569 size_t mlen; 11570 ip6_t *fip6h; 11571 ip6_frag_t *fraghdr; 11572 int error; 11573 11574 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11575 mlen = MIN(len, max_chunk); 11576 len -= mlen; 11577 11578 fip6h = (ip6_t *)(hdr_ptr + wroff); 11579 ASSERT(OK_32PTR(fip6h)); 11580 bcopy(ip6h, fip6h, unfragmentable_len); 11581 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11582 11583 fip6h->ip6_plen = htons((uint16_t)(mlen + 11584 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11585 11586 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11587 unfragmentable_len); 11588 fraghdr->ip6f_nxt = nexthdr; 11589 fraghdr->ip6f_reserved = 0; 11590 fraghdr->ip6f_offlg = htons(offset) | 11591 ((len != 0) ? IP6F_MORE_FRAG : 0); 11592 fraghdr->ip6f_ident = ident; 11593 11594 /* 11595 * Record offset and size of header and data of the next packet 11596 * in the multidata message. 
11597 */ 11598 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11599 unfragmentable_len + sizeof (ip6_frag_t), 0); 11600 PDESC_PLD_INIT(&pdi); 11601 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11602 ASSERT(i1 > 0); 11603 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11604 if (i1 == mlen) { 11605 pld_ptr += mlen; 11606 } else { 11607 i1 = mlen - i1; 11608 mp = mp->b_cont; 11609 ASSERT(mp != NULL); 11610 ASSERT(MBLKL(mp) >= i1); 11611 /* 11612 * Attach the next payload message block to the 11613 * multidata message. 11614 */ 11615 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11616 goto pbuf_panic; 11617 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11618 pld_ptr = mp->b_rptr + i1; 11619 } 11620 11621 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11622 KM_NOSLEEP)) == NULL) { 11623 /* 11624 * Any failure other than ENOMEM indicates that we 11625 * have passed in invalid pdesc info or parameters 11626 * to mmd_addpdesc, which must not happen. 11627 * 11628 * EINVAL is a result of failure on boundary checks 11629 * against the pdesc info contents. It should not 11630 * happen, and we panic because either there's 11631 * horrible heap corruption, and/or programming 11632 * mistake. 11633 */ 11634 if (error != ENOMEM) { 11635 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11636 "pdesc logic error detected for " 11637 "mmd %p pinfo %p (%d)\n", 11638 (void *)mmd, (void *)&pdi, error); 11639 /* NOTREACHED */ 11640 } 11641 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11642 /* Free unattached payload message blocks as well */ 11643 md_mp->b_cont = mp->b_cont; 11644 goto free_mmd; 11645 } 11646 11647 /* Advance fragment offset. */ 11648 offset += mlen; 11649 11650 /* Advance to location for next header in the buffer. */ 11651 hdr_ptr += hdr_chunk_len; 11652 11653 /* Did we reach the next payload message block? 
*/ 11654 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11655 mp = mp->b_cont; 11656 /* 11657 * Attach the next message block with payload 11658 * data to the multidata message. 11659 */ 11660 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11661 goto pbuf_panic; 11662 pld_ptr = mp->b_rptr; 11663 } 11664 } 11665 11666 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11667 ASSERT(mp->b_wptr == pld_ptr); 11668 11669 /* Update IP statistics */ 11670 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11671 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11672 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11673 /* 11674 * The ipv6 header len is accounted for in unfragmentable_len so 11675 * when calculating the fragmentation overhead just add the frag 11676 * header len. 11677 */ 11678 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11679 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11680 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11681 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11682 11683 ire->ire_ob_pkt_count += pkts; 11684 if (ire->ire_ipif != NULL) 11685 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11686 11687 ire->ire_last_used_time = lbolt; 11688 /* Send it down */ 11689 putnext(stq, md_mp); 11690 return; 11691 11692 pbuf_panic: 11693 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11694 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11695 pbuf_idx); 11696 /* NOTREACHED */ 11697 } 11698 11699 /* 11700 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11701 * We have not optimized this in terms of number of mblks 11702 * allocated. For instance, for each fragment sent we always allocate a 11703 * mblk to hold the IPv6 header and fragment header. 11704 * 11705 * Assumes that all the extension headers are contained in the first mblk. 
11706 * 11707 * The fragment header is inserted after an hop-by-hop options header 11708 * and after [an optional destinations header followed by] a routing header. 11709 * 11710 * NOTE : This function does not ire_refrele the ire passed in as 11711 * the argument. 11712 */ 11713 void 11714 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11715 int caller, int max_frag) 11716 { 11717 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11718 ip6_t *fip6h; 11719 mblk_t *hmp; 11720 mblk_t *hmp0; 11721 mblk_t *dmp; 11722 ip6_frag_t *fraghdr; 11723 size_t unfragmentable_len; 11724 size_t len; 11725 size_t mlen; 11726 size_t max_chunk; 11727 uint32_t ident; 11728 uint16_t off_flags; 11729 uint16_t offset = 0; 11730 ill_t *ill; 11731 uint8_t nexthdr; 11732 uint_t prev_nexthdr_offset; 11733 uint8_t *ptr; 11734 ip_stack_t *ipst = ire->ire_ipst; 11735 11736 ASSERT(ire->ire_type == IRE_CACHE); 11737 ill = (ill_t *)ire->ire_stq->q_ptr; 11738 11739 if (max_frag <= 0) { 11740 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11741 freemsg(mp); 11742 return; 11743 } 11744 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11745 11746 /* 11747 * Determine the length of the unfragmentable portion of this 11748 * datagram. This consists of the IPv6 header, a potential 11749 * hop-by-hop options header, a potential pre-routing-header 11750 * destination options header, and a potential routing header. 
11751 */ 11752 nexthdr = ip6h->ip6_nxt; 11753 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11754 ptr = (uint8_t *)&ip6h[1]; 11755 11756 if (nexthdr == IPPROTO_HOPOPTS) { 11757 ip6_hbh_t *hbh_hdr; 11758 uint_t hdr_len; 11759 11760 hbh_hdr = (ip6_hbh_t *)ptr; 11761 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11762 nexthdr = hbh_hdr->ip6h_nxt; 11763 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11764 - (uint8_t *)ip6h; 11765 ptr += hdr_len; 11766 } 11767 if (nexthdr == IPPROTO_DSTOPTS) { 11768 ip6_dest_t *dest_hdr; 11769 uint_t hdr_len; 11770 11771 dest_hdr = (ip6_dest_t *)ptr; 11772 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11773 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11774 nexthdr = dest_hdr->ip6d_nxt; 11775 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11776 - (uint8_t *)ip6h; 11777 ptr += hdr_len; 11778 } 11779 } 11780 if (nexthdr == IPPROTO_ROUTING) { 11781 ip6_rthdr_t *rthdr; 11782 uint_t hdr_len; 11783 11784 rthdr = (ip6_rthdr_t *)ptr; 11785 nexthdr = rthdr->ip6r_nxt; 11786 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11787 - (uint8_t *)ip6h; 11788 hdr_len = 8 * (rthdr->ip6r_len + 1); 11789 ptr += hdr_len; 11790 } 11791 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11792 11793 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11794 sizeof (ip6_frag_t)) & ~7; 11795 11796 /* Check if we can use MDT to send out the frags. */ 11797 ASSERT(!IRE_IS_LOCAL(ire)); 11798 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11799 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11800 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11801 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11802 nexthdr, prev_nexthdr_offset); 11803 return; 11804 } 11805 11806 /* 11807 * Allocate an mblk with enough room for the link-layer 11808 * header, the unfragmentable part of the datagram, and the 11809 * fragment header. 
This (or a copy) will be used as the 11810 * first mblk for each fragment we send. 11811 */ 11812 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + 11813 ipst->ips_ip_wroff_extra, BPRI_HI); 11814 if (hmp == NULL) { 11815 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11816 freemsg(mp); 11817 return; 11818 } 11819 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11820 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11821 11822 fip6h = (ip6_t *)hmp->b_rptr; 11823 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11824 11825 bcopy(ip6h, fip6h, unfragmentable_len); 11826 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11827 11828 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11829 11830 fraghdr->ip6f_nxt = nexthdr; 11831 fraghdr->ip6f_reserved = 0; 11832 fraghdr->ip6f_offlg = 0; 11833 fraghdr->ip6f_ident = htonl(ident); 11834 11835 /* 11836 * len is the total length of the fragmentable data in this 11837 * datagram. For each fragment sent, we will decrement len 11838 * by the amount of fragmentable data sent in that fragment 11839 * until len reaches zero. 11840 */ 11841 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11842 11843 /* 11844 * Move read ptr past unfragmentable portion, we don't want this part 11845 * of the data in our fragments. 
11846 */ 11847 mp->b_rptr += unfragmentable_len; 11848 11849 while (len != 0) { 11850 mlen = MIN(len, max_chunk); 11851 len -= mlen; 11852 if (len != 0) { 11853 /* Not last */ 11854 hmp0 = copyb(hmp); 11855 if (hmp0 == NULL) { 11856 freeb(hmp); 11857 freemsg(mp); 11858 BUMP_MIB(ill->ill_ip_mib, 11859 ipIfStatsOutFragFails); 11860 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11861 return; 11862 } 11863 off_flags = IP6F_MORE_FRAG; 11864 } else { 11865 /* Last fragment */ 11866 hmp0 = hmp; 11867 hmp = NULL; 11868 off_flags = 0; 11869 } 11870 fip6h = (ip6_t *)(hmp0->b_rptr); 11871 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11872 11873 fip6h->ip6_plen = htons((uint16_t)(mlen + 11874 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11875 /* 11876 * Note: Optimization alert. 11877 * In IPv6 (and IPv4) protocol header, Fragment Offset 11878 * ("offset") is 13 bits wide and in 8-octet units. 11879 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11880 * it occupies the most significant 13 bits. 11881 * (least significant 13 bits in IPv4). 11882 * We do not do any shifts here. Not shifting is same effect 11883 * as taking offset value in octet units, dividing by 8 and 11884 * then shifting 3 bits left to line it up in place in proper 11885 * place protocol header. 
11886 */ 11887 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11888 11889 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11890 /* mp has already been freed by ip_carve_mp() */ 11891 if (hmp != NULL) 11892 freeb(hmp); 11893 freeb(hmp0); 11894 ip1dbg(("ip_carve_mp: failed\n")); 11895 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11896 return; 11897 } 11898 hmp0->b_cont = dmp; 11899 /* Get the priority marking, if any */ 11900 hmp0->b_band = dmp->b_band; 11901 UPDATE_OB_PKT_COUNT(ire); 11902 ire->ire_last_used_time = lbolt; 11903 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11904 caller, NULL); 11905 reachable = 0; /* No need to redo state machine in loop */ 11906 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11907 offset += mlen; 11908 } 11909 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11910 } 11911 11912 /* 11913 * Determine if the ill and multicast aspects of that packets 11914 * "matches" the conn. 11915 */ 11916 boolean_t 11917 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11918 zoneid_t zoneid) 11919 { 11920 ill_t *in_ill; 11921 boolean_t wantpacket = B_TRUE; 11922 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11923 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11924 11925 /* 11926 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11927 * unicast and multicast reception to conn_incoming_ill. 11928 * conn_wantpacket_v6 is called both for unicast and 11929 * multicast. 11930 * 11931 * 1) The unicast copy of the packet can come anywhere in 11932 * the ill group if it is part of the group. Thus, we 11933 * need to check to see whether the ill group matches 11934 * if in_ill is part of a group. 11935 * 11936 * 2) ip_rput does not suppress duplicate multicast packets. 
11937 * If there are two interfaces in a ill group and we have 11938 * 2 applications (conns) joined a multicast group G on 11939 * both the interfaces, ilm_lookup_ill filter in ip_rput 11940 * will give us two packets because we join G on both the 11941 * interfaces rather than nominating just one interface 11942 * for receiving multicast like broadcast above. So, 11943 * we have to call ilg_lookup_ill to filter out duplicate 11944 * copies, if ill is part of a group, to supress duplicates. 11945 */ 11946 in_ill = connp->conn_incoming_ill; 11947 if (in_ill != NULL) { 11948 mutex_enter(&connp->conn_lock); 11949 in_ill = connp->conn_incoming_ill; 11950 mutex_enter(&ill->ill_lock); 11951 /* 11952 * No IPMP, and the packet did not arrive on conn_incoming_ill 11953 * OR, IPMP in use and the packet arrived on an IPMP group 11954 * different from the conn_incoming_ill's IPMP group. 11955 * Reject the packet. 11956 */ 11957 if ((in_ill->ill_group == NULL && in_ill != ill) || 11958 (in_ill->ill_group != NULL && 11959 in_ill->ill_group != ill->ill_group)) { 11960 wantpacket = B_FALSE; 11961 } 11962 mutex_exit(&ill->ill_lock); 11963 mutex_exit(&connp->conn_lock); 11964 if (!wantpacket) 11965 return (B_FALSE); 11966 } 11967 11968 if (connp->conn_multi_router) 11969 return (B_TRUE); 11970 11971 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11972 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11973 /* 11974 * Unicast case: we match the conn only if it's in the specified 11975 * zone. 11976 */ 11977 return (IPCL_ZONE_MATCH(connp, zoneid)); 11978 } 11979 11980 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11981 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11982 /* 11983 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11984 * disabled, therefore we don't dispatch the multicast packet to 11985 * the sending zone. 
11986 */ 11987 return (B_FALSE); 11988 } 11989 11990 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11991 zoneid != ALL_ZONES) { 11992 /* 11993 * Multicast packet on the loopback interface: we only match 11994 * conns who joined the group in the specified zone. 11995 */ 11996 return (B_FALSE); 11997 } 11998 11999 mutex_enter(&connp->conn_lock); 12000 wantpacket = 12001 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 12002 mutex_exit(&connp->conn_lock); 12003 12004 return (wantpacket); 12005 } 12006 12007 12008 /* 12009 * Transmit a packet and update any NUD state based on the flags 12010 * XXX need to "recover" any ip6i_t when doing putq! 12011 * 12012 * NOTE : This function does not ire_refrele the ire passed in as the 12013 * argument. 12014 */ 12015 void 12016 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 12017 int caller, ipsec_out_t *io) 12018 { 12019 mblk_t *mp1; 12020 nce_t *nce = ire->ire_nce; 12021 ill_t *ill; 12022 ill_t *out_ill; 12023 uint64_t delta; 12024 ip6_t *ip6h; 12025 queue_t *stq = ire->ire_stq; 12026 ire_t *ire1 = NULL; 12027 ire_t *save_ire = ire; 12028 boolean_t multirt_send = B_FALSE; 12029 mblk_t *next_mp = NULL; 12030 ip_stack_t *ipst = ire->ire_ipst; 12031 boolean_t fp_prepend = B_FALSE; 12032 uint32_t hlen; 12033 12034 ip6h = (ip6_t *)mp->b_rptr; 12035 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 12036 ASSERT(ire->ire_ipversion == IPV6_VERSION); 12037 ASSERT(nce != NULL); 12038 ASSERT(mp->b_datap->db_type == M_DATA); 12039 ASSERT(stq != NULL); 12040 12041 ill = ire_to_ill(ire); 12042 if (!ill) { 12043 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 12044 freemsg(mp); 12045 return; 12046 } 12047 12048 /* 12049 * If a packet is to be sent out an interface that is a 6to4 12050 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 12051 * destination, must be checked to have a 6to4 prefix 12052 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 12053 * address configured on 
the sending interface. Otherwise, 12054 * the packet was delivered to this interface in error and the 12055 * packet must be dropped. 12056 */ 12057 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12058 ipif_t *ipif = ill->ill_ipif; 12059 12060 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12061 &ip6h->ip6_dst)) { 12062 if (ip_debug > 2) { 12063 /* ip1dbg */ 12064 pr_addr_dbg("ip_xmit_v6: attempting to " 12065 "send 6to4 addressed IPv6 " 12066 "destination (%s) out the wrong " 12067 "interface.\n", AF_INET6, 12068 &ip6h->ip6_dst); 12069 } 12070 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12071 freemsg(mp); 12072 return; 12073 } 12074 } 12075 12076 /* Flow-control check has been done in ip_wput_ire_v6 */ 12077 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12078 caller == IP_WSRV || canput(stq->q_next)) { 12079 uint32_t ill_index; 12080 12081 /* 12082 * In most cases, the emission loop below is entered only 12083 * once. Only in the case where the ire holds the 12084 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12085 * flagged ires in the bucket, and send the packet 12086 * through all crossed RTF_MULTIRT routes. 12087 */ 12088 if (ire->ire_flags & RTF_MULTIRT) { 12089 /* 12090 * Multirouting case. The bucket where ire is stored 12091 * probably holds other RTF_MULTIRT flagged ires 12092 * to the destination. In this call to ip_xmit_v6, 12093 * we attempt to send the packet through all 12094 * those ires. Thus, we first ensure that ire is the 12095 * first RTF_MULTIRT ire in the bucket, 12096 * before walking the ire list. 12097 */ 12098 ire_t *first_ire; 12099 irb_t *irb = ire->ire_bucket; 12100 ASSERT(irb != NULL); 12101 multirt_send = B_TRUE; 12102 12103 /* Make sure we do not omit any multiroute ire. 
*/ 12104 IRB_REFHOLD(irb); 12105 for (first_ire = irb->irb_ire; 12106 first_ire != NULL; 12107 first_ire = first_ire->ire_next) { 12108 if ((first_ire->ire_flags & RTF_MULTIRT) && 12109 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12110 &ire->ire_addr_v6)) && 12111 !(first_ire->ire_marks & 12112 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12113 break; 12114 } 12115 12116 if ((first_ire != NULL) && (first_ire != ire)) { 12117 IRE_REFHOLD(first_ire); 12118 /* ire will be released by the caller */ 12119 ire = first_ire; 12120 nce = ire->ire_nce; 12121 stq = ire->ire_stq; 12122 ill = ire_to_ill(ire); 12123 } 12124 IRB_REFRELE(irb); 12125 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12126 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12127 ILL_MDT_USABLE(ill)) { 12128 /* 12129 * This tcp connection was marked as MDT-capable, but 12130 * it has been turned off due changes in the interface. 12131 * Now that the interface support is back, turn it on 12132 * by notifying tcp. We don't directly modify tcp_mdt, 12133 * since we leave all the details to the tcp code that 12134 * knows better. 12135 */ 12136 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12137 12138 if (mdimp == NULL) { 12139 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12140 "connp %p (ENOMEM)\n", (void *)connp)); 12141 } else { 12142 CONN_INC_REF(connp); 12143 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12144 connp, SQTAG_TCP_INPUT_MCTL); 12145 } 12146 } 12147 12148 do { 12149 mblk_t *mp_ip6h; 12150 12151 if (multirt_send) { 12152 irb_t *irb; 12153 /* 12154 * We are in a multiple send case, need to get 12155 * the next ire and make a duplicate of the 12156 * packet. ire1 holds here the next ire to 12157 * process in the bucket. If multirouting is 12158 * expected, any non-RTF_MULTIRT ire that has 12159 * the right destination address is ignored. 
12160 */ 12161 irb = ire->ire_bucket; 12162 ASSERT(irb != NULL); 12163 12164 IRB_REFHOLD(irb); 12165 for (ire1 = ire->ire_next; 12166 ire1 != NULL; 12167 ire1 = ire1->ire_next) { 12168 if (!(ire1->ire_flags & RTF_MULTIRT)) 12169 continue; 12170 if (!IN6_ARE_ADDR_EQUAL( 12171 &ire1->ire_addr_v6, 12172 &ire->ire_addr_v6)) 12173 continue; 12174 if (ire1->ire_marks & 12175 (IRE_MARK_CONDEMNED| 12176 IRE_MARK_HIDDEN)) 12177 continue; 12178 12179 /* Got one */ 12180 if (ire1 != save_ire) { 12181 IRE_REFHOLD(ire1); 12182 } 12183 break; 12184 } 12185 IRB_REFRELE(irb); 12186 12187 if (ire1 != NULL) { 12188 next_mp = copyb(mp); 12189 if ((next_mp == NULL) || 12190 ((mp->b_cont != NULL) && 12191 ((next_mp->b_cont = 12192 dupmsg(mp->b_cont)) == NULL))) { 12193 freemsg(next_mp); 12194 next_mp = NULL; 12195 ire_refrele(ire1); 12196 ire1 = NULL; 12197 } 12198 } 12199 12200 /* Last multiroute ire; don't loop anymore. */ 12201 if (ire1 == NULL) { 12202 multirt_send = B_FALSE; 12203 } 12204 } 12205 12206 ill_index = 12207 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12208 12209 /* Initiate IPPF processing */ 12210 if (IP6_OUT_IPP(flags, ipst)) { 12211 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12212 if (mp == NULL) { 12213 BUMP_MIB(ill->ill_ip_mib, 12214 ipIfStatsOutDiscards); 12215 if (next_mp != NULL) 12216 freemsg(next_mp); 12217 if (ire != save_ire) { 12218 ire_refrele(ire); 12219 } 12220 return; 12221 } 12222 ip6h = (ip6_t *)mp->b_rptr; 12223 } 12224 mp_ip6h = mp; 12225 12226 /* 12227 * Check for fastpath, we need to hold nce_lock to 12228 * prevent fastpath update from chaining nce_fp_mp. 
12229 */ 12230 12231 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12232 mutex_enter(&nce->nce_lock); 12233 if ((mp1 = nce->nce_fp_mp) != NULL) { 12234 uchar_t *rptr; 12235 12236 hlen = MBLKL(mp1); 12237 rptr = mp->b_rptr - hlen; 12238 /* 12239 * make sure there is room for the fastpath 12240 * datalink header 12241 */ 12242 if (rptr < mp->b_datap->db_base) { 12243 mp1 = copyb(mp1); 12244 mutex_exit(&nce->nce_lock); 12245 if (mp1 == NULL) { 12246 BUMP_MIB(ill->ill_ip_mib, 12247 ipIfStatsOutDiscards); 12248 freemsg(mp); 12249 if (next_mp != NULL) 12250 freemsg(next_mp); 12251 if (ire != save_ire) { 12252 ire_refrele(ire); 12253 } 12254 return; 12255 } 12256 mp1->b_cont = mp; 12257 12258 /* Get the priority marking, if any */ 12259 mp1->b_band = mp->b_band; 12260 mp = mp1; 12261 } else { 12262 mp->b_rptr = rptr; 12263 /* 12264 * fastpath - pre-pend datalink 12265 * header 12266 */ 12267 bcopy(mp1->b_rptr, rptr, hlen); 12268 mutex_exit(&nce->nce_lock); 12269 fp_prepend = B_TRUE; 12270 } 12271 } else { 12272 /* 12273 * Get the DL_UNITDATA_REQ. 12274 */ 12275 mp1 = nce->nce_res_mp; 12276 if (mp1 == NULL) { 12277 mutex_exit(&nce->nce_lock); 12278 ip1dbg(("ip_xmit_v6: No resolution " 12279 "block ire = %p\n", (void *)ire)); 12280 freemsg(mp); 12281 if (next_mp != NULL) 12282 freemsg(next_mp); 12283 if (ire != save_ire) { 12284 ire_refrele(ire); 12285 } 12286 return; 12287 } 12288 /* 12289 * Prepend the DL_UNITDATA_REQ. 
12290 */ 12291 mp1 = copyb(mp1); 12292 mutex_exit(&nce->nce_lock); 12293 if (mp1 == NULL) { 12294 BUMP_MIB(ill->ill_ip_mib, 12295 ipIfStatsOutDiscards); 12296 freemsg(mp); 12297 if (next_mp != NULL) 12298 freemsg(next_mp); 12299 if (ire != save_ire) { 12300 ire_refrele(ire); 12301 } 12302 return; 12303 } 12304 mp1->b_cont = mp; 12305 12306 /* Get the priority marking, if any */ 12307 mp1->b_band = mp->b_band; 12308 mp = mp1; 12309 } 12310 12311 out_ill = (ill_t *)stq->q_ptr; 12312 12313 DTRACE_PROBE4(ip6__physical__out__start, 12314 ill_t *, NULL, ill_t *, out_ill, 12315 ip6_t *, ip6h, mblk_t *, mp); 12316 12317 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12318 ipst->ips_ipv6firewall_physical_out, 12319 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 12320 12321 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12322 12323 if (mp == NULL) { 12324 if (multirt_send) { 12325 ASSERT(ire1 != NULL); 12326 if (ire != save_ire) { 12327 ire_refrele(ire); 12328 } 12329 /* 12330 * Proceed with the next RTF_MULTIRT 12331 * ire, also set up the send-to queue 12332 * accordingly. 12333 */ 12334 ire = ire1; 12335 ire1 = NULL; 12336 stq = ire->ire_stq; 12337 nce = ire->ire_nce; 12338 ill = ire_to_ill(ire); 12339 mp = next_mp; 12340 next_mp = NULL; 12341 continue; 12342 } else { 12343 ASSERT(next_mp == NULL); 12344 ASSERT(ire1 == NULL); 12345 break; 12346 } 12347 } 12348 12349 if (ipst->ips_ipobs_enabled) { 12350 zoneid_t szone; 12351 12352 szone = ip_get_zoneid_v6(&ip6h->ip6_src, 12353 mp_ip6h, out_ill, ipst, ALL_ZONES); 12354 ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, 12355 ALL_ZONES, out_ill, IPV6_VERSION, 12356 fp_prepend ? hlen : 0, ipst); 12357 } 12358 12359 /* 12360 * Update ire and MIB counters; for save_ire, this has 12361 * been done by the caller. 
12362 */ 12363 if (ire != save_ire) { 12364 UPDATE_OB_PKT_COUNT(ire); 12365 ire->ire_last_used_time = lbolt; 12366 12367 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12368 BUMP_MIB(ill->ill_ip_mib, 12369 ipIfStatsHCOutMcastPkts); 12370 UPDATE_MIB(ill->ill_ip_mib, 12371 ipIfStatsHCOutMcastOctets, 12372 ntohs(ip6h->ip6_plen) + 12373 IPV6_HDR_LEN); 12374 } 12375 } 12376 12377 /* 12378 * Send it down. XXX Do we want to flow control AH/ESP 12379 * packets that carry TCP payloads? We don't flow 12380 * control TCP packets, but we should also not 12381 * flow-control TCP packets that have been protected. 12382 * We don't have an easy way to find out if an AH/ESP 12383 * packet was originally TCP or not currently. 12384 */ 12385 if (io == NULL) { 12386 BUMP_MIB(ill->ill_ip_mib, 12387 ipIfStatsHCOutTransmits); 12388 UPDATE_MIB(ill->ill_ip_mib, 12389 ipIfStatsHCOutOctets, 12390 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12391 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 12392 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 12393 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 12394 int, 0); 12395 12396 putnext(stq, mp); 12397 } else { 12398 /* 12399 * Safety Pup says: make sure this is 12400 * going to the right interface! 
12401 */ 12402 if (io->ipsec_out_capab_ill_index != 12403 ill_index) { 12404 /* IPsec kstats: bump lose counter */ 12405 freemsg(mp1); 12406 } else { 12407 BUMP_MIB(ill->ill_ip_mib, 12408 ipIfStatsHCOutTransmits); 12409 UPDATE_MIB(ill->ill_ip_mib, 12410 ipIfStatsHCOutOctets, 12411 ntohs(ip6h->ip6_plen) + 12412 IPV6_HDR_LEN); 12413 DTRACE_IP7(send, mblk_t *, mp, 12414 conn_t *, NULL, void_ip_t *, ip6h, 12415 __dtrace_ipsr_ill_t *, out_ill, 12416 ipha_t *, NULL, ip6_t *, ip6h, int, 12417 0); 12418 ipsec_hw_putnext(stq, mp); 12419 } 12420 } 12421 12422 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12423 if (ire != save_ire) { 12424 ire_refrele(ire); 12425 } 12426 if (multirt_send) { 12427 ASSERT(ire1 != NULL); 12428 /* 12429 * Proceed with the next RTF_MULTIRT 12430 * ire, also set up the send-to queue 12431 * accordingly. 12432 */ 12433 ire = ire1; 12434 ire1 = NULL; 12435 stq = ire->ire_stq; 12436 nce = ire->ire_nce; 12437 ill = ire_to_ill(ire); 12438 mp = next_mp; 12439 next_mp = NULL; 12440 continue; 12441 } 12442 ASSERT(next_mp == NULL); 12443 ASSERT(ire1 == NULL); 12444 return; 12445 } 12446 12447 ASSERT(nce->nce_state != ND_INCOMPLETE); 12448 12449 /* 12450 * Check for upper layer advice 12451 */ 12452 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12453 /* 12454 * It should be o.k. to check the state without 12455 * a lock here, at most we lose an advice. 
12456 */ 12457 nce->nce_last = TICK_TO_MSEC(lbolt64); 12458 if (nce->nce_state != ND_REACHABLE) { 12459 12460 mutex_enter(&nce->nce_lock); 12461 nce->nce_state = ND_REACHABLE; 12462 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12463 mutex_exit(&nce->nce_lock); 12464 (void) untimeout(nce->nce_timeout_id); 12465 if (ip_debug > 2) { 12466 /* ip1dbg */ 12467 pr_addr_dbg("ip_xmit_v6: state" 12468 " for %s changed to" 12469 " REACHABLE\n", AF_INET6, 12470 &ire->ire_addr_v6); 12471 } 12472 } 12473 if (ire != save_ire) { 12474 ire_refrele(ire); 12475 } 12476 if (multirt_send) { 12477 ASSERT(ire1 != NULL); 12478 /* 12479 * Proceed with the next RTF_MULTIRT 12480 * ire, also set up the send-to queue 12481 * accordingly. 12482 */ 12483 ire = ire1; 12484 ire1 = NULL; 12485 stq = ire->ire_stq; 12486 nce = ire->ire_nce; 12487 ill = ire_to_ill(ire); 12488 mp = next_mp; 12489 next_mp = NULL; 12490 continue; 12491 } 12492 ASSERT(next_mp == NULL); 12493 ASSERT(ire1 == NULL); 12494 return; 12495 } 12496 12497 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12498 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12499 " ill_reachable_time = %d \n", delta, 12500 ill->ill_reachable_time)); 12501 if (delta > (uint64_t)ill->ill_reachable_time) { 12502 nce = ire->ire_nce; 12503 mutex_enter(&nce->nce_lock); 12504 switch (nce->nce_state) { 12505 case ND_REACHABLE: 12506 case ND_STALE: 12507 /* 12508 * ND_REACHABLE is identical to 12509 * ND_STALE in this specific case. If 12510 * reachable time has expired for this 12511 * neighbor (delta is greater than 12512 * reachable time), conceptually, the 12513 * neighbor cache is no longer in 12514 * REACHABLE state, but already in 12515 * STALE state. So the correct 12516 * transition here is to ND_DELAY. 
12517 */ 12518 nce->nce_state = ND_DELAY; 12519 mutex_exit(&nce->nce_lock); 12520 NDP_RESTART_TIMER(nce, 12521 ipst->ips_delay_first_probe_time); 12522 if (ip_debug > 3) { 12523 /* ip2dbg */ 12524 pr_addr_dbg("ip_xmit_v6: state" 12525 " for %s changed to" 12526 " DELAY\n", AF_INET6, 12527 &ire->ire_addr_v6); 12528 } 12529 break; 12530 case ND_DELAY: 12531 case ND_PROBE: 12532 mutex_exit(&nce->nce_lock); 12533 /* Timers have already started */ 12534 break; 12535 case ND_UNREACHABLE: 12536 /* 12537 * ndp timer has detected that this nce 12538 * is unreachable and initiated deleting 12539 * this nce and all its associated IREs. 12540 * This is a race where we found the 12541 * ire before it was deleted and have 12542 * just sent out a packet using this 12543 * unreachable nce. 12544 */ 12545 mutex_exit(&nce->nce_lock); 12546 break; 12547 default: 12548 ASSERT(0); 12549 } 12550 } 12551 12552 if (multirt_send) { 12553 ASSERT(ire1 != NULL); 12554 /* 12555 * Proceed with the next RTF_MULTIRT ire, 12556 * Also set up the send-to queue accordingly. 12557 */ 12558 if (ire != save_ire) { 12559 ire_refrele(ire); 12560 } 12561 ire = ire1; 12562 ire1 = NULL; 12563 stq = ire->ire_stq; 12564 nce = ire->ire_nce; 12565 ill = ire_to_ill(ire); 12566 mp = next_mp; 12567 next_mp = NULL; 12568 } 12569 } while (multirt_send); 12570 /* 12571 * In the multirouting case, release the last ire used for 12572 * emission. save_ire will be released by the caller. 12573 */ 12574 if (ire != save_ire) { 12575 ire_refrele(ire); 12576 } 12577 } else { 12578 /* 12579 * Queue packet if we have an conn to give back pressure. 12580 * We can't queue packets intended for hardware acceleration 12581 * since we've tossed that state already. If the packet is 12582 * being fed back from ire_send_v6, we don't know the 12583 * position in the queue to enqueue the packet and we discard 12584 * the packet. 
12585 */ 12586 if (ipst->ips_ip_output_queue && (connp != NULL) && 12587 (io == NULL) && (caller != IRE_SEND)) { 12588 if (caller == IP_WSRV) { 12589 connp->conn_did_putbq = 1; 12590 (void) putbq(connp->conn_wq, mp); 12591 conn_drain_insert(connp); 12592 /* 12593 * caller == IP_WSRV implies we are 12594 * the service thread, and the 12595 * queue is already noenabled. 12596 * The check for canput and 12597 * the putbq is not atomic. 12598 * So we need to check again. 12599 */ 12600 if (canput(stq->q_next)) 12601 connp->conn_did_putbq = 0; 12602 } else { 12603 (void) putq(connp->conn_wq, mp); 12604 } 12605 return; 12606 } 12607 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12608 freemsg(mp); 12609 return; 12610 } 12611 } 12612 12613 /* 12614 * pr_addr_dbg function provides the needed buffer space to call 12615 * inet_ntop() function's 3rd argument. This function should be 12616 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12617 * stack buffer space in it's own stack frame. This function uses 12618 * a buffer from it's own stack and prints the information. 12619 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12620 * 12621 * Note: This function can call inet_ntop() once. 12622 */ 12623 void 12624 pr_addr_dbg(char *fmt1, int af, const void *addr) 12625 { 12626 char buf[INET6_ADDRSTRLEN]; 12627 12628 if (fmt1 == NULL) { 12629 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12630 return; 12631 } 12632 12633 /* 12634 * This does not compare debug level and just prints 12635 * out. Thus it is the responsibility of the caller 12636 * to check the appropriate debug-level before calling 12637 * this function. 
12638 */ 12639 if (ip_debug > 0) { 12640 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12641 } 12642 12643 12644 } 12645 12646 12647 /* 12648 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12649 * if needed and extension headers) that will be needed based on the 12650 * ip6_pkt_t structure passed by the caller. 12651 * 12652 * The returned length does not include the length of the upper level 12653 * protocol (ULP) header. 12654 */ 12655 int 12656 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12657 { 12658 int len; 12659 12660 len = IPV6_HDR_LEN; 12661 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12662 len += sizeof (ip6i_t); 12663 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12664 ASSERT(ipp->ipp_hopoptslen != 0); 12665 len += ipp->ipp_hopoptslen; 12666 } 12667 if (ipp->ipp_fields & IPPF_RTHDR) { 12668 ASSERT(ipp->ipp_rthdrlen != 0); 12669 len += ipp->ipp_rthdrlen; 12670 } 12671 /* 12672 * En-route destination options 12673 * Only do them if there's a routing header as well 12674 */ 12675 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12676 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12677 ASSERT(ipp->ipp_rtdstoptslen != 0); 12678 len += ipp->ipp_rtdstoptslen; 12679 } 12680 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12681 ASSERT(ipp->ipp_dstoptslen != 0); 12682 len += ipp->ipp_dstoptslen; 12683 } 12684 return (len); 12685 } 12686 12687 /* 12688 * All-purpose routine to build a header chain of an IPv6 header 12689 * followed by any required extension headers and a proto header, 12690 * preceeded (where necessary) by an ip6i_t private header. 12691 * 12692 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12693 * will be filled in appropriately. 12694 * Thus the caller must fill in the rest of the IPv6 header, such as 12695 * traffic class/flowid, source address (if not set here), hoplimit (if not 12696 * set here) and destination address. 12697 * 12698 * The extension headers and ip6i_t header will all be fully filled in. 
12699 */ 12700 void 12701 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12702 ip6_pkt_t *ipp, uint8_t protocol) 12703 { 12704 uint8_t *nxthdr_ptr; 12705 uint8_t *cp; 12706 ip6i_t *ip6i; 12707 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12708 12709 /* 12710 * If sending private ip6i_t header down (checksum info, nexthop, 12711 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12712 * then fill it in. (The checksum info will be filled in by icmp). 12713 */ 12714 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12715 ip6i = (ip6i_t *)ip6h; 12716 ip6h = (ip6_t *)&ip6i[1]; 12717 12718 ip6i->ip6i_flags = 0; 12719 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12720 if (ipp->ipp_fields & IPPF_IFINDEX || 12721 ipp->ipp_fields & IPPF_SCOPE_ID) { 12722 ASSERT(ipp->ipp_ifindex != 0); 12723 ip6i->ip6i_flags |= IP6I_IFINDEX; 12724 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12725 } 12726 if (ipp->ipp_fields & IPPF_ADDR) { 12727 /* 12728 * Enable per-packet source address verification if 12729 * IPV6_PKTINFO specified the source address. 12730 * ip6_src is set in the transport's _wput function. 12731 */ 12732 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12733 &ipp->ipp_addr)); 12734 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12735 } 12736 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12737 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12738 /* 12739 * We need to set this flag so that IP doesn't 12740 * rewrite the IPv6 header's hoplimit with the 12741 * current default value. 
12742 */ 12743 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12744 } 12745 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12746 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12747 &ipp->ipp_nexthop)); 12748 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12749 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12750 } 12751 /* 12752 * tell IP this is an ip6i_t private header 12753 */ 12754 ip6i->ip6i_nxt = IPPROTO_RAW; 12755 } 12756 /* Initialize IPv6 header */ 12757 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12758 if (ipp->ipp_fields & IPPF_TCLASS) { 12759 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12760 (ipp->ipp_tclass << 20); 12761 } 12762 if (ipp->ipp_fields & IPPF_ADDR) 12763 ip6h->ip6_src = ipp->ipp_addr; 12764 12765 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12766 cp = (uint8_t *)&ip6h[1]; 12767 /* 12768 * Here's where we have to start stringing together 12769 * any extension headers in the right order: 12770 * Hop-by-hop, destination, routing, and final destination opts. 12771 */ 12772 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12773 /* Hop-by-hop options */ 12774 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12775 12776 *nxthdr_ptr = IPPROTO_HOPOPTS; 12777 nxthdr_ptr = &hbh->ip6h_nxt; 12778 12779 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12780 cp += ipp->ipp_hopoptslen; 12781 } 12782 /* 12783 * En-route destination options 12784 * Only do them if there's a routing header as well 12785 */ 12786 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12787 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12788 ip6_dest_t *dst = (ip6_dest_t *)cp; 12789 12790 *nxthdr_ptr = IPPROTO_DSTOPTS; 12791 nxthdr_ptr = &dst->ip6d_nxt; 12792 12793 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12794 cp += ipp->ipp_rtdstoptslen; 12795 } 12796 /* 12797 * Routing header next 12798 */ 12799 if (ipp->ipp_fields & IPPF_RTHDR) { 12800 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12801 12802 *nxthdr_ptr = IPPROTO_ROUTING; 12803 nxthdr_ptr = &rt->ip6r_nxt; 12804 12805 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12806 cp += ipp->ipp_rthdrlen; 12807 } 
12808 /* 12809 * Do ultimate destination options 12810 */ 12811 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12812 ip6_dest_t *dest = (ip6_dest_t *)cp; 12813 12814 *nxthdr_ptr = IPPROTO_DSTOPTS; 12815 nxthdr_ptr = &dest->ip6d_nxt; 12816 12817 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12818 cp += ipp->ipp_dstoptslen; 12819 } 12820 /* 12821 * Now set the last header pointer to the proto passed in 12822 */ 12823 *nxthdr_ptr = protocol; 12824 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12825 } 12826 12827 /* 12828 * Return a pointer to the routing header extension header 12829 * in the IPv6 header(s) chain passed in. 12830 * If none found, return NULL 12831 * Assumes that all extension headers are in same mblk as the v6 header 12832 */ 12833 ip6_rthdr_t * 12834 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12835 { 12836 ip6_dest_t *desthdr; 12837 ip6_frag_t *fraghdr; 12838 uint_t hdrlen; 12839 uint8_t nexthdr; 12840 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12841 12842 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12843 return ((ip6_rthdr_t *)ptr); 12844 12845 /* 12846 * The routing header will precede all extension headers 12847 * other than the hop-by-hop and destination options 12848 * extension headers, so if we see anything other than those, 12849 * we're done and didn't find it. 12850 * We could see a destination options header alone but no 12851 * routing header, in which case we'll return NULL as soon as 12852 * we see anything after that. 12853 * Hop-by-hop and destination option headers are identical, 12854 * so we can use either one we want as a template. 12855 */ 12856 nexthdr = ip6h->ip6_nxt; 12857 while (ptr < endptr) { 12858 /* Is there enough left for len + nexthdr? 
*/ 12859 if (ptr + MIN_EHDR_LEN > endptr) 12860 return (NULL); 12861 12862 switch (nexthdr) { 12863 case IPPROTO_HOPOPTS: 12864 case IPPROTO_DSTOPTS: 12865 /* Assumes the headers are identical for hbh and dst */ 12866 desthdr = (ip6_dest_t *)ptr; 12867 hdrlen = 8 * (desthdr->ip6d_len + 1); 12868 nexthdr = desthdr->ip6d_nxt; 12869 break; 12870 12871 case IPPROTO_ROUTING: 12872 return ((ip6_rthdr_t *)ptr); 12873 12874 case IPPROTO_FRAGMENT: 12875 fraghdr = (ip6_frag_t *)ptr; 12876 hdrlen = sizeof (ip6_frag_t); 12877 nexthdr = fraghdr->ip6f_nxt; 12878 break; 12879 12880 default: 12881 return (NULL); 12882 } 12883 ptr += hdrlen; 12884 } 12885 return (NULL); 12886 } 12887 12888 /* 12889 * Called for source-routed packets originating on this node. 12890 * Manipulates the original routing header by moving every entry up 12891 * one slot, placing the first entry in the v6 header's v6_dst field, 12892 * and placing the ultimate destination in the routing header's last 12893 * slot. 12894 * 12895 * Returns the checksum diference between the ultimate destination 12896 * (last hop in the routing header when the packet is sent) and 12897 * the first hop (ip6_dst when the packet is sent) 12898 */ 12899 /* ARGSUSED2 */ 12900 uint32_t 12901 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12902 { 12903 uint_t numaddr; 12904 uint_t i; 12905 in6_addr_t *addrptr; 12906 in6_addr_t tmp; 12907 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12908 uint32_t cksm; 12909 uint32_t addrsum = 0; 12910 uint16_t *ptr; 12911 12912 /* 12913 * Perform any processing needed for source routing. 12914 * We know that all extension headers will be in the same mblk 12915 * as the IPv6 header. 12916 */ 12917 12918 /* 12919 * If no segments left in header, or the header length field is zero, 12920 * don't move hop addresses around; 12921 * Checksum difference is zero. 
12922 */ 12923 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12924 return (0); 12925 12926 ptr = (uint16_t *)&ip6h->ip6_dst; 12927 cksm = 0; 12928 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12929 cksm += ptr[i]; 12930 } 12931 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12932 12933 /* 12934 * Here's where the fun begins - we have to 12935 * move all addresses up one spot, take the 12936 * first hop and make it our first ip6_dst, 12937 * and place the ultimate destination in the 12938 * newly-opened last slot. 12939 */ 12940 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12941 numaddr = rthdr->ip6r0_len / 2; 12942 tmp = *addrptr; 12943 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12944 *addrptr = addrptr[1]; 12945 } 12946 *addrptr = ip6h->ip6_dst; 12947 ip6h->ip6_dst = tmp; 12948 12949 /* 12950 * From the checksummed ultimate destination subtract the checksummed 12951 * current ip6_dst (the first hop address). Return that number. 12952 * (In the v4 case, the second part of this is done in each routine 12953 * that calls ip_massage_options(). We do it all in this one place 12954 * for v6). 12955 */ 12956 ptr = (uint16_t *)&ip6h->ip6_dst; 12957 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12958 addrsum += ptr[i]; 12959 } 12960 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12961 if ((int)cksm < 0) 12962 cksm--; 12963 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12964 12965 return (cksm); 12966 } 12967 12968 /* 12969 * Propagate a multicast group membership operation (join/leave) (*fn) on 12970 * all interfaces crossed by the related multirt routes. 12971 * The call is considered successful if the operation succeeds 12972 * on at least one interface. 12973 * The function is called if the destination address in the packet to send 12974 * is multirouted. 
12975 */ 12976 int 12977 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12978 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12979 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12980 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12981 { 12982 ire_t *ire_gw; 12983 irb_t *irb; 12984 int index, error = 0; 12985 opt_restart_t *or; 12986 ip_stack_t *ipst = ire->ire_ipst; 12987 12988 irb = ire->ire_bucket; 12989 ASSERT(irb != NULL); 12990 12991 ASSERT(DB_TYPE(first_mp) == M_CTL); 12992 or = (opt_restart_t *)first_mp->b_rptr; 12993 12994 IRB_REFHOLD(irb); 12995 for (; ire != NULL; ire = ire->ire_next) { 12996 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12997 continue; 12998 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12999 continue; 13000 13001 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 13002 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 13003 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 13004 /* No resolver exists for the gateway; skip this ire. */ 13005 if (ire_gw == NULL) 13006 continue; 13007 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 13008 /* 13009 * A resolver exists: we can get the interface on which we have 13010 * to apply the operation. 13011 */ 13012 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 13013 first_mp); 13014 if (error == 0) 13015 or->or_private = CGTP_MCAST_SUCCESS; 13016 13017 if (ip_debug > 0) { 13018 ulong_t off; 13019 char *ksym; 13020 13021 ksym = kobj_getsymname((uintptr_t)fn, &off); 13022 ip2dbg(("ip_multirt_apply_membership_v6: " 13023 "called %s, multirt group 0x%08x via itf 0x%08x, " 13024 "error %d [success %u]\n", 13025 ksym ? 
ksym : "?", 13026 ntohl(V4_PART_OF_V6((*v6grp))), 13027 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 13028 error, or->or_private)); 13029 } 13030 13031 ire_refrele(ire_gw); 13032 if (error == EINPROGRESS) { 13033 IRB_REFRELE(irb); 13034 return (error); 13035 } 13036 } 13037 IRB_REFRELE(irb); 13038 /* 13039 * Consider the call as successful if we succeeded on at least 13040 * one interface. Otherwise, return the last encountered error. 13041 */ 13042 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 13043 } 13044 13045 void 13046 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 13047 { 13048 kstat_t *ksp; 13049 13050 ip6_stat_t template = { 13051 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 13052 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 13053 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 13054 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 13055 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 13056 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 13057 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13058 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13059 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13060 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13061 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13062 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13063 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13064 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13065 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 13066 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 13067 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 13068 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 13069 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 13070 }; 13071 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 13072 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 13073 KSTAT_FLAG_VIRTUAL, stackid); 13074 13075 if (ksp == NULL) 13076 return (NULL); 13077 13078 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 13079 ksp->ks_data = (void *)ip6_statisticsp; 13080 ksp->ks_private = (void *)(uintptr_t)stackid; 13081 13082 kstat_install(ksp); 13083 return (ksp); 13084 } 13085 13086 void 13087 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 13088 { 13089 if (ksp != NULL) { 13090 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 13091 kstat_delete_netstack(ksp, stackid); 13092 } 13093 } 13094 13095 /* 13096 * The following two functions set and get the value for the 13097 * IPV6_SRC_PREFERENCES socket option. 13098 */ 13099 int 13100 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 13101 { 13102 /* 13103 * We only support preferences that are covered by 13104 * IPV6_PREFER_SRC_MASK. 13105 */ 13106 if (prefs & ~IPV6_PREFER_SRC_MASK) 13107 return (EINVAL); 13108 13109 /* 13110 * Look for conflicting preferences or default preferences. If 13111 * both bits of a related pair are clear, the application wants the 13112 * system's default value for that pair. Both bits in a pair can't 13113 * be set. 
13114 */ 13115 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13116 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13117 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13118 IPV6_PREFER_SRC_MIPMASK) { 13119 return (EINVAL); 13120 } 13121 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13122 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13123 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13124 IPV6_PREFER_SRC_TMPMASK) { 13125 return (EINVAL); 13126 } 13127 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13128 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13129 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13130 IPV6_PREFER_SRC_CGAMASK) { 13131 return (EINVAL); 13132 } 13133 13134 connp->conn_src_preferences = prefs; 13135 return (0); 13136 } 13137 13138 size_t 13139 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13140 { 13141 *val = connp->conn_src_preferences; 13142 return (sizeof (connp->conn_src_preferences)); 13143 } 13144 13145 int 13146 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13147 { 13148 ill_t *ill; 13149 ire_t *ire; 13150 int error; 13151 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13152 13153 /* 13154 * Verify the source address and ifindex. Privileged users can use 13155 * any source address. For ancillary data the source address is 13156 * checked in ip_wput_v6. 13157 */ 13158 if (pkti->ipi6_ifindex != 0) { 13159 ASSERT(connp != NULL); 13160 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13161 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error, ipst); 13162 if (ill == NULL) { 13163 /* 13164 * We just want to know if the interface exists, we 13165 * don't really care about the ill pointer itself. 
13166 */ 13167 if (error != EINPROGRESS) 13168 return (error); 13169 error = 0; /* Ensure we don't use it below */ 13170 } else { 13171 ill_refrele(ill); 13172 } 13173 } 13174 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13175 secpolicy_net_rawaccess(cr) != 0) { 13176 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13177 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13178 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 13179 if (ire != NULL) 13180 ire_refrele(ire); 13181 else 13182 return (ENXIO); 13183 } 13184 return (0); 13185 } 13186 13187 /* 13188 * Get the size of the IP options (including the IP headers size) 13189 * without including the AH header's size. If till_ah is B_FALSE, 13190 * and if AH header is present, dest options beyond AH header will 13191 * also be included in the returned size. 13192 */ 13193 int 13194 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13195 { 13196 ip6_t *ip6h; 13197 uint8_t nexthdr; 13198 uint8_t *whereptr; 13199 ip6_hbh_t *hbhhdr; 13200 ip6_dest_t *dsthdr; 13201 ip6_rthdr_t *rthdr; 13202 int ehdrlen; 13203 int size; 13204 ah_t *ah; 13205 13206 ip6h = (ip6_t *)mp->b_rptr; 13207 size = IPV6_HDR_LEN; 13208 nexthdr = ip6h->ip6_nxt; 13209 whereptr = (uint8_t *)&ip6h[1]; 13210 for (;;) { 13211 /* Assume IP has already stripped it */ 13212 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13213 switch (nexthdr) { 13214 case IPPROTO_HOPOPTS: 13215 hbhhdr = (ip6_hbh_t *)whereptr; 13216 nexthdr = hbhhdr->ip6h_nxt; 13217 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13218 break; 13219 case IPPROTO_DSTOPTS: 13220 dsthdr = (ip6_dest_t *)whereptr; 13221 nexthdr = dsthdr->ip6d_nxt; 13222 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13223 break; 13224 case IPPROTO_ROUTING: 13225 rthdr = (ip6_rthdr_t *)whereptr; 13226 nexthdr = rthdr->ip6r_nxt; 13227 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13228 break; 13229 default : 13230 if (till_ah) { 13231 ASSERT(nexthdr == IPPROTO_AH); 13232 return (size); 13233 } 13234 /* 13235 * If we don't have a 
AH header to traverse, 13236 * return now. This happens normally for 13237 * outbound datagrams where we have not inserted 13238 * the AH header. 13239 */ 13240 if (nexthdr != IPPROTO_AH) { 13241 return (size); 13242 } 13243 13244 /* 13245 * We don't include the AH header's size 13246 * to be symmetrical with other cases where 13247 * we either don't have a AH header (outbound) 13248 * or peek into the AH header yet (inbound and 13249 * not pulled up yet). 13250 */ 13251 ah = (ah_t *)whereptr; 13252 nexthdr = ah->ah_nexthdr; 13253 ehdrlen = (ah->ah_length << 2) + 8; 13254 13255 if (nexthdr == IPPROTO_DSTOPTS) { 13256 if (whereptr + ehdrlen >= mp->b_wptr) { 13257 /* 13258 * The destination options header 13259 * is not part of the first mblk. 13260 */ 13261 whereptr = mp->b_cont->b_rptr; 13262 } else { 13263 whereptr += ehdrlen; 13264 } 13265 13266 dsthdr = (ip6_dest_t *)whereptr; 13267 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13268 size += ehdrlen; 13269 } 13270 return (size); 13271 } 13272 whereptr += ehdrlen; 13273 size += ehdrlen; 13274 } 13275 } 13276