1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/sdt.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 #include <sys/neti.h> 49 #include <sys/hook.h> 50 51 #include <sys/kmem.h> 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/atomic.h> 58 #include <sys/iphada.h> 59 #include <sys/policy.h> 60 #include <net/if.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/optcom.h> 73 #include <inet/mib2.h> 74 #include <inet/nd.h> 75 #include <inet/arp.h> 76 77 #include <inet/ip.h> 78 #include <inet/ip_impl.h> 79 #include <inet/ip6.h> 80 #include <inet/ip6_asp.h> 81 #include <inet/tcp.h> 82 #include <inet/tcp_impl.h> 83 #include <inet/udp_impl.h> 84 #include <inet/ipp_common.h> 85 86 #include <inet/ip_multi.h> 87 #include <inet/ip_if.h> 88 #include <inet/ip_ire.h> 89 #include <inet/ip_rts.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <inet/rawip_impl.h> 102 #include <inet/rts_impl.h> 103 #include <sys/squeue.h> 104 105 #include <sys/tsol/label.h> 106 
#include <sys/tsol/tnet.h> 107 108 #include <rpc/pmap_prot.h> 109 110 /* Temporary; for CR 6451644 work-around */ 111 #include <sys/ethernet.h> 112 113 extern squeue_func_t ip_input_proc; 114 115 /* 116 * Naming conventions: 117 * These rules should be judiciously applied 118 * if there is a need to identify something as IPv6 versus IPv4 119 * IPv6 functions will end with _v6 in the ip module. 120 * IPv6 functions will end with _ipv6 in the transport modules. 121 * IPv6 macros: 122 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 123 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 124 * And then there are ..V4_PART_OF_V6. 125 * The intent is that macros in the ip module end with _V6. 126 * IPv6 global variables will start with ipv6_ 127 * IPv6 structures will start with ipv6 128 * IPv6 defined constants should start with IPV6_ 129 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 130 */ 131 132 /* 133 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 134 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 135 * from IANA. This mechanism will remain in effect until an official 136 * number is obtained. 
137 */ 138 uchar_t ip6opt_ls; 139 140 const in6_addr_t ipv6_all_ones = 141 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 142 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 143 144 #ifdef _BIG_ENDIAN 145 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 146 #else /* _BIG_ENDIAN */ 147 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 148 #endif /* _BIG_ENDIAN */ 149 150 #ifdef _BIG_ENDIAN 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 152 #else /* _BIG_ENDIAN */ 153 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 154 #endif /* _BIG_ENDIAN */ 155 156 #ifdef _BIG_ENDIAN 157 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 158 #else /* _BIG_ENDIAN */ 159 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 160 #endif /* _BIG_ENDIAN */ 161 162 #ifdef _BIG_ENDIAN 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 164 #else /* _BIG_ENDIAN */ 165 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 166 #endif /* _BIG_ENDIAN */ 167 168 #ifdef _BIG_ENDIAN 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 170 #else /* _BIG_ENDIAN */ 171 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 172 #endif /* _BIG_ENDIAN */ 173 174 #ifdef _BIG_ENDIAN 175 const in6_addr_t ipv6_solicited_node_mcast = 176 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 177 #else /* _BIG_ENDIAN */ 178 const in6_addr_t ipv6_solicited_node_mcast = 179 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 180 #endif /* _BIG_ENDIAN */ 181 182 /* Leave room for ip_newroute to tack on the src and target addresses */ 183 #define OK_RESOLVER_MP_V6(mp) \ 184 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 185 186 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 187 boolean_t, zoneid_t); 188 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 189 const 
in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 190 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 191 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 192 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 193 boolean_t, boolean_t, boolean_t, boolean_t); 194 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 195 iulp_t *, ip_stack_t *); 196 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 197 uint16_t, boolean_t, boolean_t, boolean_t); 198 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 199 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 200 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 201 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 202 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 203 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 204 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 205 uint8_t *, uint_t, uint8_t, ip_stack_t *); 206 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 207 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 208 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 209 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 210 conn_t *, int, int, int, zoneid_t); 211 212 /* 213 * A template for an IPv6 AR_ENTRY_QUERY 214 */ 215 static areq_t ipv6_areq_template = { 216 AR_ENTRY_QUERY, /* cmd */ 217 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 218 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 219 IP6_DL_SAP, /* protocol, from arps perspective */ 220 sizeof (areq_t), /* target addr offset */ 221 IPV6_ADDR_LEN, /* target addr_length */ 222 0, /* flags */ 223 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 224 IPV6_ADDR_LEN, /* sender addr length */ 225 6, /* xmit_count */ 226 1000, /* (re)xmit_interval in milliseconds */ 227 4 /* max # of requests to buffer */ 228 /* anything else filled in 
by the code */ 229 }; 230 231 /* 232 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 233 * The message has already been checksummed and if needed, 234 * a copy has been made to be sent any interested ICMP client (conn) 235 * Note that this is different than icmp_inbound() which does the fanout 236 * to conn's as well as local processing of the ICMP packets. 237 * 238 * All error messages are passed to the matching transport stream. 239 * 240 * Zones notes: 241 * The packet is only processed in the context of the specified zone: typically 242 * only this zone will reply to an echo request. This means that the caller must 243 * call icmp_inbound_v6() for each relevant zone. 244 */ 245 static void 246 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 247 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 248 { 249 icmp6_t *icmp6; 250 ip6_t *ip6h; 251 boolean_t interested; 252 ip6i_t *ip6i; 253 in6_addr_t origsrc; 254 ire_t *ire; 255 mblk_t *first_mp; 256 ipsec_in_t *ii; 257 ip_stack_t *ipst = ill->ill_ipst; 258 259 ASSERT(ill != NULL); 260 first_mp = mp; 261 if (mctl_present) { 262 mp = first_mp->b_cont; 263 ASSERT(mp != NULL); 264 265 ii = (ipsec_in_t *)first_mp->b_rptr; 266 ASSERT(ii->ipsec_in_type == IPSEC_IN); 267 } 268 269 ip6h = (ip6_t *)mp->b_rptr; 270 271 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 272 273 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 274 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 275 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 276 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 277 freemsg(first_mp); 278 return; 279 } 280 ip6h = (ip6_t *)mp->b_rptr; 281 } 282 if (ipst->ips_icmp_accept_clear_messages == 0) { 283 first_mp = ipsec_check_global_policy(first_mp, NULL, 284 NULL, ip6h, mctl_present, ipst->ips_netstack); 285 if (first_mp == NULL) 286 return; 287 } 288 289 /* 290 * On a labeled system, we have to check whether the zone itself is 291 * 
permitted to receive raw traffic. 292 */ 293 if (is_system_labeled()) { 294 if (zoneid == ALL_ZONES) 295 zoneid = tsol_packet_to_zoneid(mp); 296 if (!tsol_can_accept_raw(mp, B_FALSE)) { 297 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 298 zoneid)); 299 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 300 freemsg(first_mp); 301 return; 302 } 303 } 304 305 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 306 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 307 icmp6->icmp6_code)); 308 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 309 310 /* Initiate IPPF processing here */ 311 if (IP6_IN_IPP(flags, ipst)) { 312 313 /* 314 * If the ifindex changes due to SIOCSLIFINDEX 315 * packet may return to IP on the wrong ill. 316 */ 317 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 318 if (mp == NULL) { 319 if (mctl_present) { 320 freeb(first_mp); 321 } 322 return; 323 } 324 } 325 326 switch (icmp6->icmp6_type) { 327 case ICMP6_DST_UNREACH: 328 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 329 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 330 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 331 break; 332 333 case ICMP6_TIME_EXCEEDED: 334 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 335 break; 336 337 case ICMP6_PARAM_PROB: 338 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 339 break; 340 341 case ICMP6_PACKET_TOO_BIG: 342 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 343 zoneid); 344 return; 345 case ICMP6_ECHO_REQUEST: 346 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 347 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 348 !ipst->ips_ipv6_resp_echo_mcast) 349 break; 350 351 /* 352 * We must have exclusive use of the mblk to convert it to 353 * a response. 354 * If not, we copy it. 
355 */ 356 if (mp->b_datap->db_ref > 1) { 357 mblk_t *mp1; 358 359 mp1 = copymsg(mp); 360 freemsg(mp); 361 if (mp1 == NULL) { 362 BUMP_MIB(ill->ill_icmp6_mib, 363 ipv6IfIcmpInErrors); 364 if (mctl_present) 365 freeb(first_mp); 366 return; 367 } 368 mp = mp1; 369 ip6h = (ip6_t *)mp->b_rptr; 370 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 371 if (mctl_present) 372 first_mp->b_cont = mp; 373 else 374 first_mp = mp; 375 } 376 377 /* 378 * Turn the echo into an echo reply. 379 * Remove any extension headers (do not reverse a source route) 380 * and clear the flow id (keep traffic class for now). 381 */ 382 if (hdr_length != IPV6_HDR_LEN) { 383 int i; 384 385 for (i = 0; i < IPV6_HDR_LEN; i++) 386 mp->b_rptr[hdr_length - i - 1] = 387 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 388 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 389 ip6h = (ip6_t *)mp->b_rptr; 390 ip6h->ip6_nxt = IPPROTO_ICMPV6; 391 hdr_length = IPV6_HDR_LEN; 392 } 393 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 394 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 395 396 ip6h->ip6_plen = 397 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 398 origsrc = ip6h->ip6_src; 399 /* 400 * Reverse the source and destination addresses. 401 * If the return address is a multicast, zero out the source 402 * (ip_wput_v6 will set an address). 403 */ 404 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 405 ip6h->ip6_src = ipv6_all_zeros; 406 ip6h->ip6_dst = origsrc; 407 } else { 408 ip6h->ip6_src = ip6h->ip6_dst; 409 ip6h->ip6_dst = origsrc; 410 } 411 412 /* set the hop limit */ 413 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 414 415 /* 416 * Prepare for checksum by putting icmp length in the icmp 417 * checksum field. The checksum is calculated in ip_wput_v6. 418 */ 419 icmp6->icmp6_cksum = ip6h->ip6_plen; 420 /* 421 * ICMP echo replies should go out on the same interface 422 * the request came on as probes used by in.mpathd for 423 * detecting NIC failures are ECHO packets. 
We turn-off load 424 * spreading by allocating a ip6i and setting ip6i_attach_if 425 * to B_TRUE which is handled both by ip_wput_v6 and 426 * ip_newroute_v6. If we don't turnoff load spreading, 427 * the packets might get dropped if there are no 428 * non-FAILED/INACTIVE interfaces for it to go out on and 429 * in.mpathd would wrongly detect a failure or mis-detect 430 * a NIC failure as a link failure. As load spreading can 431 * happen only if ill_group is not NULL, we do only for 432 * that case and this does not affect the normal case. 433 * 434 * We force this only on echo packets that came from on-link 435 * hosts. We restrict this to link-local addresses which 436 * is used by in.mpathd for probing. In the IPv6 case, 437 * default routes typically have an ire_ipif pointer and 438 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 439 * might work. As a default route out of this interface 440 * may not be present, enforcing this packet to go out in 441 * this case may not work. 442 */ 443 if (ill->ill_group != NULL && 444 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 445 /* 446 * If we are sending replies to ourselves, don't 447 * set ATTACH_IF as we may not be able to find 448 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 449 * causes ip_wput_v6 to look for an IRE_LOCAL on 450 * "ill" which it may not find and will try to 451 * create an IRE_CACHE for our local address. Once 452 * we do this, we will try to forward all packets 453 * meant to our LOCAL address. 
454 */ 455 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 456 NULL, ipst); 457 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 458 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 459 if (mp == NULL) { 460 BUMP_MIB(ill->ill_icmp6_mib, 461 ipv6IfIcmpInErrors); 462 if (ire != NULL) 463 ire_refrele(ire); 464 if (mctl_present) 465 freeb(first_mp); 466 return; 467 } else if (mctl_present) { 468 first_mp->b_cont = mp; 469 } else { 470 first_mp = mp; 471 } 472 ip6i = (ip6i_t *)mp->b_rptr; 473 ip6i->ip6i_flags = IP6I_ATTACH_IF; 474 ip6i->ip6i_ifindex = 475 ill->ill_phyint->phyint_ifindex; 476 } 477 if (ire != NULL) 478 ire_refrele(ire); 479 } 480 481 if (!mctl_present) { 482 /* 483 * This packet should go out the same way as it 484 * came in i.e in clear. To make sure that global 485 * policy will not be applied to this in ip_wput, 486 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 487 */ 488 ASSERT(first_mp == mp); 489 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 490 if (first_mp == NULL) { 491 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 492 freemsg(mp); 493 return; 494 } 495 ii = (ipsec_in_t *)first_mp->b_rptr; 496 497 /* This is not a secure packet */ 498 ii->ipsec_in_secure = B_FALSE; 499 first_mp->b_cont = mp; 500 } 501 ii->ipsec_in_zoneid = zoneid; 502 ASSERT(zoneid != ALL_ZONES); 503 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 504 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 505 return; 506 } 507 put(WR(q), first_mp); 508 return; 509 510 case ICMP6_ECHO_REPLY: 511 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 512 break; 513 514 case ND_ROUTER_SOLICIT: 515 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 516 break; 517 518 case ND_ROUTER_ADVERT: 519 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 520 break; 521 522 case ND_NEIGHBOR_SOLICIT: 523 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 524 if (mctl_present) 525 freeb(first_mp); 526 /* XXX may wish to pass first_mp up to ndp_input 
someday. */ 527 ndp_input(ill, mp, dl_mp); 528 return; 529 530 case ND_NEIGHBOR_ADVERT: 531 BUMP_MIB(ill->ill_icmp6_mib, 532 ipv6IfIcmpInNeighborAdvertisements); 533 if (mctl_present) 534 freeb(first_mp); 535 /* XXX may wish to pass first_mp up to ndp_input someday. */ 536 ndp_input(ill, mp, dl_mp); 537 return; 538 539 case ND_REDIRECT: { 540 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 541 542 if (ipst->ips_ipv6_ignore_redirect) 543 break; 544 545 /* 546 * As there is no upper client to deliver, we don't 547 * need the first_mp any more. 548 */ 549 if (mctl_present) 550 freeb(first_mp); 551 if (!pullupmsg(mp, -1)) { 552 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 553 break; 554 } 555 icmp_redirect_v6(q, mp, ill); 556 return; 557 } 558 559 /* 560 * The next three icmp messages will be handled by MLD. 561 * Pass all valid MLD packets up to any process(es) 562 * listening on a raw ICMP socket. MLD messages are 563 * freed by mld_input function. 564 */ 565 case MLD_LISTENER_QUERY: 566 case MLD_LISTENER_REPORT: 567 case MLD_LISTENER_REDUCTION: 568 if (mctl_present) 569 freeb(first_mp); 570 mld_input(q, mp, ill); 571 return; 572 default: 573 break; 574 } 575 if (interested) { 576 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 577 mctl_present, zoneid); 578 } else { 579 freemsg(first_mp); 580 } 581 } 582 583 /* 584 * Process received IPv6 ICMP Packet too big. 585 * After updating any IRE it does the fanout to any matching transport streams. 586 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
587 */ 588 /* ARGSUSED */ 589 static void 590 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 591 boolean_t mctl_present, zoneid_t zoneid) 592 { 593 ip6_t *ip6h; 594 ip6_t *inner_ip6h; 595 icmp6_t *icmp6; 596 uint16_t hdr_length; 597 uint32_t mtu; 598 ire_t *ire, *first_ire; 599 mblk_t *first_mp; 600 ip_stack_t *ipst = ill->ill_ipst; 601 602 first_mp = mp; 603 if (mctl_present) 604 mp = first_mp->b_cont; 605 /* 606 * We must have exclusive use of the mblk to update the MTU 607 * in the packet. 608 * If not, we copy it. 609 * 610 * If there's an M_CTL present, we know that allocated first_mp 611 * earlier in this function, so we know first_mp has refcnt of one. 612 */ 613 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 614 if (mp->b_datap->db_ref > 1) { 615 mblk_t *mp1; 616 617 mp1 = copymsg(mp); 618 freemsg(mp); 619 if (mp1 == NULL) { 620 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 621 if (mctl_present) 622 freeb(first_mp); 623 return; 624 } 625 mp = mp1; 626 if (mctl_present) 627 first_mp->b_cont = mp; 628 else 629 first_mp = mp; 630 } 631 ip6h = (ip6_t *)mp->b_rptr; 632 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 633 hdr_length = ip_hdr_length_v6(mp, ip6h); 634 else 635 hdr_length = IPV6_HDR_LEN; 636 637 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 638 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 639 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 640 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 641 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 642 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 643 freemsg(first_mp); 644 return; 645 } 646 ip6h = (ip6_t *)mp->b_rptr; 647 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 648 inner_ip6h = (ip6_t *)&icmp6[1]; 649 } 650 651 /* 652 * For link local destinations matching simply on IRE type is not 653 * sufficient. Same link local addresses for different ILL's is 654 * possible. 
655 */ 656 657 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 658 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 659 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 660 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 661 662 if (first_ire == NULL) { 663 if (ip_debug > 2) { 664 /* ip1dbg */ 665 pr_addr_dbg("icmp_inbound_too_big_v6:" 666 "no ire for dst %s\n", AF_INET6, 667 &inner_ip6h->ip6_dst); 668 } 669 freemsg(first_mp); 670 return; 671 } 672 673 mtu = ntohl(icmp6->icmp6_mtu); 674 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 675 for (ire = first_ire; ire != NULL && 676 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 677 ire = ire->ire_next) { 678 mutex_enter(&ire->ire_lock); 679 if (mtu < IPV6_MIN_MTU) { 680 ip1dbg(("Received mtu less than IPv6 " 681 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 682 mtu = IPV6_MIN_MTU; 683 /* 684 * If an mtu less than IPv6 min mtu is received, 685 * we must include a fragment header in 686 * subsequent packets. 687 */ 688 ire->ire_frag_flag |= IPH_FRAG_HDR; 689 } 690 ip1dbg(("Received mtu from router: %d\n", mtu)); 691 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 692 /* Record the new max frag size for the ULP. */ 693 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 694 /* 695 * If we need a fragment header in every packet 696 * (above case or multirouting), make sure the 697 * ULP takes it into account when computing the 698 * payload size. 
699 */ 700 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 701 sizeof (ip6_frag_t)); 702 } else { 703 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 704 } 705 mutex_exit(&ire->ire_lock); 706 } 707 rw_exit(&first_ire->ire_bucket->irb_lock); 708 ire_refrele(first_ire); 709 } else { 710 irb_t *irb = NULL; 711 /* 712 * for non-link local destinations we match only on the IRE type 713 */ 714 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 715 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 716 ipst); 717 if (ire == NULL) { 718 if (ip_debug > 2) { 719 /* ip1dbg */ 720 pr_addr_dbg("icmp_inbound_too_big_v6:" 721 "no ire for dst %s\n", 722 AF_INET6, &inner_ip6h->ip6_dst); 723 } 724 freemsg(first_mp); 725 return; 726 } 727 irb = ire->ire_bucket; 728 ire_refrele(ire); 729 rw_enter(&irb->irb_lock, RW_READER); 730 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 731 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 732 &inner_ip6h->ip6_dst)) { 733 mtu = ntohl(icmp6->icmp6_mtu); 734 mutex_enter(&ire->ire_lock); 735 if (mtu < IPV6_MIN_MTU) { 736 ip1dbg(("Received mtu less than IPv6" 737 "min mtu %d: %d\n", 738 IPV6_MIN_MTU, mtu)); 739 mtu = IPV6_MIN_MTU; 740 /* 741 * If an mtu less than IPv6 min mtu is 742 * received, we must include a fragment 743 * header in subsequent packets. 744 */ 745 ire->ire_frag_flag |= IPH_FRAG_HDR; 746 } 747 748 ip1dbg(("Received mtu from router: %d\n", mtu)); 749 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 750 /* Record the new max frag size for the ULP. */ 751 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 752 /* 753 * If we need a fragment header in 754 * every packet (above case or 755 * multirouting), make sure the ULP 756 * takes it into account when computing 757 * the payload size. 
758 */ 759 icmp6->icmp6_mtu = 760 htonl(ire->ire_max_frag - 761 sizeof (ip6_frag_t)); 762 } else { 763 icmp6->icmp6_mtu = 764 htonl(ire->ire_max_frag); 765 } 766 mutex_exit(&ire->ire_lock); 767 } 768 } 769 rw_exit(&irb->irb_lock); 770 } 771 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 772 mctl_present, zoneid); 773 } 774 775 /* 776 * Fanout received ICMPv6 error packets to the transports. 777 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 778 */ 779 void 780 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 781 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 782 { 783 uint16_t *up; /* Pointer to ports in ULP header */ 784 uint32_t ports; /* reversed ports for fanout */ 785 ip6_t rip6h; /* With reversed addresses */ 786 uint16_t hdr_length; 787 uint8_t *nexthdrp; 788 uint8_t nexthdr; 789 mblk_t *first_mp; 790 ipsec_in_t *ii; 791 tcpha_t *tcpha; 792 conn_t *connp; 793 ip_stack_t *ipst = ill->ill_ipst; 794 795 first_mp = mp; 796 if (mctl_present) { 797 mp = first_mp->b_cont; 798 ASSERT(mp != NULL); 799 800 ii = (ipsec_in_t *)first_mp->b_rptr; 801 ASSERT(ii->ipsec_in_type == IPSEC_IN); 802 } else { 803 ii = NULL; 804 } 805 806 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 807 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 808 809 /* 810 * Need to pullup everything in order to use 811 * ip_hdr_length_nexthdr_v6() 812 */ 813 if (mp->b_cont != NULL) { 814 if (!pullupmsg(mp, -1)) { 815 ip1dbg(("icmp_inbound_error_fanout_v6: " 816 "pullupmsg failed\n")); 817 goto drop_pkt; 818 } 819 ip6h = (ip6_t *)mp->b_rptr; 820 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 821 } 822 823 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 824 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 825 goto drop_pkt; 826 827 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 828 goto drop_pkt; 829 nexthdr = *nexthdrp; 830 831 /* Set message type, must be done after pullups */ 832 
mp->b_datap->db_type = M_CTL; 833 834 /* Try to pass the ICMP message to clients who need it */ 835 switch (nexthdr) { 836 case IPPROTO_UDP: { 837 /* 838 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 839 * UDP header to get the port information. 840 */ 841 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 842 mp->b_wptr) { 843 break; 844 } 845 /* 846 * Attempt to find a client stream based on port. 847 * Note that we do a reverse lookup since the header is 848 * in the form we sent it out. 849 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 850 * and we only set the src and dst addresses and nexthdr. 851 */ 852 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 853 rip6h.ip6_src = ip6h->ip6_dst; 854 rip6h.ip6_dst = ip6h->ip6_src; 855 rip6h.ip6_nxt = nexthdr; 856 ((uint16_t *)&ports)[0] = up[1]; 857 ((uint16_t *)&ports)[1] = up[0]; 858 859 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 860 IP6_NO_IPPOLICY, mctl_present, zoneid); 861 return; 862 } 863 case IPPROTO_TCP: { 864 /* 865 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 866 * the TCP header to get the port information. 867 */ 868 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 869 mp->b_wptr) { 870 break; 871 } 872 873 /* 874 * Attempt to find a client stream based on port. 875 * Note that we do a reverse lookup since the header is 876 * in the form we sent it out. 877 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 878 * we only set the src and dst addresses and nexthdr. 879 */ 880 881 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 882 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 883 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 884 if (connp == NULL) { 885 goto drop_pkt; 886 } 887 888 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 889 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 890 return; 891 892 } 893 case IPPROTO_SCTP: 894 /* 895 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 896 * the SCTP header to get the port information. 
897 */ 898 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 899 mp->b_wptr) { 900 break; 901 } 902 903 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 904 ((uint16_t *)&ports)[0] = up[1]; 905 ((uint16_t *)&ports)[1] = up[0]; 906 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 907 IP6_NO_IPPOLICY, zoneid); 908 return; 909 case IPPROTO_ESP: 910 case IPPROTO_AH: { 911 int ipsec_rc; 912 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 913 914 /* 915 * We need a IPSEC_IN in the front to fanout to AH/ESP. 916 * We will re-use the IPSEC_IN if it is already present as 917 * AH/ESP will not affect any fields in the IPSEC_IN for 918 * ICMP errors. If there is no IPSEC_IN, allocate a new 919 * one and attach it in the front. 920 */ 921 if (ii != NULL) { 922 /* 923 * ip_fanout_proto_again converts the ICMP errors 924 * that come back from AH/ESP to M_DATA so that 925 * if it is non-AH/ESP and we do a pullupmsg in 926 * this function, it would work. Convert it back 927 * to M_CTL before we send up as this is a ICMP 928 * error. This could have been generated locally or 929 * by some router. Validate the inner IPSEC 930 * headers. 931 * 932 * NOTE : ill_index is used by ip_fanout_proto_again 933 * to locate the ill. 934 */ 935 ASSERT(ill != NULL); 936 ii->ipsec_in_ill_index = 937 ill->ill_phyint->phyint_ifindex; 938 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 939 first_mp->b_cont->b_datap->db_type = M_CTL; 940 } else { 941 /* 942 * IPSEC_IN is not present. We attach a ipsec_in 943 * message and send up to IPSEC for validating 944 * and removing the IPSEC headers. Clear 945 * ipsec_in_secure so that when we return 946 * from IPSEC, we don't mistakenly think that this 947 * is a secure packet came from the network. 948 * 949 * NOTE : ill_index is used by ip_fanout_proto_again 950 * to locate the ill. 
951 */ 952 ASSERT(first_mp == mp); 953 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 954 ASSERT(ill != NULL); 955 if (first_mp == NULL) { 956 freemsg(mp); 957 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 958 return; 959 } 960 ii = (ipsec_in_t *)first_mp->b_rptr; 961 962 /* This is not a secure packet */ 963 ii->ipsec_in_secure = B_FALSE; 964 first_mp->b_cont = mp; 965 mp->b_datap->db_type = M_CTL; 966 ii->ipsec_in_ill_index = 967 ill->ill_phyint->phyint_ifindex; 968 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 969 } 970 971 if (!ipsec_loaded(ipss)) { 972 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 973 return; 974 } 975 976 if (nexthdr == IPPROTO_ESP) 977 ipsec_rc = ipsecesp_icmp_error(first_mp); 978 else 979 ipsec_rc = ipsecah_icmp_error(first_mp); 980 if (ipsec_rc == IPSEC_STATUS_FAILED) 981 return; 982 983 ip_fanout_proto_again(first_mp, ill, ill, NULL); 984 return; 985 } 986 case IPPROTO_ENCAP: 987 case IPPROTO_IPV6: 988 if ((uint8_t *)ip6h + hdr_length + 989 (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) : 990 sizeof (ip6_t)) > mp->b_wptr) { 991 goto drop_pkt; 992 } 993 994 if (nexthdr == IPPROTO_ENCAP || 995 !IN6_ARE_ADDR_EQUAL( 996 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 997 &ip6h->ip6_src) || 998 !IN6_ARE_ADDR_EQUAL( 999 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1000 &ip6h->ip6_dst)) { 1001 /* 1002 * For tunnels that have used IPsec protection, 1003 * we need to adjust the MTU to take into account 1004 * the IPsec overhead. 1005 */ 1006 if (ii != NULL) 1007 icmp6->icmp6_mtu = htonl( 1008 ntohl(icmp6->icmp6_mtu) - 1009 ipsec_in_extra_length(first_mp)); 1010 } else { 1011 /* 1012 * Self-encapsulated case. As in the ipv4 case, 1013 * we need to strip the 2nd IP header. Since mp 1014 * is already pulled-up, we can simply bcopy 1015 * the 3rd header + data over the 2nd header. 
1016 */ 1017 uint16_t unused_len; 1018 ip6_t *inner_ip6h = (ip6_t *) 1019 ((uchar_t *)ip6h + hdr_length); 1020 1021 /* 1022 * Make sure we don't do recursion more than once. 1023 */ 1024 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1025 &unused_len, &nexthdrp) || 1026 *nexthdrp == IPPROTO_IPV6) { 1027 goto drop_pkt; 1028 } 1029 1030 /* 1031 * We are about to modify the packet. Make a copy if 1032 * someone else has a reference to it. 1033 */ 1034 if (DB_REF(mp) > 1) { 1035 mblk_t *mp1; 1036 uint16_t icmp6_offset; 1037 1038 mp1 = copymsg(mp); 1039 if (mp1 == NULL) { 1040 goto drop_pkt; 1041 } 1042 icmp6_offset = (uint16_t) 1043 ((uchar_t *)icmp6 - mp->b_rptr); 1044 freemsg(mp); 1045 mp = mp1; 1046 1047 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1048 ip6h = (ip6_t *)&icmp6[1]; 1049 inner_ip6h = (ip6_t *) 1050 ((uchar_t *)ip6h + hdr_length); 1051 1052 if (mctl_present) 1053 first_mp->b_cont = mp; 1054 else 1055 first_mp = mp; 1056 } 1057 1058 /* 1059 * Need to set db_type back to M_DATA before 1060 * refeeding mp into this function. 1061 */ 1062 DB_TYPE(mp) = M_DATA; 1063 1064 /* 1065 * Copy the 3rd header + remaining data on top 1066 * of the 2nd header. 1067 */ 1068 bcopy(inner_ip6h, ip6h, 1069 mp->b_wptr - (uchar_t *)inner_ip6h); 1070 1071 /* 1072 * Subtract length of the 2nd header. 1073 */ 1074 mp->b_wptr -= hdr_length; 1075 1076 /* 1077 * Now recurse, and see what I _really_ should be 1078 * doing here. 1079 */ 1080 icmp_inbound_error_fanout_v6(q, first_mp, 1081 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1082 zoneid); 1083 return; 1084 } 1085 /* FALLTHRU */ 1086 default: 1087 /* 1088 * The rip6h header is only used for the lookup and we 1089 * only set the src and dst addresses and nexthdr. 
1090 */ 1091 rip6h.ip6_src = ip6h->ip6_dst; 1092 rip6h.ip6_dst = ip6h->ip6_src; 1093 rip6h.ip6_nxt = nexthdr; 1094 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1095 IP6_NO_IPPOLICY, mctl_present, zoneid); 1096 return; 1097 } 1098 /* NOTREACHED */ 1099 drop_pkt: 1100 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1101 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1102 freemsg(first_mp); 1103 } 1104 1105 /* 1106 * Process received IPv6 ICMP Redirect messages. 1107 */ 1108 /* ARGSUSED */ 1109 static void 1110 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1111 { 1112 ip6_t *ip6h; 1113 uint16_t hdr_length; 1114 nd_redirect_t *rd; 1115 ire_t *ire; 1116 ire_t *prev_ire; 1117 ire_t *redir_ire; 1118 in6_addr_t *src, *dst, *gateway; 1119 nd_opt_hdr_t *opt; 1120 nce_t *nce; 1121 int nce_flags = 0; 1122 int err = 0; 1123 boolean_t redirect_to_router = B_FALSE; 1124 int len; 1125 int optlen; 1126 iulp_t ulp_info = { 0 }; 1127 ill_t *prev_ire_ill; 1128 ipif_t *ipif; 1129 ip_stack_t *ipst = ill->ill_ipst; 1130 1131 ip6h = (ip6_t *)mp->b_rptr; 1132 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1133 hdr_length = ip_hdr_length_v6(mp, ip6h); 1134 else 1135 hdr_length = IPV6_HDR_LEN; 1136 1137 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1138 len = mp->b_wptr - mp->b_rptr - hdr_length; 1139 src = &ip6h->ip6_src; 1140 dst = &rd->nd_rd_dst; 1141 gateway = &rd->nd_rd_target; 1142 1143 /* Verify if it is a valid redirect */ 1144 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1145 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1146 (rd->nd_rd_code != 0) || 1147 (len < sizeof (nd_redirect_t)) || 1148 (IN6_IS_ADDR_V4MAPPED(dst)) || 1149 (IN6_IS_ADDR_MULTICAST(dst))) { 1150 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1151 freemsg(mp); 1152 return; 1153 } 1154 1155 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1156 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1157 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1158 freemsg(mp); 1159 return; 1160 } 1161 1162 if (len > sizeof 
(nd_redirect_t)) { 1163 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1164 len - sizeof (nd_redirect_t))) { 1165 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1166 freemsg(mp); 1167 return; 1168 } 1169 } 1170 1171 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1172 redirect_to_router = B_TRUE; 1173 nce_flags |= NCE_F_ISROUTER; 1174 } 1175 1176 /* ipif will be refreleased afterwards */ 1177 ipif = ipif_get_next_ipif(NULL, ill); 1178 if (ipif == NULL) { 1179 freemsg(mp); 1180 return; 1181 } 1182 1183 /* 1184 * Verify that the IP source address of the redirect is 1185 * the same as the current first-hop router for the specified 1186 * ICMP destination address. 1187 * Also, Make sure we had a route for the dest in question and 1188 * that route was pointing to the old gateway (the source of the 1189 * redirect packet.) 1190 */ 1191 1192 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1193 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1194 MATCH_IRE_DEFAULT, ipst); 1195 1196 /* 1197 * Check that 1198 * the redirect was not from ourselves 1199 * old gateway is still directly reachable 1200 */ 1201 if (prev_ire == NULL || 1202 prev_ire->ire_type == IRE_LOCAL) { 1203 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1204 ipif_refrele(ipif); 1205 goto fail_redirect; 1206 } 1207 prev_ire_ill = ire_to_ill(prev_ire); 1208 ASSERT(prev_ire_ill != NULL); 1209 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1210 nce_flags |= NCE_F_NONUD; 1211 1212 /* 1213 * Should we use the old ULP info to create the new gateway? From 1214 * a user's perspective, we should inherit the info so that it 1215 * is a "smooth" transition. If we do not do that, then new 1216 * connections going thru the new gateway will have no route metrics, 1217 * which is counter-intuitive to user. From a network point of 1218 * view, this may or may not make sense even though the new gateway 1219 * is still directly connected to us so the route metrics should not 1220 * change much. 
1221 * 1222 * But if the old ire_uinfo is not initialized, we do another 1223 * recursive lookup on the dest using the new gateway. There may 1224 * be a route to that. If so, use it to initialize the redirect 1225 * route. 1226 */ 1227 if (prev_ire->ire_uinfo.iulp_set) { 1228 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1229 } else if (redirect_to_router) { 1230 /* 1231 * Only do the following if the redirection is really to 1232 * a router. 1233 */ 1234 ire_t *tmp_ire; 1235 ire_t *sire; 1236 1237 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1238 ALL_ZONES, 0, NULL, 1239 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1240 ipst); 1241 if (sire != NULL) { 1242 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1243 ASSERT(tmp_ire != NULL); 1244 ire_refrele(tmp_ire); 1245 ire_refrele(sire); 1246 } else if (tmp_ire != NULL) { 1247 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1248 sizeof (iulp_t)); 1249 ire_refrele(tmp_ire); 1250 } 1251 } 1252 1253 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1254 opt = (nd_opt_hdr_t *)&rd[1]; 1255 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1256 if (opt != NULL) { 1257 err = ndp_lookup_then_add_v6(ill, 1258 (uchar_t *)&opt[1], /* Link layer address */ 1259 gateway, 1260 &ipv6_all_ones, /* prefix mask */ 1261 &ipv6_all_zeros, /* Mapping mask */ 1262 0, 1263 nce_flags, 1264 ND_STALE, 1265 &nce); 1266 switch (err) { 1267 case 0: 1268 NCE_REFRELE(nce); 1269 break; 1270 case EEXIST: 1271 /* 1272 * Check to see if link layer address has changed and 1273 * process the nce_state accordingly. 
1274 */ 1275 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1276 NCE_REFRELE(nce); 1277 break; 1278 default: 1279 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1280 err)); 1281 ipif_refrele(ipif); 1282 goto fail_redirect; 1283 } 1284 } 1285 if (redirect_to_router) { 1286 /* icmp_redirect_ok_v6() must have already verified this */ 1287 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1288 1289 /* 1290 * Create a Route Association. This will allow us to remember 1291 * a router told us to use the particular gateway. 1292 */ 1293 ire = ire_create_v6( 1294 dst, 1295 &ipv6_all_ones, /* mask */ 1296 &prev_ire->ire_src_addr_v6, /* source addr */ 1297 gateway, /* gateway addr */ 1298 &prev_ire->ire_max_frag, /* max frag */ 1299 NULL, /* no src nce */ 1300 NULL, /* no rfq */ 1301 NULL, /* no stq */ 1302 IRE_HOST, 1303 prev_ire->ire_ipif, 1304 NULL, 1305 0, 1306 0, 1307 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1308 &ulp_info, 1309 NULL, 1310 NULL, 1311 ipst); 1312 } else { 1313 queue_t *stq; 1314 1315 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1316 ? ipif->ipif_rq : ipif->ipif_wq; 1317 1318 /* 1319 * Just create an on link entry, i.e. interface route. 
1320 */ 1321 ire = ire_create_v6( 1322 dst, /* gateway == dst */ 1323 &ipv6_all_ones, /* mask */ 1324 &prev_ire->ire_src_addr_v6, /* source addr */ 1325 &ipv6_all_zeros, /* gateway addr */ 1326 &prev_ire->ire_max_frag, /* max frag */ 1327 NULL, /* no src nce */ 1328 NULL, /* ire rfq */ 1329 stq, /* ire stq */ 1330 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1331 prev_ire->ire_ipif, 1332 &ipv6_all_ones, 1333 0, 1334 0, 1335 (RTF_DYNAMIC | RTF_HOST), 1336 &ulp_info, 1337 NULL, 1338 NULL, 1339 ipst); 1340 } 1341 1342 /* Release reference from earlier ipif_get_next_ipif() */ 1343 ipif_refrele(ipif); 1344 1345 if (ire == NULL) 1346 goto fail_redirect; 1347 1348 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1349 1350 /* tell routing sockets that we received a redirect */ 1351 ip_rts_change_v6(RTM_REDIRECT, 1352 &rd->nd_rd_dst, 1353 &rd->nd_rd_target, 1354 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1355 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1356 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1357 1358 /* 1359 * Delete any existing IRE_HOST type ires for this destination. 1360 * This together with the added IRE has the effect of 1361 * modifying an existing redirect. 
1362 */ 1363 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1364 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1365 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), 1366 ipst); 1367 1368 ire_refrele(ire); /* Held in ire_add_v6 */ 1369 1370 if (redir_ire != NULL) { 1371 if (redir_ire->ire_flags & RTF_DYNAMIC) 1372 ire_delete(redir_ire); 1373 ire_refrele(redir_ire); 1374 } 1375 } 1376 1377 if (prev_ire->ire_type == IRE_CACHE) 1378 ire_delete(prev_ire); 1379 ire_refrele(prev_ire); 1380 prev_ire = NULL; 1381 1382 fail_redirect: 1383 if (prev_ire != NULL) 1384 ire_refrele(prev_ire); 1385 freemsg(mp); 1386 } 1387 1388 static ill_t * 1389 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1390 { 1391 ill_t *ill; 1392 1393 ASSERT(WR(q) == q); 1394 1395 if (q->q_next != NULL) { 1396 ill = (ill_t *)q->q_ptr; 1397 if (ILL_CAN_LOOKUP(ill)) 1398 ill_refhold(ill); 1399 else 1400 ill = NULL; 1401 } else { 1402 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1403 NULL, NULL, NULL, NULL, NULL, ipst); 1404 } 1405 if (ill == NULL) 1406 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1407 return (ill); 1408 } 1409 1410 /* 1411 * Assigns an appropriate source address to the packet. 1412 * If origdst is one of our IP addresses that use it as the source. 1413 * If the queue is an ill queue then select a source from that ill. 1414 * Otherwise pick a source based on a route lookup back to the origsrc. 1415 * 1416 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1417 */ 1418 static in6_addr_t * 1419 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1420 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1421 { 1422 ill_t *ill; 1423 ire_t *ire; 1424 ipif_t *ipif; 1425 1426 ASSERT(!(wq->q_flag & QREADR)); 1427 if (wq->q_next != NULL) { 1428 ill = (ill_t *)wq->q_ptr; 1429 } else { 1430 ill = NULL; 1431 } 1432 1433 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1434 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1435 ipst); 1436 if (ire != NULL) { 1437 /* Destined to one of our addresses */ 1438 *src = *origdst; 1439 ire_refrele(ire); 1440 return (src); 1441 } 1442 if (ire != NULL) { 1443 ire_refrele(ire); 1444 ire = NULL; 1445 } 1446 if (ill == NULL) { 1447 /* What is the route back to the original source? */ 1448 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1449 NULL, NULL, zoneid, NULL, 1450 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1451 if (ire == NULL) { 1452 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1453 return (NULL); 1454 } 1455 /* 1456 * Does not matter whether we use ire_stq or ire_ipif here. 1457 * Just pick an ill for ICMP replies. 1458 */ 1459 ASSERT(ire->ire_ipif != NULL); 1460 ill = ire->ire_ipif->ipif_ill; 1461 ire_refrele(ire); 1462 } 1463 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1464 IPV6_PREFER_SRC_DEFAULT, zoneid); 1465 if (ipif != NULL) { 1466 *src = ipif->ipif_v6src_addr; 1467 ipif_refrele(ipif); 1468 return (src); 1469 } 1470 /* 1471 * Unusual case - can't find a usable source address to reach the 1472 * original source. Use what in the route to the source. 
1473 */ 1474 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1475 NULL, NULL, zoneid, NULL, 1476 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1477 if (ire == NULL) { 1478 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1479 return (NULL); 1480 } 1481 ASSERT(ire != NULL); 1482 *src = ire->ire_src_addr_v6; 1483 ire_refrele(ire); 1484 return (src); 1485 } 1486 1487 /* 1488 * Build and ship an IPv6 ICMP message using the packet data in mp, 1489 * and the ICMP header pointed to by "stuff". (May be called as 1490 * writer.) 1491 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1492 * verify that an icmp error packet can be sent. 1493 * 1494 * If q is an ill write side queue (which is the case when packets 1495 * arrive from ip_rput) then ip_wput code will ensure that packets to 1496 * link-local destinations are sent out that ill. 1497 * 1498 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1499 * source address (see above function). 1500 */ 1501 static void 1502 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1503 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1504 ip_stack_t *ipst) 1505 { 1506 ip6_t *ip6h; 1507 in6_addr_t v6dst; 1508 size_t len_needed; 1509 size_t msg_len; 1510 mblk_t *mp1; 1511 icmp6_t *icmp6; 1512 ill_t *ill; 1513 in6_addr_t v6src; 1514 mblk_t *ipsec_mp; 1515 ipsec_out_t *io; 1516 1517 ill = ip_queue_to_ill_v6(q, ipst); 1518 if (ill == NULL) { 1519 freemsg(mp); 1520 return; 1521 } 1522 1523 if (mctl_present) { 1524 /* 1525 * If it is : 1526 * 1527 * 1) a IPSEC_OUT, then this is caused by outbound 1528 * datagram originating on this host. IPSEC processing 1529 * may or may not have been done. Refer to comments above 1530 * icmp_inbound_error_fanout for details. 1531 * 1532 * 2) a IPSEC_IN if we are generating a icmp_message 1533 * for an incoming datagram destined for us i.e called 1534 * from ip_fanout_send_icmp. 
1535 */ 1536 ipsec_info_t *in; 1537 1538 ipsec_mp = mp; 1539 mp = ipsec_mp->b_cont; 1540 1541 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1542 ip6h = (ip6_t *)mp->b_rptr; 1543 1544 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1545 in->ipsec_info_type == IPSEC_IN); 1546 1547 if (in->ipsec_info_type == IPSEC_IN) { 1548 /* 1549 * Convert the IPSEC_IN to IPSEC_OUT. 1550 */ 1551 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1552 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1553 ill_refrele(ill); 1554 return; 1555 } 1556 } else { 1557 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1558 io = (ipsec_out_t *)in; 1559 /* 1560 * Clear out ipsec_out_proc_begin, so we do a fresh 1561 * ire lookup. 1562 */ 1563 io->ipsec_out_proc_begin = B_FALSE; 1564 } 1565 } else { 1566 /* 1567 * This is in clear. The icmp message we are building 1568 * here should go out in clear. 1569 */ 1570 ipsec_in_t *ii; 1571 ASSERT(mp->b_datap->db_type == M_DATA); 1572 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1573 if (ipsec_mp == NULL) { 1574 freemsg(mp); 1575 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1576 ill_refrele(ill); 1577 return; 1578 } 1579 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1580 1581 /* This is not a secure packet */ 1582 ii->ipsec_in_secure = B_FALSE; 1583 /* 1584 * For trusted extensions using a shared IP address we can 1585 * send using any zoneid. 1586 */ 1587 if (zoneid == ALL_ZONES) 1588 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1589 else 1590 ii->ipsec_in_zoneid = zoneid; 1591 ipsec_mp->b_cont = mp; 1592 ip6h = (ip6_t *)mp->b_rptr; 1593 /* 1594 * Convert the IPSEC_IN to IPSEC_OUT. 
1595 */ 1596 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1597 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1598 ill_refrele(ill); 1599 return; 1600 } 1601 } 1602 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1603 1604 if (v6src_ptr != NULL) { 1605 v6src = *v6src_ptr; 1606 } else { 1607 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1608 &v6src, zoneid, ipst) == NULL) { 1609 freemsg(ipsec_mp); 1610 ill_refrele(ill); 1611 return; 1612 } 1613 } 1614 v6dst = ip6h->ip6_src; 1615 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1616 msg_len = msgdsize(mp); 1617 if (msg_len > len_needed) { 1618 if (!adjmsg(mp, len_needed - msg_len)) { 1619 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1620 freemsg(ipsec_mp); 1621 ill_refrele(ill); 1622 return; 1623 } 1624 msg_len = len_needed; 1625 } 1626 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1627 if (mp1 == NULL) { 1628 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1629 freemsg(ipsec_mp); 1630 ill_refrele(ill); 1631 return; 1632 } 1633 ill_refrele(ill); 1634 mp1->b_cont = mp; 1635 mp = mp1; 1636 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1637 io->ipsec_out_type == IPSEC_OUT); 1638 ipsec_mp->b_cont = mp; 1639 1640 /* 1641 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1642 * node generates be accepted in peace by all on-host destinations. 1643 * If we do NOT assume that all on-host destinations trust 1644 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1645 * (Look for ipsec_out_icmp_loopback). 
1646 */ 1647 io->ipsec_out_icmp_loopback = B_TRUE; 1648 1649 ip6h = (ip6_t *)mp->b_rptr; 1650 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1651 1652 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1653 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1654 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1655 ip6h->ip6_dst = v6dst; 1656 ip6h->ip6_src = v6src; 1657 msg_len += IPV6_HDR_LEN + len; 1658 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1659 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1660 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1661 } 1662 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1663 icmp6 = (icmp6_t *)&ip6h[1]; 1664 bcopy(stuff, (char *)icmp6, len); 1665 /* 1666 * Prepare for checksum by putting icmp length in the icmp 1667 * checksum field. The checksum is calculated in ip_wput_v6. 1668 */ 1669 icmp6->icmp6_cksum = ip6h->ip6_plen; 1670 if (icmp6->icmp6_type == ND_REDIRECT) { 1671 ip6h->ip6_hops = IPV6_MAX_HOPS; 1672 } 1673 /* Send to V6 writeside put routine */ 1674 put(q, ipsec_mp); 1675 } 1676 1677 /* 1678 * Update the output mib when ICMPv6 packets are sent. 
1679 */ 1680 static void 1681 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1682 { 1683 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1684 1685 switch (icmp6->icmp6_type) { 1686 case ICMP6_DST_UNREACH: 1687 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1688 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1689 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1690 break; 1691 1692 case ICMP6_TIME_EXCEEDED: 1693 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1694 break; 1695 1696 case ICMP6_PARAM_PROB: 1697 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1698 break; 1699 1700 case ICMP6_PACKET_TOO_BIG: 1701 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1702 break; 1703 1704 case ICMP6_ECHO_REQUEST: 1705 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1706 break; 1707 1708 case ICMP6_ECHO_REPLY: 1709 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1710 break; 1711 1712 case ND_ROUTER_SOLICIT: 1713 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1714 break; 1715 1716 case ND_ROUTER_ADVERT: 1717 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1718 break; 1719 1720 case ND_NEIGHBOR_SOLICIT: 1721 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1722 break; 1723 1724 case ND_NEIGHBOR_ADVERT: 1725 BUMP_MIB(ill->ill_icmp6_mib, 1726 ipv6IfIcmpOutNeighborAdvertisements); 1727 break; 1728 1729 case ND_REDIRECT: 1730 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1731 break; 1732 1733 case MLD_LISTENER_QUERY: 1734 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1735 break; 1736 1737 case MLD_LISTENER_REPORT: 1738 case MLD_V2_LISTENER_REPORT: 1739 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1740 break; 1741 1742 case MLD_LISTENER_REDUCTION: 1743 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1744 break; 1745 } 1746 } 1747 1748 /* 1749 * Check if it is ok to send an ICMPv6 error packet in 1750 * response to the IP packet in 
mp. 1751 * Free the message and return null if no 1752 * ICMP error packet should be sent. 1753 */ 1754 static mblk_t * 1755 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1756 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1757 { 1758 ip6_t *ip6h; 1759 1760 if (!mp) 1761 return (NULL); 1762 1763 ip6h = (ip6_t *)mp->b_rptr; 1764 1765 /* Check if source address uniquely identifies the host */ 1766 1767 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1768 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1769 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1770 freemsg(mp); 1771 return (NULL); 1772 } 1773 1774 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1775 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1776 icmp6_t *icmp6; 1777 1778 if (mp->b_wptr - mp->b_rptr < len_needed) { 1779 if (!pullupmsg(mp, len_needed)) { 1780 ill_t *ill; 1781 1782 ill = ip_queue_to_ill_v6(q, ipst); 1783 if (ill == NULL) { 1784 BUMP_MIB(&ipst->ips_icmp6_mib, 1785 ipv6IfIcmpInErrors); 1786 } else { 1787 BUMP_MIB(ill->ill_icmp6_mib, 1788 ipv6IfIcmpInErrors); 1789 ill_refrele(ill); 1790 } 1791 freemsg(mp); 1792 return (NULL); 1793 } 1794 ip6h = (ip6_t *)mp->b_rptr; 1795 } 1796 icmp6 = (icmp6_t *)&ip6h[1]; 1797 /* Explicitly do not generate errors in response to redirects */ 1798 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1799 icmp6->icmp6_type == ND_REDIRECT) { 1800 freemsg(mp); 1801 return (NULL); 1802 } 1803 } 1804 /* 1805 * Check that the destination is not multicast and that the packet 1806 * was not sent on link layer broadcast or multicast. (Exception 1807 * is Packet too big message as per the draft - when mcast_ok is set.) 1808 */ 1809 if (!mcast_ok && 1810 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1811 freemsg(mp); 1812 return (NULL); 1813 } 1814 if (icmp_err_rate_limit(ipst)) { 1815 /* 1816 * Only send ICMP error packets every so often. 1817 * This should be done on a per port/source basis, 1818 * but for now this will suffice. 
1819 */ 1820 freemsg(mp); 1821 return (NULL); 1822 } 1823 return (mp); 1824 } 1825 1826 /* 1827 * Generate an ICMPv6 redirect message. 1828 * Include target link layer address option if it exits. 1829 * Always include redirect header. 1830 */ 1831 static void 1832 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1833 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1834 { 1835 nd_redirect_t *rd; 1836 nd_opt_rd_hdr_t *rdh; 1837 uchar_t *buf; 1838 nce_t *nce = NULL; 1839 nd_opt_hdr_t *opt; 1840 int len; 1841 int ll_opt_len = 0; 1842 int max_redir_hdr_data_len; 1843 int pkt_len; 1844 in6_addr_t *srcp; 1845 ip_stack_t *ipst = ill->ill_ipst; 1846 1847 /* 1848 * We are called from ip_rput where we could 1849 * not have attached an IPSEC_IN. 1850 */ 1851 ASSERT(mp->b_datap->db_type == M_DATA); 1852 1853 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1854 if (mp == NULL) 1855 return; 1856 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1857 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1858 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1859 ill->ill_phys_addr_length + 7)/8 * 8; 1860 } 1861 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1862 ASSERT(len % 4 == 0); 1863 buf = kmem_alloc(len, KM_NOSLEEP); 1864 if (buf == NULL) { 1865 if (nce != NULL) 1866 NCE_REFRELE(nce); 1867 freemsg(mp); 1868 return; 1869 } 1870 1871 rd = (nd_redirect_t *)buf; 1872 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1873 rd->nd_rd_code = 0; 1874 rd->nd_rd_reserved = 0; 1875 rd->nd_rd_target = *targetp; 1876 rd->nd_rd_dst = *dest; 1877 1878 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1879 if (nce != NULL && ll_opt_len != 0) { 1880 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1881 opt->nd_opt_len = ll_opt_len/8; 1882 bcopy((char *)nce->nce_res_mp->b_rptr + 1883 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1884 ill->ill_phys_addr_length); 1885 } 1886 if (nce != NULL) 1887 NCE_REFRELE(nce); 1888 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 
1889 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1890 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1891 max_redir_hdr_data_len = 1892 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1893 pkt_len = msgdsize(mp); 1894 /* Make sure mp is 8 byte aligned */ 1895 if (pkt_len > max_redir_hdr_data_len) { 1896 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1897 sizeof (nd_opt_rd_hdr_t))/8; 1898 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1899 } else { 1900 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1901 (void) adjmsg(mp, -(pkt_len % 8)); 1902 } 1903 rdh->nd_opt_rh_reserved1 = 0; 1904 rdh->nd_opt_rh_reserved2 = 0; 1905 /* ipif_v6src_addr contains the link-local source address */ 1906 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1907 if (ill->ill_group != NULL) { 1908 /* 1909 * The receiver of the redirect will verify whether it 1910 * had a route through us (srcp that we will use in 1911 * the redirect) or not. As we load spread even link-locals, 1912 * we don't know which source address the receiver of 1913 * redirect has in its route for communicating with us. 1914 * Thus we randomly choose a source here and finally we 1915 * should get to the right one and it will eventually 1916 * accept the redirect from us. We can't call 1917 * ip_lookup_scope_v6 because we don't have the right 1918 * link-local address here. Thus we randomly choose one. 
1919 */ 1920 int cnt = ill->ill_group->illgrp_ill_count; 1921 1922 ill = ill->ill_group->illgrp_ill; 1923 cnt = ++ipst->ips_icmp_redirect_v6_src_index % cnt; 1924 while (cnt--) 1925 ill = ill->ill_group_next; 1926 srcp = &ill->ill_ipif->ipif_v6src_addr; 1927 } else { 1928 srcp = &ill->ill_ipif->ipif_v6src_addr; 1929 } 1930 rw_exit(&ipst->ips_ill_g_lock); 1931 /* Redirects sent by router, and router is global zone */ 1932 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1933 kmem_free(buf, len); 1934 } 1935 1936 1937 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1938 void 1939 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1940 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1941 ip_stack_t *ipst) 1942 { 1943 icmp6_t icmp6; 1944 boolean_t mctl_present; 1945 mblk_t *first_mp; 1946 1947 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1948 1949 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1950 if (mp == NULL) { 1951 if (mctl_present) 1952 freeb(first_mp); 1953 return; 1954 } 1955 bzero(&icmp6, sizeof (icmp6_t)); 1956 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1957 icmp6.icmp6_code = code; 1958 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1959 zoneid, ipst); 1960 } 1961 1962 /* 1963 * Generate an ICMP unreachable message. 
1964 */ 1965 void 1966 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1967 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1968 ip_stack_t *ipst) 1969 { 1970 icmp6_t icmp6; 1971 boolean_t mctl_present; 1972 mblk_t *first_mp; 1973 1974 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1975 1976 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1977 if (mp == NULL) { 1978 if (mctl_present) 1979 freeb(first_mp); 1980 return; 1981 } 1982 bzero(&icmp6, sizeof (icmp6_t)); 1983 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1984 icmp6.icmp6_code = code; 1985 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1986 zoneid, ipst); 1987 } 1988 1989 /* 1990 * Generate an ICMP pkt too big message. 1991 */ 1992 static void 1993 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1994 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1995 { 1996 icmp6_t icmp6; 1997 mblk_t *first_mp; 1998 boolean_t mctl_present; 1999 2000 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2001 2002 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2003 if (mp == NULL) { 2004 if (mctl_present) 2005 freeb(first_mp); 2006 return; 2007 } 2008 bzero(&icmp6, sizeof (icmp6_t)); 2009 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2010 icmp6.icmp6_code = 0; 2011 icmp6.icmp6_mtu = htonl(mtu); 2012 2013 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2014 zoneid, ipst); 2015 } 2016 2017 /* 2018 * Generate an ICMP parameter problem message. (May be called as writer.) 2019 * 'offset' is the offset from the beginning of the packet in error. 
2020 */ 2021 static void 2022 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2023 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2024 ip_stack_t *ipst) 2025 { 2026 icmp6_t icmp6; 2027 boolean_t mctl_present; 2028 mblk_t *first_mp; 2029 2030 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2031 2032 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2033 if (mp == NULL) { 2034 if (mctl_present) 2035 freeb(first_mp); 2036 return; 2037 } 2038 bzero((char *)&icmp6, sizeof (icmp6_t)); 2039 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2040 icmp6.icmp6_code = code; 2041 icmp6.icmp6_pptr = htonl(offset); 2042 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2043 zoneid, ipst); 2044 } 2045 2046 /* 2047 * This code will need to take into account the possibility of binding 2048 * to a link local address on a multi-homed host, in which case the 2049 * outgoing interface (from the conn) will need to be used when getting 2050 * an ire for the dst. Going through proper outgoing interface and 2051 * choosing the source address corresponding to the outgoing interface 2052 * is necessary when the destination address is a link-local address and 2053 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2054 * This can happen when active connection is setup; thus ipp pointer 2055 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2056 * pointer is passed as ipp pointer. 
2057 */ 2058 mblk_t * 2059 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2060 { 2061 ssize_t len; 2062 int protocol; 2063 struct T_bind_req *tbr; 2064 sin6_t *sin6; 2065 ipa6_conn_t *ac6; 2066 in6_addr_t *v6srcp; 2067 in6_addr_t *v6dstp; 2068 uint16_t lport; 2069 uint16_t fport; 2070 uchar_t *ucp; 2071 mblk_t *mp1; 2072 boolean_t ire_requested; 2073 boolean_t ipsec_policy_set; 2074 int error = 0; 2075 boolean_t local_bind; 2076 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2077 ipa6_conn_x_t *acx6; 2078 boolean_t verify_dst; 2079 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2080 2081 ASSERT(connp->conn_af_isv6); 2082 len = mp->b_wptr - mp->b_rptr; 2083 if (len < (sizeof (*tbr) + 1)) { 2084 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2085 "ip_bind_v6: bogus msg, len %ld", len); 2086 goto bad_addr; 2087 } 2088 /* Back up and extract the protocol identifier. */ 2089 mp->b_wptr--; 2090 tbr = (struct T_bind_req *)mp->b_rptr; 2091 /* Reset the message type in preparation for shipping it back. */ 2092 mp->b_datap->db_type = M_PCPROTO; 2093 2094 protocol = *mp->b_wptr & 0xFF; 2095 connp->conn_ulp = (uint8_t)protocol; 2096 2097 /* 2098 * Check for a zero length address. This is from a protocol that 2099 * wants to register to receive all packets of its type. 2100 */ 2101 if (tbr->ADDR_length == 0) { 2102 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2103 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2104 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2105 NULL) { 2106 /* 2107 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2108 * Do not allow others to bind to these. 2109 */ 2110 goto bad_addr; 2111 } 2112 2113 /* 2114 * 2115 * The udp module never sends down a zero-length address, 2116 * and allowing this on a labeled system will break MLP 2117 * functionality. 
2118 */ 2119 if (is_system_labeled() && protocol == IPPROTO_UDP) 2120 goto bad_addr; 2121 2122 /* Allow ipsec plumbing */ 2123 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2124 protocol != IPPROTO_ESP) 2125 goto bad_addr; 2126 2127 connp->conn_srcv6 = ipv6_all_zeros; 2128 ipcl_proto_insert_v6(connp, protocol); 2129 2130 tbr->PRIM_type = T_BIND_ACK; 2131 return (mp); 2132 } 2133 2134 /* Extract the address pointer from the message. */ 2135 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2136 tbr->ADDR_length); 2137 if (ucp == NULL) { 2138 ip1dbg(("ip_bind_v6: no address\n")); 2139 goto bad_addr; 2140 } 2141 if (!OK_32PTR(ucp)) { 2142 ip1dbg(("ip_bind_v6: unaligned address\n")); 2143 goto bad_addr; 2144 } 2145 mp1 = mp->b_cont; /* trailing mp if any */ 2146 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2147 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2148 2149 switch (tbr->ADDR_length) { 2150 default: 2151 ip1dbg(("ip_bind_v6: bad address length %d\n", 2152 (int)tbr->ADDR_length)); 2153 goto bad_addr; 2154 2155 case IPV6_ADDR_LEN: 2156 /* Verification of local address only */ 2157 v6srcp = (in6_addr_t *)ucp; 2158 lport = 0; 2159 local_bind = B_TRUE; 2160 break; 2161 2162 case sizeof (sin6_t): 2163 sin6 = (sin6_t *)ucp; 2164 v6srcp = &sin6->sin6_addr; 2165 lport = sin6->sin6_port; 2166 local_bind = B_TRUE; 2167 break; 2168 2169 case sizeof (ipa6_conn_t): 2170 /* 2171 * Verify that both the source and destination addresses 2172 * are valid. 2173 * Note that we allow connect to broadcast and multicast 2174 * addresses when ire_requested is set. Thus the ULP 2175 * has to check for IRE_BROADCAST and multicast. 2176 */ 2177 ac6 = (ipa6_conn_t *)ucp; 2178 v6srcp = &ac6->ac6_laddr; 2179 v6dstp = &ac6->ac6_faddr; 2180 fport = ac6->ac6_fport; 2181 /* For raw socket, the local port is not set. */ 2182 lport = ac6->ac6_lport != 0 ? 
ac6->ac6_lport : 2183 connp->conn_lport; 2184 local_bind = B_FALSE; 2185 /* Always verify destination reachability. */ 2186 verify_dst = B_TRUE; 2187 break; 2188 2189 case sizeof (ipa6_conn_x_t): 2190 /* 2191 * Verify that the source address is valid. 2192 * Note that we allow connect to broadcast and multicast 2193 * addresses when ire_requested is set. Thus the ULP 2194 * has to check for IRE_BROADCAST and multicast. 2195 */ 2196 acx6 = (ipa6_conn_x_t *)ucp; 2197 ac6 = &acx6->ac6x_conn; 2198 v6srcp = &ac6->ac6_laddr; 2199 v6dstp = &ac6->ac6_faddr; 2200 fport = ac6->ac6_fport; 2201 lport = ac6->ac6_lport; 2202 local_bind = B_FALSE; 2203 /* 2204 * Client that passed ipa6_conn_x_t to us specifies whether to 2205 * verify destination reachability. 2206 */ 2207 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2208 break; 2209 } 2210 if (local_bind) { 2211 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2212 /* Bind to IPv4 address */ 2213 ipaddr_t v4src; 2214 2215 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2216 2217 error = ip_bind_laddr(connp, mp, v4src, lport, 2218 ire_requested, ipsec_policy_set, 2219 tbr->ADDR_length != IPV6_ADDR_LEN); 2220 if (error != 0) 2221 goto bad_addr; 2222 connp->conn_pkt_isv6 = B_FALSE; 2223 } else { 2224 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2225 error = 0; 2226 goto bad_addr; 2227 } 2228 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2229 ire_requested, ipsec_policy_set, 2230 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2231 if (error != 0) 2232 goto bad_addr; 2233 connp->conn_pkt_isv6 = B_TRUE; 2234 } 2235 } else { 2236 /* 2237 * Bind to local and remote address. Local might be 2238 * unspecified in which case it will be extracted from 2239 * ire_src_addr_v6 2240 */ 2241 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2242 /* Connect to IPv4 address */ 2243 ipaddr_t v4src; 2244 ipaddr_t v4dst; 2245 2246 /* Is the source unspecified or mapped? 
*/ 2247 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2248 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2249 ip1dbg(("ip_bind_v6: " 2250 "dst is mapped, but not the src\n")); 2251 goto bad_addr; 2252 } 2253 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2254 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2255 2256 /* 2257 * XXX Fix needed. Need to pass ipsec_policy_set 2258 * instead of B_FALSE. 2259 */ 2260 2261 /* Always verify destination reachability. */ 2262 error = ip_bind_connected(connp, mp, &v4src, lport, 2263 v4dst, fport, ire_requested, ipsec_policy_set, 2264 B_TRUE, B_TRUE); 2265 if (error != 0) 2266 goto bad_addr; 2267 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2268 connp->conn_pkt_isv6 = B_FALSE; 2269 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2270 ip1dbg(("ip_bind_v6: " 2271 "src is mapped, but not the dst\n")); 2272 goto bad_addr; 2273 } else { 2274 error = ip_bind_connected_v6(connp, mp, v6srcp, 2275 lport, v6dstp, ipp, fport, ire_requested, 2276 ipsec_policy_set, B_TRUE, verify_dst); 2277 if (error != 0) 2278 goto bad_addr; 2279 connp->conn_pkt_isv6 = B_TRUE; 2280 } 2281 } 2282 2283 /* Update conn_send and pktversion if v4/v6 changed */ 2284 if (orig_pkt_isv6 != connp->conn_pkt_isv6) { 2285 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2286 } 2287 /* 2288 * Pass the IPSEC headers size in ire_ipsec_overhead. 2289 * We can't do this in ip_bind_insert_ire because the policy 2290 * may not have been inherited at that point in time and hence 2291 * conn_out_enforce_policy may not be set. 2292 */ 2293 mp1 = mp->b_cont; 2294 if (ire_requested && connp->conn_out_enforce_policy && 2295 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2296 ire_t *ire = (ire_t *)mp1->b_rptr; 2297 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2298 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2299 } 2300 2301 /* Send it home. 
*/ 2302 mp->b_datap->db_type = M_PCPROTO; 2303 tbr->PRIM_type = T_BIND_ACK; 2304 return (mp); 2305 2306 bad_addr: 2307 if (error == EINPROGRESS) 2308 return (NULL); 2309 if (error > 0) 2310 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2311 else 2312 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2313 return (mp); 2314 } 2315 2316 /* 2317 * Here address is verified to be a valid local address. 2318 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2319 * address is also considered a valid local address. 2320 * In the case of a multicast address, however, the 2321 * upper protocol is expected to reset the src address 2322 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2323 * no packets are emitted with multicast address as 2324 * source address. 2325 * The addresses valid for bind are: 2326 * (1) - in6addr_any 2327 * (2) - IP address of an UP interface 2328 * (3) - IP address of a DOWN interface 2329 * (4) - a multicast address. In this case 2330 * the conn will only receive packets destined to 2331 * the specified multicast address. Note: the 2332 * application still has to issue an 2333 * IPV6_JOIN_GROUP socket option. 2334 * 2335 * In all the above cases, the bound address must be valid in the current zone. 2336 * When the address is loopback or multicast, there might be many matching IREs 2337 * so bind has to look up based on the zone. 2338 */ 2339 static int 2340 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2341 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2342 boolean_t fanout_insert) 2343 { 2344 int error = 0; 2345 ire_t *src_ire = NULL; 2346 ipif_t *ipif = NULL; 2347 mblk_t *policy_mp; 2348 zoneid_t zoneid; 2349 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2350 2351 if (ipsec_policy_set) 2352 policy_mp = mp->b_cont; 2353 2354 /* 2355 * If it was previously connected, conn_fully_bound would have 2356 * been set. 
2357 */ 2358 connp->conn_fully_bound = B_FALSE; 2359 2360 zoneid = connp->conn_zoneid; 2361 2362 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2363 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2364 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2365 /* 2366 * If an address other than in6addr_any is requested, 2367 * we verify that it is a valid address for bind 2368 * Note: Following code is in if-else-if form for 2369 * readability compared to a condition check. 2370 */ 2371 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2372 if (IRE_IS_LOCAL(src_ire)) { 2373 /* 2374 * (2) Bind to address of local UP interface 2375 */ 2376 ipif = src_ire->ire_ipif; 2377 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2378 ipif_t *multi_ipif = NULL; 2379 ire_t *save_ire; 2380 /* 2381 * (4) bind to multicast address. 2382 * Fake out the IRE returned to upper 2383 * layer to be a broadcast IRE in 2384 * ip_bind_insert_ire_v6(). 2385 * Pass other information that matches 2386 * the ipif (e.g. the source address). 
2387 * conn_multicast_ill is only used for 2388 * IPv6 packets 2389 */ 2390 mutex_enter(&connp->conn_lock); 2391 if (connp->conn_multicast_ill != NULL) { 2392 (void) ipif_lookup_zoneid( 2393 connp->conn_multicast_ill, zoneid, 0, 2394 &multi_ipif); 2395 } else { 2396 /* 2397 * Look for default like 2398 * ip_wput_v6 2399 */ 2400 multi_ipif = ipif_lookup_group_v6( 2401 &ipv6_unspecified_group, zoneid, ipst); 2402 } 2403 mutex_exit(&connp->conn_lock); 2404 save_ire = src_ire; 2405 src_ire = NULL; 2406 if (multi_ipif == NULL || !ire_requested || 2407 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2408 src_ire = save_ire; 2409 error = EADDRNOTAVAIL; 2410 } else { 2411 ASSERT(src_ire != NULL); 2412 if (save_ire != NULL) 2413 ire_refrele(save_ire); 2414 } 2415 if (multi_ipif != NULL) 2416 ipif_refrele(multi_ipif); 2417 } else { 2418 *mp->b_wptr++ = (char)connp->conn_ulp; 2419 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2420 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error, 2421 ipst); 2422 if (ipif == NULL) { 2423 if (error == EINPROGRESS) { 2424 if (src_ire != NULL) 2425 ire_refrele(src_ire); 2426 return (error); 2427 } 2428 /* 2429 * Not a valid address for bind 2430 */ 2431 error = EADDRNOTAVAIL; 2432 } else { 2433 ipif_refrele(ipif); 2434 } 2435 /* 2436 * Just to keep it consistent with the processing in 2437 * ip_bind_v6(). 2438 */ 2439 mp->b_wptr--; 2440 } 2441 2442 if (error != 0) { 2443 /* Red Alert! Attempting to be a bogon! */ 2444 if (ip_debug > 2) { 2445 /* ip1dbg */ 2446 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2447 " address %s\n", AF_INET6, v6src); 2448 } 2449 goto bad_addr; 2450 } 2451 } 2452 2453 /* 2454 * Allow setting new policies. For example, disconnects come 2455 * down as ipa_t bind. As we would have set conn_policy_cached 2456 * to B_TRUE before, we should set it to B_FALSE, so that policy 2457 * can change after the disconnect. 
2458 */ 2459 connp->conn_policy_cached = B_FALSE; 2460 2461 /* If not fanout_insert this was just an address verification */ 2462 if (fanout_insert) { 2463 /* 2464 * The addresses have been verified. Time to insert in 2465 * the correct fanout list. 2466 */ 2467 connp->conn_srcv6 = *v6src; 2468 connp->conn_remv6 = ipv6_all_zeros; 2469 connp->conn_lport = lport; 2470 connp->conn_fport = 0; 2471 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2472 } 2473 if (error == 0) { 2474 if (ire_requested) { 2475 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL, 2476 ipst)) { 2477 error = -1; 2478 goto bad_addr; 2479 } 2480 } else if (ipsec_policy_set) { 2481 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2482 error = -1; 2483 goto bad_addr; 2484 } 2485 } 2486 } 2487 bad_addr: 2488 if (error != 0) { 2489 if (connp->conn_anon_port) { 2490 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2491 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2492 B_FALSE); 2493 } 2494 connp->conn_mlp_type = mlptSingle; 2495 } 2496 2497 if (src_ire != NULL) 2498 ire_refrele(src_ire); 2499 2500 if (ipsec_policy_set) { 2501 ASSERT(policy_mp != NULL); 2502 freeb(policy_mp); 2503 /* 2504 * As of now assume that nothing else accompanies 2505 * IPSEC_POLICY_SET. 
2506 */ 2507 mp->b_cont = NULL; 2508 } 2509 return (error); 2510 } 2511 2512 /* ARGSUSED */ 2513 static void 2514 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2515 void *dummy_arg) 2516 { 2517 conn_t *connp = NULL; 2518 t_scalar_t prim; 2519 2520 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2521 2522 if (CONN_Q(q)) 2523 connp = Q_TO_CONN(q); 2524 ASSERT(connp != NULL); 2525 2526 prim = ((union T_primitives *)mp->b_rptr)->type; 2527 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2528 2529 if (IPCL_IS_TCP(connp)) { 2530 /* Pass sticky_ipp for scope_id and pktinfo */ 2531 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2532 } else { 2533 /* For UDP and ICMP */ 2534 mp = ip_bind_v6(q, mp, connp, NULL); 2535 } 2536 if (mp != NULL) { 2537 if (IPCL_IS_TCP(connp)) { 2538 CONN_INC_REF(connp); 2539 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2540 connp, SQTAG_TCP_RPUTOTHER); 2541 } else if (IPCL_IS_UDP(connp)) { 2542 udp_resume_bind(connp, mp); 2543 } else { 2544 ASSERT(IPCL_IS_RAWIP(connp)); 2545 rawip_resume_bind(connp, mp); 2546 } 2547 } 2548 } 2549 2550 /* 2551 * Verify that both the source and destination addresses 2552 * are valid. If verify_dst, then destination address must also be reachable, 2553 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2554 * It takes ip6_pkt_t * as one of the arguments to determine correct 2555 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2556 * destination address. Note that parameter ipp is only useful for TCP connect 2557 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2558 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2559 * 2560 */ 2561 static int 2562 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2563 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2564 boolean_t ire_requested, boolean_t ipsec_policy_set, 2565 boolean_t fanout_insert, boolean_t verify_dst) 2566 { 2567 ire_t *src_ire; 2568 ire_t *dst_ire; 2569 int error = 0; 2570 int protocol; 2571 mblk_t *policy_mp; 2572 ire_t *sire = NULL; 2573 ire_t *md_dst_ire = NULL; 2574 ill_t *md_ill = NULL; 2575 ill_t *dst_ill = NULL; 2576 ipif_t *src_ipif = NULL; 2577 zoneid_t zoneid; 2578 boolean_t ill_held = B_FALSE; 2579 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2580 2581 src_ire = dst_ire = NULL; 2582 /* 2583 * NOTE: The protocol is beyond the wptr because that's how 2584 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2585 */ 2586 protocol = *mp->b_wptr & 0xFF; 2587 2588 /* 2589 * If we never got a disconnect before, clear it now. 2590 */ 2591 connp->conn_fully_bound = B_FALSE; 2592 2593 if (ipsec_policy_set) { 2594 policy_mp = mp->b_cont; 2595 } 2596 2597 zoneid = connp->conn_zoneid; 2598 2599 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2600 ipif_t *ipif; 2601 2602 /* 2603 * Use an "emulated" IRE_BROADCAST to tell the transport it 2604 * is a multicast. 2605 * Pass other information that matches 2606 * the ipif (e.g. the source address). 
2607 * 2608 * conn_multicast_ill is only used for IPv6 packets 2609 */ 2610 mutex_enter(&connp->conn_lock); 2611 if (connp->conn_multicast_ill != NULL) { 2612 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2613 zoneid, 0, &ipif); 2614 } else { 2615 /* Look for default like ip_wput_v6 */ 2616 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2617 } 2618 mutex_exit(&connp->conn_lock); 2619 if (ipif == NULL || !ire_requested || 2620 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2621 if (ipif != NULL) 2622 ipif_refrele(ipif); 2623 if (ip_debug > 2) { 2624 /* ip1dbg */ 2625 pr_addr_dbg("ip_bind_connected_v6: bad " 2626 "connected multicast %s\n", AF_INET6, 2627 v6dst); 2628 } 2629 error = ENETUNREACH; 2630 goto bad_addr; 2631 } 2632 if (ipif != NULL) 2633 ipif_refrele(ipif); 2634 } else { 2635 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2636 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2637 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2638 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2639 ipst); 2640 /* 2641 * We also prevent ire's with src address INADDR_ANY to 2642 * be used, which are created temporarily for 2643 * sending out packets from endpoints that have 2644 * conn_unspec_src set. 2645 */ 2646 if (dst_ire == NULL || 2647 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2648 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2649 /* 2650 * When verifying destination reachability, we always 2651 * complain. 2652 * 2653 * When not verifying destination reachability but we 2654 * found an IRE, i.e. the destination is reachable, 2655 * then the other tests still apply and we complain. 
2656 */ 2657 if (verify_dst || (dst_ire != NULL)) { 2658 if (ip_debug > 2) { 2659 /* ip1dbg */ 2660 pr_addr_dbg("ip_bind_connected_v6: bad" 2661 " connected dst %s\n", AF_INET6, 2662 v6dst); 2663 } 2664 if (dst_ire == NULL || 2665 !(dst_ire->ire_type & IRE_HOST)) { 2666 error = ENETUNREACH; 2667 } else { 2668 error = EHOSTUNREACH; 2669 } 2670 goto bad_addr; 2671 } 2672 } 2673 } 2674 2675 /* 2676 * We now know that routing will allow us to reach the destination. 2677 * Check whether Trusted Solaris policy allows communication with this 2678 * host, and pretend that the destination is unreachable if not. 2679 * 2680 * This is never a problem for TCP, since that transport is known to 2681 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2682 * handling. If the remote is unreachable, it will be detected at that 2683 * point, so there's no reason to check it here. 2684 * 2685 * Note that for sendto (and other datagram-oriented friends), this 2686 * check is done as part of the data path label computation instead. 2687 * The check here is just to make non-TCP connect() report the right 2688 * error. 2689 */ 2690 if (dst_ire != NULL && is_system_labeled() && 2691 !IPCL_IS_TCP(connp) && 2692 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2693 connp->conn_mac_exempt, ipst) != 0) { 2694 error = EHOSTUNREACH; 2695 if (ip_debug > 2) { 2696 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2697 AF_INET6, v6dst); 2698 } 2699 goto bad_addr; 2700 } 2701 2702 /* 2703 * If the app does a connect(), it means that it will most likely 2704 * send more than 1 packet to the destination. It makes sense 2705 * to clear the temporary flag. 
2706 */ 2707 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2708 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2709 irb_t *irb = dst_ire->ire_bucket; 2710 2711 rw_enter(&irb->irb_lock, RW_WRITER); 2712 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2713 irb->irb_tmp_ire_cnt--; 2714 rw_exit(&irb->irb_lock); 2715 } 2716 2717 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2718 2719 /* 2720 * See if we should notify ULP about MDT; we do this whether or not 2721 * ire_requested is TRUE, in order to handle active connects; MDT 2722 * eligibility tests for passive connects are handled separately 2723 * through tcp_adapt_ire(). We do this before the source address 2724 * selection, because dst_ire may change after a call to 2725 * ipif_select_source_v6(). This is a best-effort check, as the 2726 * packet for this connection may not actually go through 2727 * dst_ire->ire_stq, and the exact IRE can only be known after 2728 * calling ip_newroute_v6(). This is why we further check on the 2729 * IRE during Multidata packet transmission in tcp_multisend(). 
2730 */ 2731 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2732 dst_ire != NULL && 2733 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2734 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2735 ILL_MDT_CAPABLE(md_ill)) { 2736 md_dst_ire = dst_ire; 2737 IRE_REFHOLD(md_dst_ire); 2738 } 2739 2740 if (dst_ire != NULL && 2741 dst_ire->ire_type == IRE_LOCAL && 2742 dst_ire->ire_zoneid != zoneid && 2743 dst_ire->ire_zoneid != ALL_ZONES) { 2744 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2745 zoneid, 0, NULL, 2746 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2747 MATCH_IRE_RJ_BHOLE, ipst); 2748 if (src_ire == NULL) { 2749 error = EHOSTUNREACH; 2750 goto bad_addr; 2751 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2752 if (!(src_ire->ire_type & IRE_HOST)) 2753 error = ENETUNREACH; 2754 else 2755 error = EHOSTUNREACH; 2756 goto bad_addr; 2757 } 2758 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2759 src_ipif = src_ire->ire_ipif; 2760 ipif_refhold(src_ipif); 2761 *v6src = src_ipif->ipif_v6lcl_addr; 2762 } 2763 ire_refrele(src_ire); 2764 src_ire = NULL; 2765 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2766 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2767 *v6src = sire->ire_src_addr_v6; 2768 ire_refrele(dst_ire); 2769 dst_ire = sire; 2770 sire = NULL; 2771 } else if (dst_ire->ire_type == IRE_CACHE && 2772 (dst_ire->ire_flags & RTF_SETSRC)) { 2773 ASSERT(dst_ire->ire_zoneid == zoneid || 2774 dst_ire->ire_zoneid == ALL_ZONES); 2775 *v6src = dst_ire->ire_src_addr_v6; 2776 } else { 2777 /* 2778 * Pick a source address so that a proper inbound load 2779 * spreading would happen. Use dst_ill specified by the 2780 * app. when socket option or scopeid is set. 
2781 */ 2782 int err; 2783 2784 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2785 uint_t if_index; 2786 2787 /* 2788 * Scope id or IPV6_PKTINFO 2789 */ 2790 2791 if_index = ipp->ipp_ifindex; 2792 dst_ill = ill_lookup_on_ifindex( 2793 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2794 ipst); 2795 if (dst_ill == NULL) { 2796 ip1dbg(("ip_bind_connected_v6:" 2797 " bad ifindex %d\n", if_index)); 2798 error = EADDRNOTAVAIL; 2799 goto bad_addr; 2800 } 2801 ill_held = B_TRUE; 2802 } else if (connp->conn_outgoing_ill != NULL) { 2803 /* 2804 * For IPV6_BOUND_IF socket option, 2805 * conn_outgoing_ill should be set 2806 * already in TCP or UDP/ICMP. 2807 */ 2808 dst_ill = conn_get_held_ill(connp, 2809 &connp->conn_outgoing_ill, &err); 2810 if (err == ILL_LOOKUP_FAILED) { 2811 ip1dbg(("ip_bind_connected_v6:" 2812 "no ill for bound_if\n")); 2813 error = EADDRNOTAVAIL; 2814 goto bad_addr; 2815 } 2816 ill_held = B_TRUE; 2817 } else if (dst_ire->ire_stq != NULL) { 2818 /* No need to hold ill here */ 2819 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2820 } else { 2821 /* No need to hold ill here */ 2822 dst_ill = dst_ire->ire_ipif->ipif_ill; 2823 } 2824 if (!ip6_asp_can_lookup(ipst)) { 2825 *mp->b_wptr++ = (char)protocol; 2826 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2827 ip_bind_connected_resume_v6); 2828 error = EINPROGRESS; 2829 goto refrele_and_quit; 2830 } 2831 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2832 RESTRICT_TO_NONE, connp->conn_src_preferences, 2833 zoneid); 2834 ip6_asp_table_refrele(ipst); 2835 if (src_ipif == NULL) { 2836 pr_addr_dbg("ip_bind_connected_v6: " 2837 "no usable source address for " 2838 "connection to %s\n", AF_INET6, v6dst); 2839 error = EADDRNOTAVAIL; 2840 goto bad_addr; 2841 } 2842 *v6src = src_ipif->ipif_v6lcl_addr; 2843 } 2844 } 2845 2846 /* 2847 * We do ire_route_lookup_v6() here (and not an interface lookup) 2848 * as we assert that v6src should only come from an 2849 * UP interface for hard binding. 
2850 */ 2851 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2852 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2853 2854 /* src_ire must be a local|loopback */ 2855 if (!IRE_IS_LOCAL(src_ire)) { 2856 if (ip_debug > 2) { 2857 /* ip1dbg */ 2858 pr_addr_dbg("ip_bind_connected_v6: bad " 2859 "connected src %s\n", AF_INET6, v6src); 2860 } 2861 error = EADDRNOTAVAIL; 2862 goto bad_addr; 2863 } 2864 2865 /* 2866 * If the source address is a loopback address, the 2867 * destination had best be local or multicast. 2868 * The transports that can't handle multicast will reject 2869 * those addresses. 2870 */ 2871 if (src_ire->ire_type == IRE_LOOPBACK && 2872 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2873 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2874 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2875 error = -1; 2876 goto bad_addr; 2877 } 2878 /* 2879 * Allow setting new policies. For example, disconnects come 2880 * down as ipa_t bind. As we would have set conn_policy_cached 2881 * to B_TRUE before, we should set it to B_FALSE, so that policy 2882 * can change after the disconnect. 2883 */ 2884 connp->conn_policy_cached = B_FALSE; 2885 2886 /* 2887 * The addresses have been verified. Initialize the conn 2888 * before calling the policy as they expect the conns 2889 * initialized. 2890 */ 2891 connp->conn_srcv6 = *v6src; 2892 connp->conn_remv6 = *v6dst; 2893 connp->conn_lport = lport; 2894 connp->conn_fport = fport; 2895 2896 ASSERT(!(ipsec_policy_set && ire_requested)); 2897 if (ire_requested) { 2898 iulp_t *ulp_info = NULL; 2899 2900 /* 2901 * Note that sire will not be NULL if this is an off-link 2902 * connection and there is not cache for that dest yet. 2903 * 2904 * XXX Because of an existing bug, if there are multiple 2905 * default routes, the IRE returned now may not be the actual 2906 * default route used (default routes are chosen in a 2907 * round robin fashion). 
So if the metrics for different 2908 * default routes are different, we may return the wrong 2909 * metrics. This will not be a problem if the existing 2910 * bug is fixed. 2911 */ 2912 if (sire != NULL) 2913 ulp_info = &(sire->ire_uinfo); 2914 2915 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info, 2916 ipst)) { 2917 error = -1; 2918 goto bad_addr; 2919 } 2920 } else if (ipsec_policy_set) { 2921 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2922 error = -1; 2923 goto bad_addr; 2924 } 2925 } 2926 2927 /* 2928 * Cache IPsec policy in this conn. If we have per-socket policy, 2929 * we'll cache that. If we don't, we'll inherit global policy. 2930 * 2931 * We can't insert until the conn reflects the policy. Note that 2932 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2933 * connections where we don't have a policy. This is to prevent 2934 * global policy lookups in the inbound path. 2935 * 2936 * If we insert before we set conn_policy_cached, 2937 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2938 * because global policy cound be non-empty. We normally call 2939 * ipsec_check_policy() for conn_policy_cached connections only if 2940 * conn_in_enforce_policy is set. But in this case, 2941 * conn_policy_cached can get set anytime since we made the 2942 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2943 * is called, which will make the above assumption false. Thus, we 2944 * need to insert after we set conn_policy_cached. 2945 */ 2946 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2947 goto bad_addr; 2948 2949 /* If not fanout_insert this was just an address verification */ 2950 if (fanout_insert) { 2951 /* 2952 * The addresses have been verified. Time to insert in 2953 * the correct fanout list. 2954 */ 2955 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2956 connp->conn_ports, 2957 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2958 } 2959 if (error == 0) { 2960 connp->conn_fully_bound = B_TRUE; 2961 /* 2962 * Our initial checks for MDT have passed; the IRE is not 2963 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2964 * be supporting MDT. Pass the IRE, IPC and ILL into 2965 * ip_mdinfo_return(), which performs further checks 2966 * against them and upon success, returns the MDT info 2967 * mblk which we will attach to the bind acknowledgment. 2968 */ 2969 if (md_dst_ire != NULL) { 2970 mblk_t *mdinfo_mp; 2971 2972 ASSERT(md_ill != NULL); 2973 ASSERT(md_ill->ill_mdt_capab != NULL); 2974 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2975 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 2976 linkb(mp, mdinfo_mp); 2977 } 2978 } 2979 bad_addr: 2980 if (ipsec_policy_set) { 2981 ASSERT(policy_mp != NULL); 2982 freeb(policy_mp); 2983 /* 2984 * As of now assume that nothing else accompanies 2985 * IPSEC_POLICY_SET. 2986 */ 2987 mp->b_cont = NULL; 2988 } 2989 refrele_and_quit: 2990 if (src_ire != NULL) 2991 IRE_REFRELE(src_ire); 2992 if (dst_ire != NULL) 2993 IRE_REFRELE(dst_ire); 2994 if (sire != NULL) 2995 IRE_REFRELE(sire); 2996 if (src_ipif != NULL) 2997 ipif_refrele(src_ipif); 2998 if (md_dst_ire != NULL) 2999 IRE_REFRELE(md_dst_ire); 3000 if (ill_held && dst_ill != NULL) 3001 ill_refrele(dst_ill); 3002 return (error); 3003 } 3004 3005 /* 3006 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3007 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 3008 */ 3009 /* ARGSUSED4 */ 3010 static boolean_t 3011 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3012 iulp_t *ulp_info, ip_stack_t *ipst) 3013 { 3014 mblk_t *mp1; 3015 ire_t *ret_ire; 3016 3017 mp1 = mp->b_cont; 3018 ASSERT(mp1 != NULL); 3019 3020 if (ire != NULL) { 3021 /* 3022 * mp1 initialized above to IRE_DB_REQ_TYPE 3023 * appended mblk. Its <upper protocol>'s 3024 * job to make sure there is room. 
3025 */ 3026 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3027 return (B_FALSE); 3028 3029 mp1->b_datap->db_type = IRE_DB_TYPE; 3030 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3031 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3032 ret_ire = (ire_t *)mp1->b_rptr; 3033 if (IN6_IS_ADDR_MULTICAST(dst) || 3034 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3035 ret_ire->ire_type = IRE_BROADCAST; 3036 ret_ire->ire_addr_v6 = *dst; 3037 } 3038 if (ulp_info != NULL) { 3039 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3040 sizeof (iulp_t)); 3041 } 3042 ret_ire->ire_mp = mp1; 3043 } else { 3044 /* 3045 * No IRE was found. Remove IRE mblk. 3046 */ 3047 mp->b_cont = mp1->b_cont; 3048 freeb(mp1); 3049 } 3050 return (B_TRUE); 3051 } 3052 3053 /* 3054 * Add an ip6i_t header to the front of the mblk. 3055 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3056 * Returns NULL if allocation fails (and frees original message). 3057 * Used in outgoing path when going through ip_newroute_*v6(). 3058 * Used in incoming path to pass ifindex to transports. 
3059 */ 3060 mblk_t * 3061 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3062 { 3063 mblk_t *mp1; 3064 ip6i_t *ip6i; 3065 ip6_t *ip6h; 3066 3067 ip6h = (ip6_t *)mp->b_rptr; 3068 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3069 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3070 mp->b_datap->db_ref > 1) { 3071 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3072 if (mp1 == NULL) { 3073 freemsg(mp); 3074 return (NULL); 3075 } 3076 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3077 mp1->b_cont = mp; 3078 mp = mp1; 3079 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3080 } 3081 mp->b_rptr = (uchar_t *)ip6i; 3082 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3083 ip6i->ip6i_nxt = IPPROTO_RAW; 3084 if (ill != NULL) { 3085 ip6i->ip6i_flags = IP6I_IFINDEX; 3086 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3087 } else { 3088 ip6i->ip6i_flags = 0; 3089 } 3090 ip6i->ip6i_nexthop = *dst; 3091 return (mp); 3092 } 3093 3094 /* 3095 * Handle protocols with which IP is less intimate. There 3096 * can be more than one stream bound to a particular 3097 * protocol. When this is the case, normally each one gets a copy 3098 * of any incoming packets. 3099 * However, if the packet was tunneled and not multicast we only send to it 3100 * the first match. 3101 * 3102 * Zones notes: 3103 * Packets will be distributed to streams in all zones. This is really only 3104 * useful for ICMPv6 as only applications in the global zone can create raw 3105 * sockets for other protocols. 
3106 */ 3107 static void 3108 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3109 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3110 boolean_t mctl_present, zoneid_t zoneid) 3111 { 3112 queue_t *rq; 3113 mblk_t *mp1, *first_mp1; 3114 in6_addr_t dst = ip6h->ip6_dst; 3115 in6_addr_t src = ip6h->ip6_src; 3116 boolean_t one_only; 3117 mblk_t *first_mp = mp; 3118 boolean_t secure, shared_addr; 3119 conn_t *connp, *first_connp, *next_connp; 3120 connf_t *connfp; 3121 ip_stack_t *ipst = inill->ill_ipst; 3122 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3123 3124 if (mctl_present) { 3125 mp = first_mp->b_cont; 3126 secure = ipsec_in_is_secure(first_mp); 3127 ASSERT(mp != NULL); 3128 } else { 3129 secure = B_FALSE; 3130 } 3131 3132 /* 3133 * If the packet was tunneled and not multicast we only send to it 3134 * the first match. 3135 */ 3136 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3137 !IN6_IS_ADDR_MULTICAST(&dst)); 3138 3139 shared_addr = (zoneid == ALL_ZONES); 3140 if (shared_addr) { 3141 /* 3142 * We don't allow multilevel ports for raw IP, so no need to 3143 * check for that here. 3144 */ 3145 zoneid = tsol_packet_to_zoneid(mp); 3146 } 3147 3148 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3149 mutex_enter(&connfp->connf_lock); 3150 connp = connfp->connf_head; 3151 for (connp = connfp->connf_head; connp != NULL; 3152 connp = connp->conn_next) { 3153 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3154 zoneid) && 3155 (!is_system_labeled() || 3156 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3157 connp))) 3158 break; 3159 } 3160 3161 if (connp == NULL || connp->conn_upq == NULL) { 3162 /* 3163 * No one bound to this port. Is 3164 * there a client that wants all 3165 * unclaimed datagrams? 
3166 */ 3167 mutex_exit(&connfp->connf_lock); 3168 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3169 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3170 nexthdr_offset, mctl_present, zoneid, ipst)) { 3171 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3172 } 3173 3174 return; 3175 } 3176 3177 CONN_INC_REF(connp); 3178 first_connp = connp; 3179 3180 /* 3181 * XXX: Fix the multiple protocol listeners case. We should not 3182 * be walking the conn->next list here. 3183 */ 3184 if (one_only) { 3185 /* 3186 * Only send message to one tunnel driver by immediately 3187 * terminating the loop. 3188 */ 3189 connp = NULL; 3190 } else { 3191 connp = connp->conn_next; 3192 3193 } 3194 for (;;) { 3195 while (connp != NULL) { 3196 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3197 flags, zoneid) && 3198 (!is_system_labeled() || 3199 tsol_receive_local(mp, &dst, IPV6_VERSION, 3200 shared_addr, connp))) 3201 break; 3202 connp = connp->conn_next; 3203 } 3204 3205 /* 3206 * Just copy the data part alone. The mctl part is 3207 * needed just for verifying policy and it is never 3208 * sent up. 3209 */ 3210 if (connp == NULL || connp->conn_upq == NULL || 3211 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3212 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3213 /* 3214 * No more intested clients or memory 3215 * allocation failed 3216 */ 3217 connp = first_connp; 3218 break; 3219 } 3220 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3221 CONN_INC_REF(connp); 3222 mutex_exit(&connfp->connf_lock); 3223 rq = connp->conn_rq; 3224 /* 3225 * For link-local always add ifindex so that transport can set 3226 * sin6_scope_id. Avoid it for ICMP error fanout. 
3227 */ 3228 if ((connp->conn_ip_recvpktinfo || 3229 IN6_IS_ADDR_LINKLOCAL(&src)) && 3230 (flags & IP_FF_IPINFO)) { 3231 /* Add header */ 3232 mp1 = ip_add_info_v6(mp1, inill, &dst); 3233 } 3234 if (mp1 == NULL) { 3235 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3236 } else if (!canputnext(rq)) { 3237 if (flags & IP_FF_RAWIP) { 3238 BUMP_MIB(ill->ill_ip_mib, 3239 rawipIfStatsInOverflows); 3240 } else { 3241 BUMP_MIB(ill->ill_icmp6_mib, 3242 ipv6IfIcmpInOverflows); 3243 } 3244 3245 freemsg(mp1); 3246 } else { 3247 /* 3248 * Don't enforce here if we're a tunnel - let "tun" do 3249 * it instead. 3250 */ 3251 if (!IPCL_IS_IPTUN(connp) && 3252 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3253 secure)) { 3254 first_mp1 = ipsec_check_inbound_policy 3255 (first_mp1, connp, NULL, ip6h, 3256 mctl_present); 3257 } 3258 if (first_mp1 != NULL) { 3259 if (mctl_present) 3260 freeb(first_mp1); 3261 BUMP_MIB(ill->ill_ip_mib, 3262 ipIfStatsHCInDelivers); 3263 (connp->conn_recv)(connp, mp1, NULL); 3264 } 3265 } 3266 mutex_enter(&connfp->connf_lock); 3267 /* Follow the next pointer before releasing the conn. */ 3268 next_connp = connp->conn_next; 3269 CONN_DEC_REF(connp); 3270 connp = next_connp; 3271 } 3272 3273 /* Last one. Send it upstream. */ 3274 mutex_exit(&connfp->connf_lock); 3275 3276 /* Initiate IPPF processing */ 3277 if (IP6_IN_IPP(flags, ipst)) { 3278 uint_t ifindex; 3279 3280 mutex_enter(&ill->ill_lock); 3281 ifindex = ill->ill_phyint->phyint_ifindex; 3282 mutex_exit(&ill->ill_lock); 3283 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3284 if (mp == NULL) { 3285 CONN_DEC_REF(connp); 3286 if (mctl_present) 3287 freeb(first_mp); 3288 return; 3289 } 3290 } 3291 3292 /* 3293 * For link-local always add ifindex so that transport can set 3294 * sin6_scope_id. Avoid it for ICMP error fanout. 
3295 */ 3296 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3297 (flags & IP_FF_IPINFO)) { 3298 /* Add header */ 3299 mp = ip_add_info_v6(mp, inill, &dst); 3300 if (mp == NULL) { 3301 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3302 CONN_DEC_REF(connp); 3303 if (mctl_present) 3304 freeb(first_mp); 3305 return; 3306 } else if (mctl_present) { 3307 first_mp->b_cont = mp; 3308 } else { 3309 first_mp = mp; 3310 } 3311 } 3312 3313 rq = connp->conn_rq; 3314 if (!canputnext(rq)) { 3315 if (flags & IP_FF_RAWIP) { 3316 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3317 } else { 3318 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3319 } 3320 3321 freemsg(first_mp); 3322 } else { 3323 if (IPCL_IS_IPTUN(connp)) { 3324 /* 3325 * Tunneled packet. We enforce policy in the tunnel 3326 * module itself. 3327 * 3328 * Send the WHOLE packet up (incl. IPSEC_IN) without 3329 * a policy check. 3330 */ 3331 putnext(rq, first_mp); 3332 CONN_DEC_REF(connp); 3333 return; 3334 } 3335 /* 3336 * Don't enforce here if we're a tunnel - let "tun" do 3337 * it instead. 3338 */ 3339 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3340 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3341 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3342 NULL, ip6h, mctl_present); 3343 if (first_mp == NULL) { 3344 CONN_DEC_REF(connp); 3345 return; 3346 } 3347 } 3348 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3349 (connp->conn_recv)(connp, mp, NULL); 3350 if (mctl_present) 3351 freeb(first_mp); 3352 } 3353 CONN_DEC_REF(connp); 3354 } 3355 3356 /* 3357 * Send an ICMP error after patching up the packet appropriately. Returns 3358 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3359 */ 3360 int 3361 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3362 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3363 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3364 { 3365 ip6_t *ip6h; 3366 mblk_t *first_mp; 3367 boolean_t secure; 3368 unsigned char db_type; 3369 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3370 3371 first_mp = mp; 3372 if (mctl_present) { 3373 mp = mp->b_cont; 3374 secure = ipsec_in_is_secure(first_mp); 3375 ASSERT(mp != NULL); 3376 } else { 3377 /* 3378 * If this is an ICMP error being reported - which goes 3379 * up as M_CTLs, we need to convert them to M_DATA till 3380 * we finish checking with global policy because 3381 * ipsec_check_global_policy() assumes M_DATA as clear 3382 * and M_CTL as secure. 3383 */ 3384 db_type = mp->b_datap->db_type; 3385 mp->b_datap->db_type = M_DATA; 3386 secure = B_FALSE; 3387 } 3388 /* 3389 * We are generating an icmp error for some inbound packet. 3390 * Called from all ip_fanout_(udp, tcp, proto) functions. 3391 * Before we generate an error, check with global policy 3392 * to see whether this is allowed to enter the system. As 3393 * there is no "conn", we are checking with global policy. 
3394 */ 3395 ip6h = (ip6_t *)mp->b_rptr; 3396 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3397 first_mp = ipsec_check_global_policy(first_mp, NULL, 3398 NULL, ip6h, mctl_present, ipst->ips_netstack); 3399 if (first_mp == NULL) 3400 return (0); 3401 } 3402 3403 if (!mctl_present) 3404 mp->b_datap->db_type = db_type; 3405 3406 if (flags & IP_FF_SEND_ICMP) { 3407 if (flags & IP_FF_HDR_COMPLETE) { 3408 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3409 freemsg(first_mp); 3410 return (1); 3411 } 3412 } 3413 switch (icmp_type) { 3414 case ICMP6_DST_UNREACH: 3415 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3416 B_FALSE, B_FALSE, zoneid, ipst); 3417 break; 3418 case ICMP6_PARAM_PROB: 3419 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3420 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3421 break; 3422 default: 3423 #ifdef DEBUG 3424 panic("ip_fanout_send_icmp_v6: wrong type"); 3425 /*NOTREACHED*/ 3426 #else 3427 freemsg(first_mp); 3428 break; 3429 #endif 3430 } 3431 } else { 3432 freemsg(first_mp); 3433 return (0); 3434 } 3435 3436 return (1); 3437 } 3438 3439 3440 /* 3441 * Fanout for TCP packets 3442 * The caller puts <fport, lport> in the ports parameter. 
3443 */ 3444 static void 3445 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3446 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3447 { 3448 mblk_t *first_mp; 3449 boolean_t secure; 3450 conn_t *connp; 3451 tcph_t *tcph; 3452 boolean_t syn_present = B_FALSE; 3453 ip_stack_t *ipst = inill->ill_ipst; 3454 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3455 3456 first_mp = mp; 3457 if (mctl_present) { 3458 mp = first_mp->b_cont; 3459 secure = ipsec_in_is_secure(first_mp); 3460 ASSERT(mp != NULL); 3461 } else { 3462 secure = B_FALSE; 3463 } 3464 3465 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3466 3467 if (connp == NULL || 3468 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3469 /* 3470 * No hard-bound match. Send Reset. 3471 */ 3472 dblk_t *dp = mp->b_datap; 3473 uint32_t ill_index; 3474 3475 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3476 3477 /* Initiate IPPf processing, if needed. */ 3478 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3479 (flags & IP6_NO_IPPOLICY)) { 3480 ill_index = ill->ill_phyint->phyint_ifindex; 3481 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3482 if (first_mp == NULL) { 3483 if (connp != NULL) 3484 CONN_DEC_REF(connp); 3485 return; 3486 } 3487 } 3488 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3489 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3490 ipst->ips_netstack->netstack_tcp, connp); 3491 if (connp != NULL) 3492 CONN_DEC_REF(connp); 3493 return; 3494 } 3495 3496 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3497 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3498 if (connp->conn_flags & IPCL_TCP) { 3499 squeue_t *sqp; 3500 3501 /* 3502 * For fused tcp loopback, assign the eager's 3503 * squeue to be that of the active connect's. 
3504 */ 3505 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3506 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3507 !secure && 3508 !IP6_IN_IPP(flags, ipst)) { 3509 ASSERT(Q_TO_CONN(q) != NULL); 3510 sqp = Q_TO_CONN(q)->conn_sqp; 3511 } else { 3512 sqp = IP_SQUEUE_GET(lbolt); 3513 } 3514 3515 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3516 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3517 3518 /* 3519 * db_cksumstuff is unused in the incoming 3520 * path; Thus store the ifindex here. It will 3521 * be cleared in tcp_conn_create_v6(). 3522 */ 3523 DB_CKSUMSTUFF(mp) = 3524 (intptr_t)ill->ill_phyint->phyint_ifindex; 3525 syn_present = B_TRUE; 3526 } 3527 } 3528 3529 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3530 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3531 if ((flags & TH_RST) || (flags & TH_URG)) { 3532 CONN_DEC_REF(connp); 3533 freemsg(first_mp); 3534 return; 3535 } 3536 if (flags & TH_ACK) { 3537 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3538 ipst->ips_netstack->netstack_tcp, connp); 3539 CONN_DEC_REF(connp); 3540 return; 3541 } 3542 3543 CONN_DEC_REF(connp); 3544 freemsg(first_mp); 3545 return; 3546 } 3547 3548 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3549 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3550 NULL, ip6h, mctl_present); 3551 if (first_mp == NULL) { 3552 CONN_DEC_REF(connp); 3553 return; 3554 } 3555 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3556 ASSERT(syn_present); 3557 if (mctl_present) { 3558 ASSERT(first_mp != mp); 3559 first_mp->b_datap->db_struioflag |= 3560 STRUIO_POLICY; 3561 } else { 3562 ASSERT(first_mp == mp); 3563 mp->b_datap->db_struioflag &= 3564 ~STRUIO_EAGER; 3565 mp->b_datap->db_struioflag |= 3566 STRUIO_POLICY; 3567 } 3568 } else { 3569 /* 3570 * Discard first_mp early since we're dealing with a 3571 * fully-connected conn_t and tcp doesn't do policy in 3572 * this case. 
Also, if someone is bound to IPPROTO_TCP 3573 * over raw IP, they don't expect to see a M_CTL. 3574 */ 3575 if (mctl_present) { 3576 freeb(first_mp); 3577 mctl_present = B_FALSE; 3578 } 3579 first_mp = mp; 3580 } 3581 } 3582 3583 /* Initiate IPPF processing */ 3584 if (IP6_IN_IPP(flags, ipst)) { 3585 uint_t ifindex; 3586 3587 mutex_enter(&ill->ill_lock); 3588 ifindex = ill->ill_phyint->phyint_ifindex; 3589 mutex_exit(&ill->ill_lock); 3590 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3591 if (mp == NULL) { 3592 CONN_DEC_REF(connp); 3593 if (mctl_present) { 3594 freeb(first_mp); 3595 } 3596 return; 3597 } else if (mctl_present) { 3598 /* 3599 * ip_add_info_v6 might return a new mp. 3600 */ 3601 ASSERT(first_mp != mp); 3602 first_mp->b_cont = mp; 3603 } else { 3604 first_mp = mp; 3605 } 3606 } 3607 3608 /* 3609 * For link-local always add ifindex so that TCP can bind to that 3610 * interface. Avoid it for ICMP error fanout. 3611 */ 3612 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3613 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3614 (flags & IP_FF_IPINFO))) { 3615 /* Add header */ 3616 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3617 if (mp == NULL) { 3618 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3619 CONN_DEC_REF(connp); 3620 if (mctl_present) 3621 freeb(first_mp); 3622 return; 3623 } else if (mctl_present) { 3624 ASSERT(first_mp != mp); 3625 first_mp->b_cont = mp; 3626 } else { 3627 first_mp = mp; 3628 } 3629 } 3630 3631 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3632 if (IPCL_IS_TCP(connp)) { 3633 (*ip_input_proc)(connp->conn_sqp, first_mp, 3634 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3635 } else { 3636 /* SOCK_RAW, IPPROTO_TCP case */ 3637 (connp->conn_recv)(connp, first_mp, NULL); 3638 CONN_DEC_REF(connp); 3639 } 3640 } 3641 3642 /* 3643 * Fanout for UDP packets. 3644 * The caller puts <fport, lport> in the ports parameter. 3645 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3646 * 3647 * If SO_REUSEADDR is set all multicast and broadcast packets 3648 * will be delivered to all streams bound to the same port. 3649 * 3650 * Zones notes: 3651 * Multicast packets will be distributed to streams in all zones. 3652 */ 3653 static void 3654 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3655 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3656 zoneid_t zoneid) 3657 { 3658 uint32_t dstport, srcport; 3659 in6_addr_t dst; 3660 mblk_t *first_mp; 3661 boolean_t secure; 3662 conn_t *connp; 3663 connf_t *connfp; 3664 conn_t *first_conn; 3665 conn_t *next_conn; 3666 mblk_t *mp1, *first_mp1; 3667 in6_addr_t src; 3668 boolean_t shared_addr; 3669 ip_stack_t *ipst = inill->ill_ipst; 3670 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3671 3672 first_mp = mp; 3673 if (mctl_present) { 3674 mp = first_mp->b_cont; 3675 secure = ipsec_in_is_secure(first_mp); 3676 ASSERT(mp != NULL); 3677 } else { 3678 secure = B_FALSE; 3679 } 3680 3681 /* Extract ports in net byte order */ 3682 dstport = htons(ntohl(ports) & 0xFFFF); 3683 srcport = htons(ntohl(ports) >> 16); 3684 dst = ip6h->ip6_dst; 3685 src = ip6h->ip6_src; 3686 3687 shared_addr = (zoneid == ALL_ZONES); 3688 if (shared_addr) { 3689 /* 3690 * No need to handle exclusive-stack zones since ALL_ZONES 3691 * only applies to the shared stack. 3692 */ 3693 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3694 /* 3695 * If no shared MLP is found, tsol_mlp_findzone returns 3696 * ALL_ZONES. In that case, we assume it's SLP, and 3697 * search for the zone based on the packet label. 3698 * That will also return ALL_ZONES on failure, but 3699 * we never allow conn_zoneid to be set to ALL_ZONES. 3700 */ 3701 if (zoneid == ALL_ZONES) 3702 zoneid = tsol_packet_to_zoneid(mp); 3703 } 3704 3705 /* Attempt to find a client stream based on destination port. 
*/ 3706 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3707 mutex_enter(&connfp->connf_lock); 3708 connp = connfp->connf_head; 3709 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3710 /* 3711 * Not multicast. Send to the one (first) client we find. 3712 */ 3713 while (connp != NULL) { 3714 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3715 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3716 conn_wantpacket_v6(connp, ill, ip6h, 3717 flags, zoneid)) { 3718 break; 3719 } 3720 connp = connp->conn_next; 3721 } 3722 if (connp == NULL || connp->conn_upq == NULL) 3723 goto notfound; 3724 3725 if (is_system_labeled() && 3726 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3727 connp)) 3728 goto notfound; 3729 3730 /* Found a client */ 3731 CONN_INC_REF(connp); 3732 mutex_exit(&connfp->connf_lock); 3733 3734 if (CONN_UDP_FLOWCTLD(connp)) { 3735 freemsg(first_mp); 3736 CONN_DEC_REF(connp); 3737 return; 3738 } 3739 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3740 first_mp = ipsec_check_inbound_policy(first_mp, 3741 connp, NULL, ip6h, mctl_present); 3742 if (first_mp == NULL) { 3743 CONN_DEC_REF(connp); 3744 return; 3745 } 3746 } 3747 /* Initiate IPPF processing */ 3748 if (IP6_IN_IPP(flags, ipst)) { 3749 uint_t ifindex; 3750 3751 mutex_enter(&ill->ill_lock); 3752 ifindex = ill->ill_phyint->phyint_ifindex; 3753 mutex_exit(&ill->ill_lock); 3754 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3755 if (mp == NULL) { 3756 CONN_DEC_REF(connp); 3757 if (mctl_present) 3758 freeb(first_mp); 3759 return; 3760 } 3761 } 3762 /* 3763 * For link-local always add ifindex so that 3764 * transport can set sin6_scope_id. Avoid it for 3765 * ICMP error fanout. 
3766 */ 3767 if ((connp->conn_ip_recvpktinfo || 3768 IN6_IS_ADDR_LINKLOCAL(&src)) && 3769 (flags & IP_FF_IPINFO)) { 3770 /* Add header */ 3771 mp = ip_add_info_v6(mp, inill, &dst); 3772 if (mp == NULL) { 3773 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3774 CONN_DEC_REF(connp); 3775 if (mctl_present) 3776 freeb(first_mp); 3777 return; 3778 } else if (mctl_present) { 3779 first_mp->b_cont = mp; 3780 } else { 3781 first_mp = mp; 3782 } 3783 } 3784 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3785 3786 /* Send it upstream */ 3787 (connp->conn_recv)(connp, mp, NULL); 3788 3789 IP6_STAT(ipst, ip6_udp_fannorm); 3790 CONN_DEC_REF(connp); 3791 if (mctl_present) 3792 freeb(first_mp); 3793 return; 3794 } 3795 3796 while (connp != NULL) { 3797 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3798 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3799 (!is_system_labeled() || 3800 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3801 connp))) 3802 break; 3803 connp = connp->conn_next; 3804 } 3805 3806 if (connp == NULL || connp->conn_upq == NULL) 3807 goto notfound; 3808 3809 first_conn = connp; 3810 3811 CONN_INC_REF(connp); 3812 connp = connp->conn_next; 3813 for (;;) { 3814 while (connp != NULL) { 3815 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3816 src) && conn_wantpacket_v6(connp, ill, ip6h, 3817 flags, zoneid) && 3818 (!is_system_labeled() || 3819 tsol_receive_local(mp, &dst, IPV6_VERSION, 3820 shared_addr, connp))) 3821 break; 3822 connp = connp->conn_next; 3823 } 3824 /* 3825 * Just copy the data part alone. The mctl part is 3826 * needed just for verifying policy and it is never 3827 * sent up. 3828 */ 3829 if (connp == NULL || 3830 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3831 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3832 /* 3833 * No more interested clients or memory 3834 * allocation failed 3835 */ 3836 connp = first_conn; 3837 break; 3838 } 3839 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3840 CONN_INC_REF(connp); 3841 mutex_exit(&connfp->connf_lock); 3842 /* 3843 * For link-local always add ifindex so that transport 3844 * can set sin6_scope_id. Avoid it for ICMP error 3845 * fanout. 3846 */ 3847 if ((connp->conn_ip_recvpktinfo || 3848 IN6_IS_ADDR_LINKLOCAL(&src)) && 3849 (flags & IP_FF_IPINFO)) { 3850 /* Add header */ 3851 mp1 = ip_add_info_v6(mp1, inill, &dst); 3852 } 3853 /* mp1 could have changed */ 3854 if (mctl_present) 3855 first_mp1->b_cont = mp1; 3856 else 3857 first_mp1 = mp1; 3858 if (mp1 == NULL) { 3859 if (mctl_present) 3860 freeb(first_mp1); 3861 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3862 goto next_one; 3863 } 3864 if (CONN_UDP_FLOWCTLD(connp)) { 3865 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3866 freemsg(first_mp1); 3867 goto next_one; 3868 } 3869 3870 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3871 first_mp1 = ipsec_check_inbound_policy 3872 (first_mp1, connp, NULL, ip6h, 3873 mctl_present); 3874 } 3875 if (first_mp1 != NULL) { 3876 if (mctl_present) 3877 freeb(first_mp1); 3878 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3879 3880 /* Send it upstream */ 3881 (connp->conn_recv)(connp, mp1, NULL); 3882 } 3883 next_one: 3884 mutex_enter(&connfp->connf_lock); 3885 /* Follow the next pointer before releasing the conn. */ 3886 next_conn = connp->conn_next; 3887 IP6_STAT(ipst, ip6_udp_fanmb); 3888 CONN_DEC_REF(connp); 3889 connp = next_conn; 3890 } 3891 3892 /* Last one. Send it upstream. 
*/ 3893 mutex_exit(&connfp->connf_lock); 3894 3895 /* Initiate IPPF processing */ 3896 if (IP6_IN_IPP(flags, ipst)) { 3897 uint_t ifindex; 3898 3899 mutex_enter(&ill->ill_lock); 3900 ifindex = ill->ill_phyint->phyint_ifindex; 3901 mutex_exit(&ill->ill_lock); 3902 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3903 if (mp == NULL) { 3904 CONN_DEC_REF(connp); 3905 if (mctl_present) { 3906 freeb(first_mp); 3907 } 3908 return; 3909 } 3910 } 3911 3912 /* 3913 * For link-local always add ifindex so that transport can set 3914 * sin6_scope_id. Avoid it for ICMP error fanout. 3915 */ 3916 if ((connp->conn_ip_recvpktinfo || 3917 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3918 /* Add header */ 3919 mp = ip_add_info_v6(mp, inill, &dst); 3920 if (mp == NULL) { 3921 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3922 CONN_DEC_REF(connp); 3923 if (mctl_present) 3924 freeb(first_mp); 3925 return; 3926 } else if (mctl_present) { 3927 first_mp->b_cont = mp; 3928 } else { 3929 first_mp = mp; 3930 } 3931 } 3932 if (CONN_UDP_FLOWCTLD(connp)) { 3933 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3934 freemsg(mp); 3935 } else { 3936 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3937 first_mp = ipsec_check_inbound_policy(first_mp, 3938 connp, NULL, ip6h, mctl_present); 3939 if (first_mp == NULL) { 3940 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3941 CONN_DEC_REF(connp); 3942 return; 3943 } 3944 } 3945 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3946 3947 /* Send it upstream */ 3948 (connp->conn_recv)(connp, mp, NULL); 3949 } 3950 IP6_STAT(ipst, ip6_udp_fanmb); 3951 CONN_DEC_REF(connp); 3952 if (mctl_present) 3953 freeb(first_mp); 3954 return; 3955 3956 notfound: 3957 mutex_exit(&connfp->connf_lock); 3958 /* 3959 * No one bound to this port. Is 3960 * there a client that wants all 3961 * unclaimed datagrams? 
3962 */ 3963 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3964 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3965 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3966 zoneid); 3967 } else { 3968 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3969 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3970 mctl_present, zoneid, ipst)) { 3971 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3972 } 3973 } 3974 } 3975 3976 /* 3977 * int ip_find_hdr_v6() 3978 * 3979 * This routine is used by the upper layer protocols and the IP tunnel 3980 * module to: 3981 * - Set extension header pointers to appropriate locations 3982 * - Determine IPv6 header length and return it 3983 * - Return a pointer to the last nexthdr value 3984 * 3985 * The caller must initialize ipp_fields. 3986 * 3987 * NOTE: If multiple extension headers of the same type are present, 3988 * ip_find_hdr_v6() will set the respective extension header pointers 3989 * to the first one that it encounters in the IPv6 header. It also 3990 * skips fragment headers. This routine deals with malformed packets 3991 * of various sorts in which case the returned length is up to the 3992 * malformed part. 3993 */ 3994 int 3995 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3996 { 3997 uint_t length, ehdrlen; 3998 uint8_t nexthdr; 3999 uint8_t *whereptr, *endptr; 4000 ip6_dest_t *tmpdstopts; 4001 ip6_rthdr_t *tmprthdr; 4002 ip6_hbh_t *tmphopopts; 4003 ip6_frag_t *tmpfraghdr; 4004 4005 length = IPV6_HDR_LEN; 4006 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4007 endptr = mp->b_wptr; 4008 4009 nexthdr = ip6h->ip6_nxt; 4010 while (whereptr < endptr) { 4011 /* Is there enough left for len + nexthdr? 
*/ 4012 if (whereptr + MIN_EHDR_LEN > endptr) 4013 goto done; 4014 4015 switch (nexthdr) { 4016 case IPPROTO_HOPOPTS: 4017 tmphopopts = (ip6_hbh_t *)whereptr; 4018 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4019 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4020 goto done; 4021 nexthdr = tmphopopts->ip6h_nxt; 4022 /* return only 1st hbh */ 4023 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4024 ipp->ipp_fields |= IPPF_HOPOPTS; 4025 ipp->ipp_hopopts = tmphopopts; 4026 ipp->ipp_hopoptslen = ehdrlen; 4027 } 4028 break; 4029 case IPPROTO_DSTOPTS: 4030 tmpdstopts = (ip6_dest_t *)whereptr; 4031 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4032 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4033 goto done; 4034 nexthdr = tmpdstopts->ip6d_nxt; 4035 /* 4036 * ipp_dstopts is set to the destination header after a 4037 * routing header. 4038 * Assume it is a post-rthdr destination header 4039 * and adjust when we find an rthdr. 4040 */ 4041 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4042 ipp->ipp_fields |= IPPF_DSTOPTS; 4043 ipp->ipp_dstopts = tmpdstopts; 4044 ipp->ipp_dstoptslen = ehdrlen; 4045 } 4046 break; 4047 case IPPROTO_ROUTING: 4048 tmprthdr = (ip6_rthdr_t *)whereptr; 4049 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4050 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4051 goto done; 4052 nexthdr = tmprthdr->ip6r_nxt; 4053 /* return only 1st rthdr */ 4054 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4055 ipp->ipp_fields |= IPPF_RTHDR; 4056 ipp->ipp_rthdr = tmprthdr; 4057 ipp->ipp_rthdrlen = ehdrlen; 4058 } 4059 /* 4060 * Make any destination header we've seen be a 4061 * pre-rthdr destination header. 
4062 */ 4063 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4064 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4065 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4066 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4067 ipp->ipp_dstopts = NULL; 4068 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4069 ipp->ipp_dstoptslen = 0; 4070 } 4071 break; 4072 case IPPROTO_FRAGMENT: 4073 tmpfraghdr = (ip6_frag_t *)whereptr; 4074 ehdrlen = sizeof (ip6_frag_t); 4075 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4076 goto done; 4077 nexthdr = tmpfraghdr->ip6f_nxt; 4078 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4079 ipp->ipp_fields |= IPPF_FRAGHDR; 4080 ipp->ipp_fraghdr = tmpfraghdr; 4081 ipp->ipp_fraghdrlen = ehdrlen; 4082 } 4083 break; 4084 case IPPROTO_NONE: 4085 default: 4086 goto done; 4087 } 4088 length += ehdrlen; 4089 whereptr += ehdrlen; 4090 } 4091 done: 4092 if (nexthdrp != NULL) 4093 *nexthdrp = nexthdr; 4094 return (length); 4095 } 4096 4097 int 4098 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4099 { 4100 ire_t *ire; 4101 4102 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4103 ire = ire_lookup_local_v6(zoneid, ipst); 4104 if (ire == NULL) { 4105 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4106 return (1); 4107 } 4108 ip6h->ip6_src = ire->ire_addr_v6; 4109 ire_refrele(ire); 4110 } 4111 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4112 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4113 return (0); 4114 } 4115 4116 /* 4117 * Try to determine where and what are the IPv6 header length and 4118 * pointer to nexthdr value for the upper layer protocol (or an 4119 * unknown next hdr). 4120 * 4121 * Parameters returns a pointer to the nexthdr value; 4122 * Must handle malformed packets of various sorts. 4123 * Function returns failure for malformed cases. 
4124 */ 4125 boolean_t 4126 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4127 uint8_t **nexthdrpp) 4128 { 4129 uint16_t length; 4130 uint_t ehdrlen; 4131 uint8_t *nexthdrp; 4132 uint8_t *whereptr; 4133 uint8_t *endptr; 4134 ip6_dest_t *desthdr; 4135 ip6_rthdr_t *rthdr; 4136 ip6_frag_t *fraghdr; 4137 4138 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4139 length = IPV6_HDR_LEN; 4140 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4141 endptr = mp->b_wptr; 4142 4143 nexthdrp = &ip6h->ip6_nxt; 4144 while (whereptr < endptr) { 4145 /* Is there enough left for len + nexthdr? */ 4146 if (whereptr + MIN_EHDR_LEN > endptr) 4147 break; 4148 4149 switch (*nexthdrp) { 4150 case IPPROTO_HOPOPTS: 4151 case IPPROTO_DSTOPTS: 4152 /* Assumes the headers are identical for hbh and dst */ 4153 desthdr = (ip6_dest_t *)whereptr; 4154 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4155 if ((uchar_t *)desthdr + ehdrlen > endptr) 4156 return (B_FALSE); 4157 nexthdrp = &desthdr->ip6d_nxt; 4158 break; 4159 case IPPROTO_ROUTING: 4160 rthdr = (ip6_rthdr_t *)whereptr; 4161 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4162 if ((uchar_t *)rthdr + ehdrlen > endptr) 4163 return (B_FALSE); 4164 nexthdrp = &rthdr->ip6r_nxt; 4165 break; 4166 case IPPROTO_FRAGMENT: 4167 fraghdr = (ip6_frag_t *)whereptr; 4168 ehdrlen = sizeof (ip6_frag_t); 4169 if ((uchar_t *)&fraghdr[1] > endptr) 4170 return (B_FALSE); 4171 nexthdrp = &fraghdr->ip6f_nxt; 4172 break; 4173 case IPPROTO_NONE: 4174 /* No next header means we're finished */ 4175 default: 4176 *hdr_length_ptr = length; 4177 *nexthdrpp = nexthdrp; 4178 return (B_TRUE); 4179 } 4180 length += ehdrlen; 4181 whereptr += ehdrlen; 4182 *hdr_length_ptr = length; 4183 *nexthdrpp = nexthdrp; 4184 } 4185 switch (*nexthdrp) { 4186 case IPPROTO_HOPOPTS: 4187 case IPPROTO_DSTOPTS: 4188 case IPPROTO_ROUTING: 4189 case IPPROTO_FRAGMENT: 4190 /* 4191 * If any know extension headers are still to be processed, 4192 * the 
packet's malformed (or at least all the IP header(s) are 4193 * not in the same mblk - and that should never happen. 4194 */ 4195 return (B_FALSE); 4196 4197 default: 4198 /* 4199 * If we get here, we know that all of the IP headers were in 4200 * the same mblk, even if the ULP header is in the next mblk. 4201 */ 4202 *hdr_length_ptr = length; 4203 *nexthdrpp = nexthdrp; 4204 return (B_TRUE); 4205 } 4206 } 4207 4208 /* 4209 * Return the length of the IPv6 related headers (including extension headers) 4210 * Returns a length even if the packet is malformed. 4211 */ 4212 int 4213 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4214 { 4215 uint16_t hdr_len; 4216 uint8_t *nexthdrp; 4217 4218 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4219 return (hdr_len); 4220 } 4221 4222 /* 4223 * Select an ill for the packet by considering load spreading across 4224 * a different ill in the group if dst_ill is part of some group. 4225 */ 4226 static ill_t * 4227 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4228 { 4229 ill_t *ill; 4230 4231 /* 4232 * We schedule irrespective of whether the source address is 4233 * INADDR_UNSPECIED or not. 4234 */ 4235 ill = illgrp_scheduler(dst_ill); 4236 if (ill == NULL) 4237 return (NULL); 4238 4239 /* 4240 * For groups with names ip_sioctl_groupname ensures that all 4241 * ills are of same type. For groups without names, ifgrp_insert 4242 * ensures this. 4243 */ 4244 ASSERT(dst_ill->ill_type == ill->ill_type); 4245 4246 return (ill); 4247 } 4248 4249 /* 4250 * IPv6 - 4251 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4252 * to send out a packet to a destination address for which we do not have 4253 * specific routing information. 4254 * 4255 * Handle non-multicast packets. If ill is non-NULL the match is done 4256 * for that ill. 
*
 * When a specific ill is specified (using IPV6_PKTINFO,
 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
 * on routing entries (ftable and ctable) that have a matching
 * ire->ire_ipif->ipif_ill. Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it can not "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 * v6srcp may be used in the future. Currently unused.
4295 */ 4296 /* ARGSUSED */ 4297 void 4298 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4299 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4300 { 4301 in6_addr_t v6gw; 4302 in6_addr_t dst; 4303 ire_t *ire = NULL; 4304 ipif_t *src_ipif = NULL; 4305 ill_t *dst_ill = NULL; 4306 ire_t *sire = NULL; 4307 ire_t *save_ire; 4308 ip6_t *ip6h; 4309 int err = 0; 4310 mblk_t *first_mp; 4311 ipsec_out_t *io; 4312 ill_t *attach_ill = NULL; 4313 ushort_t ire_marks = 0; 4314 int match_flags; 4315 boolean_t ip6i_present; 4316 ire_t *first_sire = NULL; 4317 mblk_t *copy_mp = NULL; 4318 mblk_t *xmit_mp = NULL; 4319 in6_addr_t save_dst; 4320 uint32_t multirt_flags = 4321 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4322 boolean_t multirt_is_resolvable; 4323 boolean_t multirt_resolve_next; 4324 boolean_t need_rele = B_FALSE; 4325 boolean_t do_attach_ill = B_FALSE; 4326 boolean_t ip6_asp_table_held = B_FALSE; 4327 tsol_ire_gw_secattr_t *attrp = NULL; 4328 tsol_gcgrp_t *gcgrp = NULL; 4329 tsol_gcgrp_addr_t ga; 4330 4331 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4332 4333 first_mp = mp; 4334 if (mp->b_datap->db_type == M_CTL) { 4335 mp = mp->b_cont; 4336 io = (ipsec_out_t *)first_mp->b_rptr; 4337 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4338 } else { 4339 io = NULL; 4340 } 4341 4342 /* 4343 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4344 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4345 * could be NULL. 4346 * 4347 * This information can appear either in an ip6i_t or an IPSEC_OUT 4348 * message. 4349 */ 4350 ip6h = (ip6_t *)mp->b_rptr; 4351 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4352 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4353 if (!ip6i_present || 4354 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4355 attach_ill = ip_grab_attach_ill(ill, first_mp, 4356 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4357 io->ipsec_out_ill_index), B_TRUE, ipst); 4358 /* Failure case frees things for us. */ 4359 if (attach_ill == NULL) 4360 return; 4361 4362 /* 4363 * Check if we need an ire that will not be 4364 * looked up by anybody else i.e. HIDDEN. 4365 */ 4366 if (ill_is_probeonly(attach_ill)) 4367 ire_marks = IRE_MARK_HIDDEN; 4368 } 4369 } 4370 4371 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4372 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4373 goto icmp_err_ret; 4374 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4375 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4376 goto icmp_err_ret; 4377 } 4378 4379 /* 4380 * If this IRE is created for forwarding or it is not for 4381 * TCP traffic, mark it as temporary. 4382 * 4383 * Is it sufficient just to check the next header?? 4384 */ 4385 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4386 ire_marks |= IRE_MARK_TEMPORARY; 4387 4388 /* 4389 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4390 * chain until it gets the most specific information available. 4391 * For example, we know that there is no IRE_CACHE for this dest, 4392 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4393 * ire_ftable_lookup_v6 will look up the gateway, etc. 4394 */ 4395 4396 if (ill == NULL) { 4397 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4398 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4399 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4400 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4401 match_flags, ipst); 4402 /* 4403 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4404 * in a NULL ill, but the packet could be a neighbor 4405 * solicitation/advertisment and could have a valid attach_ill. 4406 */ 4407 if (attach_ill != NULL) 4408 ill_refrele(attach_ill); 4409 } else { 4410 if (attach_ill != NULL) { 4411 /* 4412 * attach_ill is set only for communicating with 4413 * on-link hosts. 
So, don't look for DEFAULT. 4414 * ip_wput_v6 passes the right ill in this case and 4415 * hence we can assert. 4416 */ 4417 ASSERT(ill == attach_ill); 4418 ill_refrele(attach_ill); 4419 do_attach_ill = B_TRUE; 4420 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4421 } else { 4422 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4423 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4424 } 4425 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4426 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4427 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); 4428 } 4429 4430 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4431 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4432 4433 if (zoneid == ALL_ZONES && ire != NULL) { 4434 /* 4435 * In the forwarding case, we can use a route from any zone 4436 * since we won't change the source address. We can easily 4437 * assert that the source address is already set when there's no 4438 * ip6_info header - otherwise we'd have to call pullupmsg(). 4439 */ 4440 ASSERT(ip6i_present || 4441 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4442 zoneid = ire->ire_zoneid; 4443 } 4444 4445 /* 4446 * We enter a loop that will be run only once in most cases. 4447 * The loop is re-entered in the case where the destination 4448 * can be reached through multiple RTF_MULTIRT-flagged routes. 4449 * The intention is to compute multiple routes to a single 4450 * destination in a single ip_newroute_v6 call. 4451 * The information is contained in sire->ire_flags. 
4452 */ 4453 do { 4454 multirt_resolve_next = B_FALSE; 4455 4456 if (dst_ill != NULL) { 4457 ill_refrele(dst_ill); 4458 dst_ill = NULL; 4459 } 4460 if (src_ipif != NULL) { 4461 ipif_refrele(src_ipif); 4462 src_ipif = NULL; 4463 } 4464 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4465 ip3dbg(("ip_newroute_v6: starting new resolution " 4466 "with first_mp %p, tag %d\n", 4467 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4468 4469 /* 4470 * We check if there are trailing unresolved routes for 4471 * the destination contained in sire. 4472 */ 4473 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4474 &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); 4475 4476 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4477 "ire %p, sire %p\n", 4478 multirt_is_resolvable, (void *)ire, (void *)sire)); 4479 4480 if (!multirt_is_resolvable) { 4481 /* 4482 * No more multirt routes to resolve; give up 4483 * (all routes resolved or no more resolvable 4484 * routes). 4485 */ 4486 if (ire != NULL) { 4487 ire_refrele(ire); 4488 ire = NULL; 4489 } 4490 } else { 4491 ASSERT(sire != NULL); 4492 ASSERT(ire != NULL); 4493 /* 4494 * We simply use first_sire as a flag that 4495 * indicates if a resolvable multirt route has 4496 * already been found during the preceding 4497 * loops. If it is not the case, we may have 4498 * to send an ICMP error to report that the 4499 * destination is unreachable. We do not 4500 * IRE_REFHOLD first_sire. 4501 */ 4502 if (first_sire == NULL) { 4503 first_sire = sire; 4504 } 4505 } 4506 } 4507 if ((ire == NULL) || (ire == sire)) { 4508 /* 4509 * either ire == NULL (the destination cannot be 4510 * resolved) or ire == sire (the gateway cannot be 4511 * resolved). At this point, there are no more routes 4512 * to resolve for the destination, thus we exit. 
4513 */ 4514 if (ip_debug > 3) { 4515 /* ip2dbg */ 4516 pr_addr_dbg("ip_newroute_v6: " 4517 "can't resolve %s\n", AF_INET6, v6dstp); 4518 } 4519 ip3dbg(("ip_newroute_v6: " 4520 "ire %p, sire %p, first_sire %p\n", 4521 (void *)ire, (void *)sire, (void *)first_sire)); 4522 4523 if (sire != NULL) { 4524 ire_refrele(sire); 4525 sire = NULL; 4526 } 4527 4528 if (first_sire != NULL) { 4529 /* 4530 * At least one multirt route has been found 4531 * in the same ip_newroute() call; there is no 4532 * need to report an ICMP error. 4533 * first_sire was not IRE_REFHOLDed. 4534 */ 4535 MULTIRT_DEBUG_UNTAG(first_mp); 4536 freemsg(first_mp); 4537 return; 4538 } 4539 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4540 RTA_DST, ipst); 4541 goto icmp_err_ret; 4542 } 4543 4544 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4545 4546 /* 4547 * Verify that the returned IRE does not have either the 4548 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4549 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4550 */ 4551 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4552 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4553 goto icmp_err_ret; 4554 4555 /* 4556 * Increment the ire_ob_pkt_count field for ire if it is an 4557 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4558 * increment the same for the parent IRE, sire, if it is some 4559 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4560 */ 4561 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4562 UPDATE_OB_PKT_COUNT(ire); 4563 ire->ire_last_used_time = lbolt; 4564 } 4565 4566 if (sire != NULL) { 4567 mutex_enter(&sire->ire_lock); 4568 v6gw = sire->ire_gateway_addr_v6; 4569 mutex_exit(&sire->ire_lock); 4570 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4571 IRE_INTERFACE)) == 0); 4572 UPDATE_OB_PKT_COUNT(sire); 4573 sire->ire_last_used_time = lbolt; 4574 } else { 4575 v6gw = ipv6_all_zeros; 4576 } 4577 4578 /* 4579 * We have a route to reach the destination. 
4580 * 4581 * 1) If the interface is part of ill group, try to get a new 4582 * ill taking load spreading into account. 4583 * 4584 * 2) After selecting the ill, get a source address that might 4585 * create good inbound load spreading and that matches the 4586 * right scope. ipif_select_source_v6 does this for us. 4587 * 4588 * If the application specified the ill (ifindex), we still 4589 * load spread. Only if the packets needs to go out specifically 4590 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4591 * IPV6_BOUND_PIF we don't try to use a different ill for load 4592 * spreading. 4593 */ 4594 if (!do_attach_ill) { 4595 /* 4596 * If the interface belongs to an interface group, 4597 * make sure the next possible interface in the group 4598 * is used. This encourages load spreading among 4599 * peers in an interface group. However, in the case 4600 * of multirouting, load spreading is not used, as we 4601 * actually want to replicate outgoing packets through 4602 * particular interfaces. 4603 * 4604 * Note: While we pick a dst_ill we are really only 4605 * interested in the ill for load spreading. 4606 * The source ipif is determined by source address 4607 * selection below. 4608 */ 4609 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4610 dst_ill = ire->ire_ipif->ipif_ill; 4611 /* For uniformity do a refhold */ 4612 ill_refhold(dst_ill); 4613 } else { 4614 /* 4615 * If we are here trying to create an IRE_CACHE 4616 * for an offlink destination and have the 4617 * IRE_CACHE for the next hop and the latter is 4618 * using virtual IP source address selection i.e 4619 * it's ire->ire_ipif is pointing to a virtual 4620 * network interface (vni) then 4621 * ip_newroute_get_dst_ll() will return the vni 4622 * interface as the dst_ill. 
Since the vni is 4623 * virtual i.e not associated with any physical 4624 * interface, it cannot be the dst_ill, hence 4625 * in such a case call ip_newroute_get_dst_ll() 4626 * with the stq_ill instead of the ire_ipif ILL. 4627 * The function returns a refheld ill. 4628 */ 4629 if ((ire->ire_type == IRE_CACHE) && 4630 IS_VNI(ire->ire_ipif->ipif_ill)) 4631 dst_ill = ip_newroute_get_dst_ill_v6( 4632 ire->ire_stq->q_ptr); 4633 else 4634 dst_ill = ip_newroute_get_dst_ill_v6( 4635 ire->ire_ipif->ipif_ill); 4636 } 4637 if (dst_ill == NULL) { 4638 if (ip_debug > 2) { 4639 pr_addr_dbg("ip_newroute_v6 : no dst " 4640 "ill for dst %s\n", 4641 AF_INET6, v6dstp); 4642 } 4643 goto icmp_err_ret; 4644 } else if (dst_ill->ill_group == NULL && ill != NULL && 4645 dst_ill != ill) { 4646 /* 4647 * If "ill" is not part of any group, we should 4648 * have found a route matching "ill" as we 4649 * called ire_ftable_lookup_v6 with 4650 * MATCH_IRE_ILL_GROUP. 4651 * Rather than asserting when there is a 4652 * mismatch, we just drop the packet. 4653 */ 4654 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4655 "dst_ill %s ill %s\n", 4656 dst_ill->ill_name, 4657 ill->ill_name)); 4658 goto icmp_err_ret; 4659 } 4660 } else { 4661 dst_ill = ire->ire_ipif->ipif_ill; 4662 /* For uniformity do refhold */ 4663 ill_refhold(dst_ill); 4664 /* 4665 * We should have found a route matching ill as we 4666 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4667 * Rather than asserting, while there is a mismatch, 4668 * we just drop the packet. 4669 */ 4670 if (dst_ill != ill) { 4671 ip0dbg(("ip_newroute_v6: Packet dropped as " 4672 "IP6I_ATTACH_IF ill is %s, " 4673 "ire->ire_ipif->ipif_ill is %s\n", 4674 ill->ill_name, 4675 dst_ill->ill_name)); 4676 goto icmp_err_ret; 4677 } 4678 } 4679 /* 4680 * Pick a source address which matches the scope of the 4681 * destination address. 4682 * For RTF_SETSRC routes, the source address is imposed by the 4683 * parent ire (sire). 
4684 */ 4685 ASSERT(src_ipif == NULL); 4686 if (ire->ire_type == IRE_IF_RESOLVER && 4687 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4688 ip6_asp_can_lookup(ipst)) { 4689 /* 4690 * The ire cache entry we're adding is for the 4691 * gateway itself. The source address in this case 4692 * is relative to the gateway's address. 4693 */ 4694 ip6_asp_table_held = B_TRUE; 4695 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4696 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4697 if (src_ipif != NULL) 4698 ire_marks |= IRE_MARK_USESRC_CHECK; 4699 } else { 4700 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4701 /* 4702 * Check that the ipif matching the requested 4703 * source address still exists. 4704 */ 4705 src_ipif = ipif_lookup_addr_v6( 4706 &sire->ire_src_addr_v6, NULL, zoneid, 4707 NULL, NULL, NULL, NULL, ipst); 4708 } 4709 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4710 uint_t restrict_ill = RESTRICT_TO_NONE; 4711 4712 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4713 & IP6I_ATTACH_IF) 4714 restrict_ill = RESTRICT_TO_ILL; 4715 ip6_asp_table_held = B_TRUE; 4716 src_ipif = ipif_select_source_v6(dst_ill, 4717 v6dstp, restrict_ill, 4718 IPV6_PREFER_SRC_DEFAULT, zoneid); 4719 if (src_ipif != NULL) 4720 ire_marks |= IRE_MARK_USESRC_CHECK; 4721 } 4722 } 4723 4724 if (src_ipif == NULL) { 4725 if (ip_debug > 2) { 4726 /* ip1dbg */ 4727 pr_addr_dbg("ip_newroute_v6: no src for " 4728 "dst %s\n, ", AF_INET6, v6dstp); 4729 printf("ip_newroute_v6: interface name %s\n", 4730 dst_ill->ill_name); 4731 } 4732 goto icmp_err_ret; 4733 } 4734 4735 if (ip_debug > 3) { 4736 /* ip2dbg */ 4737 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4738 AF_INET6, &v6gw); 4739 } 4740 ip2dbg(("\tire type %s (%d)\n", 4741 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4742 4743 /* 4744 * At this point in ip_newroute_v6(), ire is either the 4745 * IRE_CACHE of the next-hop gateway for an off-subnet 4746 * destination or an IRE_INTERFACE type that should be used 4747 * to 
resolve an on-subnet destination or an on-subnet 4748 * next-hop gateway. 4749 * 4750 * In the IRE_CACHE case, we have the following : 4751 * 4752 * 1) src_ipif - used for getting a source address. 4753 * 4754 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4755 * means packets using this IRE_CACHE will go out on dst_ill. 4756 * 4757 * 3) The IRE sire will point to the prefix that is the longest 4758 * matching route for the destination. These prefix types 4759 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4760 * 4761 * The newly created IRE_CACHE entry for the off-subnet 4762 * destination is tied to both the prefix route and the 4763 * interface route used to resolve the next-hop gateway 4764 * via the ire_phandle and ire_ihandle fields, respectively. 4765 * 4766 * In the IRE_INTERFACE case, we have the following : 4767 * 4768 * 1) src_ipif - used for getting a source address. 4769 * 4770 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4771 * means packets using the IRE_CACHE that we will build 4772 * here will go out on dst_ill. 4773 * 4774 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4775 * to be created will only be tied to the IRE_INTERFACE that 4776 * was derived from the ire_ihandle field. 4777 * 4778 * If sire is non-NULL, it means the destination is off-link 4779 * and we will first create the IRE_CACHE for the gateway. 4780 * Next time through ip_newroute_v6, we will create the 4781 * IRE_CACHE for the final destination as described above. 4782 */ 4783 save_ire = ire; 4784 switch (ire->ire_type) { 4785 case IRE_CACHE: { 4786 ire_t *ipif_ire; 4787 4788 ASSERT(sire != NULL); 4789 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4790 mutex_enter(&ire->ire_lock); 4791 v6gw = ire->ire_gateway_addr_v6; 4792 mutex_exit(&ire->ire_lock); 4793 } 4794 /* 4795 * We need 3 ire's to create a new cache ire for an 4796 * off-link destination from the cache ire of the 4797 * gateway. 4798 * 4799 * 1. The prefix ire 'sire' 4800 * 2. 
The cache ire of the gateway 'ire' 4801 * 3. The interface ire 'ipif_ire' 4802 * 4803 * We have (1) and (2). We lookup (3) below. 4804 * 4805 * If there is no interface route to the gateway, 4806 * it is a race condition, where we found the cache 4807 * but the inteface route has been deleted. 4808 */ 4809 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4810 if (ipif_ire == NULL) { 4811 ip1dbg(("ip_newroute_v6:" 4812 "ire_ihandle_lookup_offlink_v6 failed\n")); 4813 goto icmp_err_ret; 4814 } 4815 /* 4816 * Assume DL_UNITDATA_REQ is same for all physical 4817 * interfaces in the ifgrp. If it isn't, this code will 4818 * have to be seriously rewhacked to allow the 4819 * fastpath probing (such that I cache the link 4820 * header in the IRE_CACHE) to work over ifgrps. 4821 * We have what we need to build an IRE_CACHE. 4822 */ 4823 /* 4824 * Note: the new ire inherits RTF_SETSRC 4825 * and RTF_MULTIRT to propagate these flags from prefix 4826 * to cache. 4827 */ 4828 4829 /* 4830 * Check cached gateway IRE for any security 4831 * attributes; if found, associate the gateway 4832 * credentials group to the destination IRE. 
4833 */ 4834 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4835 mutex_enter(&attrp->igsa_lock); 4836 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4837 GCGRP_REFHOLD(gcgrp); 4838 mutex_exit(&attrp->igsa_lock); 4839 } 4840 4841 ire = ire_create_v6( 4842 v6dstp, /* dest address */ 4843 &ipv6_all_ones, /* mask */ 4844 &src_ipif->ipif_v6src_addr, /* source address */ 4845 &v6gw, /* gateway address */ 4846 &save_ire->ire_max_frag, 4847 NULL, /* src nce */ 4848 dst_ill->ill_rq, /* recv-from queue */ 4849 dst_ill->ill_wq, /* send-to queue */ 4850 IRE_CACHE, 4851 src_ipif, 4852 &sire->ire_mask_v6, /* Parent mask */ 4853 sire->ire_phandle, /* Parent handle */ 4854 ipif_ire->ire_ihandle, /* Interface handle */ 4855 sire->ire_flags & /* flags if any */ 4856 (RTF_SETSRC | RTF_MULTIRT), 4857 &(sire->ire_uinfo), 4858 NULL, 4859 gcgrp, 4860 ipst); 4861 4862 if (ire == NULL) { 4863 if (gcgrp != NULL) { 4864 GCGRP_REFRELE(gcgrp); 4865 gcgrp = NULL; 4866 } 4867 ire_refrele(save_ire); 4868 ire_refrele(ipif_ire); 4869 break; 4870 } 4871 4872 /* reference now held by IRE */ 4873 gcgrp = NULL; 4874 4875 ire->ire_marks |= ire_marks; 4876 4877 /* 4878 * Prevent sire and ipif_ire from getting deleted. The 4879 * newly created ire is tied to both of them via the 4880 * phandle and ihandle respectively. 4881 */ 4882 IRB_REFHOLD(sire->ire_bucket); 4883 /* Has it been removed already ? */ 4884 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4885 IRB_REFRELE(sire->ire_bucket); 4886 ire_refrele(ipif_ire); 4887 ire_refrele(save_ire); 4888 break; 4889 } 4890 4891 IRB_REFHOLD(ipif_ire->ire_bucket); 4892 /* Has it been removed already ? 
*/ 4893 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4894 IRB_REFRELE(ipif_ire->ire_bucket); 4895 IRB_REFRELE(sire->ire_bucket); 4896 ire_refrele(ipif_ire); 4897 ire_refrele(save_ire); 4898 break; 4899 } 4900 4901 xmit_mp = first_mp; 4902 if (ire->ire_flags & RTF_MULTIRT) { 4903 copy_mp = copymsg(first_mp); 4904 if (copy_mp != NULL) { 4905 xmit_mp = copy_mp; 4906 MULTIRT_DEBUG_TAG(first_mp); 4907 } 4908 } 4909 ire_add_then_send(q, ire, xmit_mp); 4910 if (ip6_asp_table_held) { 4911 ip6_asp_table_refrele(ipst); 4912 ip6_asp_table_held = B_FALSE; 4913 } 4914 ire_refrele(save_ire); 4915 4916 /* Assert that sire is not deleted yet. */ 4917 ASSERT(sire->ire_ptpn != NULL); 4918 IRB_REFRELE(sire->ire_bucket); 4919 4920 /* Assert that ipif_ire is not deleted yet. */ 4921 ASSERT(ipif_ire->ire_ptpn != NULL); 4922 IRB_REFRELE(ipif_ire->ire_bucket); 4923 ire_refrele(ipif_ire); 4924 4925 if (copy_mp != NULL) { 4926 /* 4927 * Search for the next unresolved 4928 * multirt route. 4929 */ 4930 copy_mp = NULL; 4931 ipif_ire = NULL; 4932 ire = NULL; 4933 /* re-enter the loop */ 4934 multirt_resolve_next = B_TRUE; 4935 continue; 4936 } 4937 ire_refrele(sire); 4938 ill_refrele(dst_ill); 4939 ipif_refrele(src_ipif); 4940 return; 4941 } 4942 case IRE_IF_NORESOLVER: 4943 /* 4944 * We have what we need to build an IRE_CACHE. 4945 * 4946 * handle the Gated case, where we create 4947 * a NORESOLVER route for loopback. 4948 */ 4949 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4950 break; 4951 /* 4952 * TSol note: We are creating the ire cache for the 4953 * destination 'dst'. If 'dst' is offlink, going 4954 * through the first hop 'gw', the security attributes 4955 * of 'dst' must be set to point to the gateway 4956 * credentials of gateway 'gw'. If 'dst' is onlink, it 4957 * is possible that 'dst' is a potential gateway that is 4958 * referenced by some route that has some security 4959 * attributes. 
Thus in the former case, we need to do a 4960 * gcgrp_lookup of 'gw' while in the latter case we 4961 * need to do gcgrp_lookup of 'dst' itself. 4962 */ 4963 ga.ga_af = AF_INET6; 4964 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4965 ga.ga_addr = v6gw; 4966 else 4967 ga.ga_addr = *v6dstp; 4968 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4969 4970 /* 4971 * Note: the new ire inherits sire flags RTF_SETSRC 4972 * and RTF_MULTIRT to propagate those rules from prefix 4973 * to cache. 4974 */ 4975 ire = ire_create_v6( 4976 v6dstp, /* dest address */ 4977 &ipv6_all_ones, /* mask */ 4978 &src_ipif->ipif_v6src_addr, /* source address */ 4979 &v6gw, /* gateway address */ 4980 &save_ire->ire_max_frag, 4981 NULL, /* no src nce */ 4982 dst_ill->ill_rq, /* recv-from queue */ 4983 dst_ill->ill_wq, /* send-to queue */ 4984 IRE_CACHE, 4985 src_ipif, 4986 &save_ire->ire_mask_v6, /* Parent mask */ 4987 (sire != NULL) ? /* Parent handle */ 4988 sire->ire_phandle : 0, 4989 save_ire->ire_ihandle, /* Interface handle */ 4990 (sire != NULL) ? /* flags if any */ 4991 sire->ire_flags & 4992 (RTF_SETSRC | RTF_MULTIRT) : 0, 4993 &(save_ire->ire_uinfo), 4994 NULL, 4995 gcgrp, 4996 ipst); 4997 4998 if (ire == NULL) { 4999 if (gcgrp != NULL) { 5000 GCGRP_REFRELE(gcgrp); 5001 gcgrp = NULL; 5002 } 5003 ire_refrele(save_ire); 5004 break; 5005 } 5006 5007 /* reference now held by IRE */ 5008 gcgrp = NULL; 5009 5010 ire->ire_marks |= ire_marks; 5011 5012 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5013 dst = v6gw; 5014 else 5015 dst = *v6dstp; 5016 err = ndp_noresolver(dst_ill, &dst); 5017 if (err != 0) { 5018 ire_refrele(save_ire); 5019 break; 5020 } 5021 5022 /* Prevent save_ire from getting deleted */ 5023 IRB_REFHOLD(save_ire->ire_bucket); 5024 /* Has it been removed already ? 
*/ 5025 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5026 IRB_REFRELE(save_ire->ire_bucket); 5027 ire_refrele(save_ire); 5028 break; 5029 } 5030 5031 xmit_mp = first_mp; 5032 /* 5033 * In case of MULTIRT, a copy of the current packet 5034 * to send is made to further re-enter the 5035 * loop and attempt another route resolution 5036 */ 5037 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5038 copy_mp = copymsg(first_mp); 5039 if (copy_mp != NULL) { 5040 xmit_mp = copy_mp; 5041 MULTIRT_DEBUG_TAG(first_mp); 5042 } 5043 } 5044 ire_add_then_send(q, ire, xmit_mp); 5045 if (ip6_asp_table_held) { 5046 ip6_asp_table_refrele(ipst); 5047 ip6_asp_table_held = B_FALSE; 5048 } 5049 5050 /* Assert that it is not deleted yet. */ 5051 ASSERT(save_ire->ire_ptpn != NULL); 5052 IRB_REFRELE(save_ire->ire_bucket); 5053 ire_refrele(save_ire); 5054 5055 if (copy_mp != NULL) { 5056 /* 5057 * If we found a (no)resolver, we ignore any 5058 * trailing top priority IRE_CACHE in 5059 * further loops. This ensures that we do not 5060 * omit any (no)resolver despite the priority 5061 * in this call. 5062 * IRE_CACHE, if any, will be processed 5063 * by another thread entering ip_newroute(), 5064 * (on resolver response, for example). 5065 * We use this to force multiple parallel 5066 * resolution as soon as a packet needs to be 5067 * sent. The result is, after one packet 5068 * emission all reachable routes are generally 5069 * resolved. 5070 * Otherwise, complete resolution of MULTIRT 5071 * routes would require several emissions as 5072 * side effect. 5073 */ 5074 multirt_flags &= ~MULTIRT_CACHEGW; 5075 5076 /* 5077 * Search for the next unresolved multirt 5078 * route. 
5079 */ 5080 copy_mp = NULL; 5081 save_ire = NULL; 5082 ire = NULL; 5083 /* re-enter the loop */ 5084 multirt_resolve_next = B_TRUE; 5085 continue; 5086 } 5087 5088 /* Don't need sire anymore */ 5089 if (sire != NULL) 5090 ire_refrele(sire); 5091 ill_refrele(dst_ill); 5092 ipif_refrele(src_ipif); 5093 return; 5094 5095 case IRE_IF_RESOLVER: 5096 /* 5097 * We can't build an IRE_CACHE yet, but at least we 5098 * found a resolver that can help. 5099 */ 5100 dst = *v6dstp; 5101 5102 /* 5103 * To be at this point in the code with a non-zero gw 5104 * means that dst is reachable through a gateway that 5105 * we have never resolved. By changing dst to the gw 5106 * addr we resolve the gateway first. When 5107 * ire_add_then_send() tries to put the IP dg to dst, 5108 * it will reenter ip_newroute() at which time we will 5109 * find the IRE_CACHE for the gw and create another 5110 * IRE_CACHE above (for dst itself). 5111 */ 5112 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5113 save_dst = dst; 5114 dst = v6gw; 5115 v6gw = ipv6_all_zeros; 5116 } 5117 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5118 /* 5119 * Ask the external resolver to do its thing. 
5120 * Make an mblk chain in the following form: 5121 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5122 */ 5123 mblk_t *ire_mp; 5124 mblk_t *areq_mp; 5125 areq_t *areq; 5126 in6_addr_t *addrp; 5127 5128 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5129 if (ip6_asp_table_held) { 5130 ip6_asp_table_refrele(ipst); 5131 ip6_asp_table_held = B_FALSE; 5132 } 5133 ire = ire_create_mp_v6( 5134 &dst, /* dest address */ 5135 &ipv6_all_ones, /* mask */ 5136 &src_ipif->ipif_v6src_addr, 5137 /* source address */ 5138 &v6gw, /* gateway address */ 5139 NULL, /* no src nce */ 5140 dst_ill->ill_rq, /* recv-from queue */ 5141 dst_ill->ill_wq, /* send-to queue */ 5142 IRE_CACHE, 5143 src_ipif, 5144 &save_ire->ire_mask_v6, /* Parent mask */ 5145 0, 5146 save_ire->ire_ihandle, 5147 /* Interface handle */ 5148 0, /* flags if any */ 5149 &(save_ire->ire_uinfo), 5150 NULL, 5151 NULL, 5152 ipst); 5153 5154 ire_refrele(save_ire); 5155 if (ire == NULL) { 5156 ip1dbg(("ip_newroute_v6:" 5157 "ire is NULL\n")); 5158 break; 5159 } 5160 5161 if ((sire != NULL) && 5162 (sire->ire_flags & RTF_MULTIRT)) { 5163 /* 5164 * processing a copy of the packet to 5165 * send for further resolution loops 5166 */ 5167 copy_mp = copymsg(first_mp); 5168 if (copy_mp != NULL) 5169 MULTIRT_DEBUG_TAG(copy_mp); 5170 } 5171 ire->ire_marks |= ire_marks; 5172 ire_mp = ire->ire_mp; 5173 /* 5174 * Now create or find an nce for this interface. 5175 * The hw addr will need to to be set from 5176 * the reply to the AR_ENTRY_QUERY that 5177 * we're about to send. This will be done in 5178 * ire_add_v6(). 5179 */ 5180 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5181 switch (err) { 5182 case 0: 5183 /* 5184 * New cache entry created. 5185 * Break, then ask the external 5186 * resolver. 5187 */ 5188 break; 5189 case EINPROGRESS: 5190 /* 5191 * Resolution in progress; 5192 * packet has been queued by 5193 * ndp_resolver(). 5194 */ 5195 ire_delete(ire); 5196 ire = NULL; 5197 /* 5198 * Check if another multirt 5199 * route must be resolved. 
5200 */ 5201 if (copy_mp != NULL) { 5202 /* 5203 * If we found a resolver, we 5204 * ignore any trailing top 5205 * priority IRE_CACHE in 5206 * further loops. The reason is 5207 * the same as for noresolver. 5208 */ 5209 multirt_flags &= 5210 ~MULTIRT_CACHEGW; 5211 /* 5212 * Search for the next 5213 * unresolved multirt route. 5214 */ 5215 first_mp = copy_mp; 5216 copy_mp = NULL; 5217 mp = first_mp; 5218 if (mp->b_datap->db_type == 5219 M_CTL) { 5220 mp = mp->b_cont; 5221 } 5222 ASSERT(sire != NULL); 5223 dst = save_dst; 5224 /* 5225 * re-enter the loop 5226 */ 5227 multirt_resolve_next = 5228 B_TRUE; 5229 continue; 5230 } 5231 5232 if (sire != NULL) 5233 ire_refrele(sire); 5234 ill_refrele(dst_ill); 5235 ipif_refrele(src_ipif); 5236 return; 5237 default: 5238 /* 5239 * Transient error; packet will be 5240 * freed. 5241 */ 5242 ire_delete(ire); 5243 ire = NULL; 5244 break; 5245 } 5246 if (err != 0) 5247 break; 5248 /* 5249 * Now set up the AR_ENTRY_QUERY and send it. 5250 */ 5251 areq_mp = ill_arp_alloc(dst_ill, 5252 (uchar_t *)&ipv6_areq_template, 5253 (caddr_t)&dst); 5254 if (areq_mp == NULL) { 5255 ip1dbg(("ip_newroute_v6:" 5256 "areq_mp is NULL\n")); 5257 freemsg(ire_mp); 5258 break; 5259 } 5260 areq = (areq_t *)areq_mp->b_rptr; 5261 addrp = (in6_addr_t *)((char *)areq + 5262 areq->areq_target_addr_offset); 5263 *addrp = dst; 5264 addrp = (in6_addr_t *)((char *)areq + 5265 areq->areq_sender_addr_offset); 5266 *addrp = src_ipif->ipif_v6src_addr; 5267 /* 5268 * link the chain, then send up to the resolver. 5269 */ 5270 linkb(areq_mp, ire_mp); 5271 linkb(areq_mp, mp); 5272 ip1dbg(("ip_newroute_v6:" 5273 "putnext to resolver\n")); 5274 putnext(dst_ill->ill_rq, areq_mp); 5275 /* 5276 * Check if another multirt route 5277 * must be resolved. 5278 */ 5279 ire = NULL; 5280 if (copy_mp != NULL) { 5281 /* 5282 * If we find a resolver, we ignore any 5283 * trailing top priority IRE_CACHE in 5284 * further loops. The reason is the 5285 * same as for noresolver. 
5286 */ 5287 multirt_flags &= ~MULTIRT_CACHEGW; 5288 /* 5289 * Search for the next unresolved 5290 * multirt route. 5291 */ 5292 first_mp = copy_mp; 5293 copy_mp = NULL; 5294 mp = first_mp; 5295 if (mp->b_datap->db_type == M_CTL) { 5296 mp = mp->b_cont; 5297 } 5298 ASSERT(sire != NULL); 5299 dst = save_dst; 5300 /* 5301 * re-enter the loop 5302 */ 5303 multirt_resolve_next = B_TRUE; 5304 continue; 5305 } 5306 5307 if (sire != NULL) 5308 ire_refrele(sire); 5309 ill_refrele(dst_ill); 5310 ipif_refrele(src_ipif); 5311 return; 5312 } 5313 /* 5314 * Non-external resolver case. 5315 * 5316 * TSol note: Please see the note above the 5317 * IRE_IF_NORESOLVER case. 5318 */ 5319 ga.ga_af = AF_INET6; 5320 ga.ga_addr = dst; 5321 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5322 5323 ire = ire_create_v6( 5324 &dst, /* dest address */ 5325 &ipv6_all_ones, /* mask */ 5326 &src_ipif->ipif_v6src_addr, /* source address */ 5327 &v6gw, /* gateway address */ 5328 &save_ire->ire_max_frag, 5329 NULL, /* no src nce */ 5330 dst_ill->ill_rq, /* recv-from queue */ 5331 dst_ill->ill_wq, /* send-to queue */ 5332 IRE_CACHE, 5333 src_ipif, 5334 &save_ire->ire_mask_v6, /* Parent mask */ 5335 0, 5336 save_ire->ire_ihandle, /* Interface handle */ 5337 0, /* flags if any */ 5338 &(save_ire->ire_uinfo), 5339 NULL, 5340 gcgrp, 5341 ipst); 5342 5343 if (ire == NULL) { 5344 if (gcgrp != NULL) { 5345 GCGRP_REFRELE(gcgrp); 5346 gcgrp = NULL; 5347 } 5348 ire_refrele(save_ire); 5349 break; 5350 } 5351 5352 /* reference now held by IRE */ 5353 gcgrp = NULL; 5354 5355 if ((sire != NULL) && 5356 (sire->ire_flags & RTF_MULTIRT)) { 5357 copy_mp = copymsg(first_mp); 5358 if (copy_mp != NULL) 5359 MULTIRT_DEBUG_TAG(copy_mp); 5360 } 5361 5362 ire->ire_marks |= ire_marks; 5363 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5364 switch (err) { 5365 case 0: 5366 /* Prevent save_ire from getting deleted */ 5367 IRB_REFHOLD(save_ire->ire_bucket); 5368 /* Has it been removed already ? 
*/ 5369 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5370 IRB_REFRELE(save_ire->ire_bucket); 5371 ire_refrele(save_ire); 5372 break; 5373 } 5374 5375 /* 5376 * We have a resolved cache entry, 5377 * add in the IRE. 5378 */ 5379 ire_add_then_send(q, ire, first_mp); 5380 if (ip6_asp_table_held) { 5381 ip6_asp_table_refrele(ipst); 5382 ip6_asp_table_held = B_FALSE; 5383 } 5384 5385 /* Assert that it is not deleted yet. */ 5386 ASSERT(save_ire->ire_ptpn != NULL); 5387 IRB_REFRELE(save_ire->ire_bucket); 5388 ire_refrele(save_ire); 5389 /* 5390 * Check if another multirt route 5391 * must be resolved. 5392 */ 5393 ire = NULL; 5394 if (copy_mp != NULL) { 5395 /* 5396 * If we find a resolver, we ignore any 5397 * trailing top priority IRE_CACHE in 5398 * further loops. The reason is the 5399 * same as for noresolver. 5400 */ 5401 multirt_flags &= ~MULTIRT_CACHEGW; 5402 /* 5403 * Search for the next unresolved 5404 * multirt route. 5405 */ 5406 first_mp = copy_mp; 5407 copy_mp = NULL; 5408 mp = first_mp; 5409 if (mp->b_datap->db_type == M_CTL) { 5410 mp = mp->b_cont; 5411 } 5412 ASSERT(sire != NULL); 5413 dst = save_dst; 5414 /* 5415 * re-enter the loop 5416 */ 5417 multirt_resolve_next = B_TRUE; 5418 continue; 5419 } 5420 5421 if (sire != NULL) 5422 ire_refrele(sire); 5423 ill_refrele(dst_ill); 5424 ipif_refrele(src_ipif); 5425 return; 5426 5427 case EINPROGRESS: 5428 /* 5429 * mp was consumed - presumably queued. 5430 * No need for ire, presumably resolution is 5431 * in progress, and ire will be added when the 5432 * address is resolved. 5433 */ 5434 if (ip6_asp_table_held) { 5435 ip6_asp_table_refrele(ipst); 5436 ip6_asp_table_held = B_FALSE; 5437 } 5438 ASSERT(ire->ire_nce == NULL); 5439 ire_delete(ire); 5440 ire_refrele(save_ire); 5441 /* 5442 * Check if another multirt route 5443 * must be resolved. 
5444 */ 5445 ire = NULL; 5446 if (copy_mp != NULL) { 5447 /* 5448 * If we find a resolver, we ignore any 5449 * trailing top priority IRE_CACHE in 5450 * further loops. The reason is the 5451 * same as for noresolver. 5452 */ 5453 multirt_flags &= ~MULTIRT_CACHEGW; 5454 /* 5455 * Search for the next unresolved 5456 * multirt route. 5457 */ 5458 first_mp = copy_mp; 5459 copy_mp = NULL; 5460 mp = first_mp; 5461 if (mp->b_datap->db_type == M_CTL) { 5462 mp = mp->b_cont; 5463 } 5464 ASSERT(sire != NULL); 5465 dst = save_dst; 5466 /* 5467 * re-enter the loop 5468 */ 5469 multirt_resolve_next = B_TRUE; 5470 continue; 5471 } 5472 if (sire != NULL) 5473 ire_refrele(sire); 5474 ill_refrele(dst_ill); 5475 ipif_refrele(src_ipif); 5476 return; 5477 default: 5478 /* Some transient error */ 5479 ASSERT(ire->ire_nce == NULL); 5480 ire_refrele(save_ire); 5481 break; 5482 } 5483 break; 5484 default: 5485 break; 5486 } 5487 if (ip6_asp_table_held) { 5488 ip6_asp_table_refrele(ipst); 5489 ip6_asp_table_held = B_FALSE; 5490 } 5491 } while (multirt_resolve_next); 5492 5493 err_ret: 5494 ip1dbg(("ip_newroute_v6: dropped\n")); 5495 if (src_ipif != NULL) 5496 ipif_refrele(src_ipif); 5497 if (dst_ill != NULL) { 5498 need_rele = B_TRUE; 5499 ill = dst_ill; 5500 } 5501 if (ill != NULL) { 5502 if (mp->b_prev != NULL) { 5503 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5504 } else { 5505 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5506 } 5507 5508 if (need_rele) 5509 ill_refrele(ill); 5510 } else { 5511 if (mp->b_prev != NULL) { 5512 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5513 } else { 5514 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5515 } 5516 } 5517 /* Did this packet originate externally? 
*/ 5518 if (mp->b_prev) { 5519 mp->b_next = NULL; 5520 mp->b_prev = NULL; 5521 } 5522 if (copy_mp != NULL) { 5523 MULTIRT_DEBUG_UNTAG(copy_mp); 5524 freemsg(copy_mp); 5525 } 5526 MULTIRT_DEBUG_UNTAG(first_mp); 5527 freemsg(first_mp); 5528 if (ire != NULL) 5529 ire_refrele(ire); 5530 if (sire != NULL) 5531 ire_refrele(sire); 5532 return; 5533 5534 icmp_err_ret: 5535 if (ip6_asp_table_held) 5536 ip6_asp_table_refrele(ipst); 5537 if (src_ipif != NULL) 5538 ipif_refrele(src_ipif); 5539 if (dst_ill != NULL) { 5540 need_rele = B_TRUE; 5541 ill = dst_ill; 5542 } 5543 ip1dbg(("ip_newroute_v6: no route\n")); 5544 if (sire != NULL) 5545 ire_refrele(sire); 5546 /* 5547 * We need to set sire to NULL to avoid double freeing if we 5548 * ever goto err_ret from below. 5549 */ 5550 sire = NULL; 5551 ip6h = (ip6_t *)mp->b_rptr; 5552 /* Skip ip6i_t header if present */ 5553 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5554 /* Make sure the IPv6 header is present */ 5555 if ((mp->b_wptr - (uchar_t *)ip6h) < 5556 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5557 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5558 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5559 goto err_ret; 5560 } 5561 } 5562 mp->b_rptr += sizeof (ip6i_t); 5563 ip6h = (ip6_t *)mp->b_rptr; 5564 } 5565 /* Did this packet originate externally? 
*/ 5566 if (mp->b_prev) { 5567 if (ill != NULL) { 5568 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5569 } else { 5570 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5571 } 5572 mp->b_next = NULL; 5573 mp->b_prev = NULL; 5574 q = WR(q); 5575 } else { 5576 if (ill != NULL) { 5577 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5578 } else { 5579 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5580 } 5581 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5582 /* Failed */ 5583 if (copy_mp != NULL) { 5584 MULTIRT_DEBUG_UNTAG(copy_mp); 5585 freemsg(copy_mp); 5586 } 5587 MULTIRT_DEBUG_UNTAG(first_mp); 5588 freemsg(first_mp); 5589 if (ire != NULL) 5590 ire_refrele(ire); 5591 if (need_rele) 5592 ill_refrele(ill); 5593 return; 5594 } 5595 } 5596 5597 if (need_rele) 5598 ill_refrele(ill); 5599 5600 /* 5601 * At this point we will have ire only if RTF_BLACKHOLE 5602 * or RTF_REJECT flags are set on the IRE. It will not 5603 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5604 */ 5605 if (ire != NULL) { 5606 if (ire->ire_flags & RTF_BLACKHOLE) { 5607 ire_refrele(ire); 5608 if (copy_mp != NULL) { 5609 MULTIRT_DEBUG_UNTAG(copy_mp); 5610 freemsg(copy_mp); 5611 } 5612 MULTIRT_DEBUG_UNTAG(first_mp); 5613 freemsg(first_mp); 5614 return; 5615 } 5616 ire_refrele(ire); 5617 } 5618 if (ip_debug > 3) { 5619 /* ip2dbg */ 5620 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5621 AF_INET6, v6dstp); 5622 } 5623 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5624 B_FALSE, B_FALSE, zoneid, ipst); 5625 } 5626 5627 /* 5628 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5629 * we need to send out a packet to a destination address for which we do not 5630 * have specific routing information. It is only used for multicast packets. 5631 * 5632 * If unspec_src we allow creating an IRE with source address zero. 5633 * ire_send_v6() will delete it after the packet is sent. 
5634 */ 5635 void 5636 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5637 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5638 { 5639 ire_t *ire = NULL; 5640 ipif_t *src_ipif = NULL; 5641 int err = 0; 5642 ill_t *dst_ill = NULL; 5643 ire_t *save_ire; 5644 ushort_t ire_marks = 0; 5645 ipsec_out_t *io; 5646 ill_t *attach_ill = NULL; 5647 ill_t *ill; 5648 ip6_t *ip6h; 5649 mblk_t *first_mp; 5650 boolean_t ip6i_present; 5651 ire_t *fire = NULL; 5652 mblk_t *copy_mp = NULL; 5653 boolean_t multirt_resolve_next; 5654 in6_addr_t *v6dstp = &v6dst; 5655 boolean_t ipif_held = B_FALSE; 5656 boolean_t ill_held = B_FALSE; 5657 boolean_t ip6_asp_table_held = B_FALSE; 5658 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5659 5660 /* 5661 * This loop is run only once in most cases. 5662 * We loop to resolve further routes only when the destination 5663 * can be reached through multiple RTF_MULTIRT-flagged ires. 5664 */ 5665 do { 5666 multirt_resolve_next = B_FALSE; 5667 if (dst_ill != NULL) { 5668 ill_refrele(dst_ill); 5669 dst_ill = NULL; 5670 } 5671 5672 if (src_ipif != NULL) { 5673 ipif_refrele(src_ipif); 5674 src_ipif = NULL; 5675 } 5676 ASSERT(ipif != NULL); 5677 ill = ipif->ipif_ill; 5678 5679 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5680 if (ip_debug > 2) { 5681 /* ip1dbg */ 5682 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5683 AF_INET6, v6dstp); 5684 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5685 ill->ill_name, ipif->ipif_isv6); 5686 } 5687 5688 first_mp = mp; 5689 if (mp->b_datap->db_type == M_CTL) { 5690 mp = mp->b_cont; 5691 io = (ipsec_out_t *)first_mp->b_rptr; 5692 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5693 } else { 5694 io = NULL; 5695 } 5696 5697 /* 5698 * If the interface is a pt-pt interface we look for an 5699 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5700 * local_address and the pt-pt destination address. 5701 * Otherwise we just match the local address. 
5702 */ 5703 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5704 goto err_ret; 5705 } 5706 /* 5707 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5708 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5709 * as it could be NULL. 5710 * 5711 * This information can appear either in an ip6i_t or an 5712 * IPSEC_OUT message. 5713 */ 5714 ip6h = (ip6_t *)mp->b_rptr; 5715 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5716 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5717 if (!ip6i_present || 5718 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5719 attach_ill = ip_grab_attach_ill(ill, first_mp, 5720 (ip6i_present ? 5721 ((ip6i_t *)ip6h)->ip6i_ifindex : 5722 io->ipsec_out_ill_index), B_TRUE, ipst); 5723 /* Failure case frees things for us. */ 5724 if (attach_ill == NULL) 5725 return; 5726 5727 /* 5728 * Check if we need an ire that will not be 5729 * looked up by anybody else i.e. HIDDEN. 5730 */ 5731 if (ill_is_probeonly(attach_ill)) 5732 ire_marks = IRE_MARK_HIDDEN; 5733 } 5734 } 5735 5736 /* 5737 * We check if an IRE_OFFSUBNET for the addr that goes through 5738 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5739 * RTF_MULTIRT flags must be honored. 5740 */ 5741 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5742 ip2dbg(("ip_newroute_ipif_v6: " 5743 "ipif_lookup_multi_ire_v6(" 5744 "ipif %p, dst %08x) = fire %p\n", 5745 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5746 (void *)fire)); 5747 5748 /* 5749 * If the application specified the ill (ifindex), we still 5750 * load spread. Only if the packets needs to go out specifically 5751 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5752 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5753 * multirouting, then we don't try to use a different ill for 5754 * load spreading. 
5755 */ 5756 if (attach_ill == NULL) { 5757 /* 5758 * If the interface belongs to an interface group, 5759 * make sure the next possible interface in the group 5760 * is used. This encourages load spreading among peers 5761 * in an interface group. 5762 * 5763 * Note: While we pick a dst_ill we are really only 5764 * interested in the ill for load spreading. The source 5765 * ipif is determined by source address selection below. 5766 */ 5767 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5768 dst_ill = ipif->ipif_ill; 5769 /* For uniformity do a refhold */ 5770 ill_refhold(dst_ill); 5771 } else { 5772 /* refheld by ip_newroute_get_dst_ill_v6 */ 5773 dst_ill = 5774 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5775 } 5776 if (dst_ill == NULL) { 5777 if (ip_debug > 2) { 5778 pr_addr_dbg("ip_newroute_ipif_v6: " 5779 "no dst ill for dst %s\n", 5780 AF_INET6, v6dstp); 5781 } 5782 goto err_ret; 5783 } 5784 } else { 5785 dst_ill = ipif->ipif_ill; 5786 /* 5787 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5788 * and IPV6_BOUND_PIF case. 5789 */ 5790 ASSERT(dst_ill == attach_ill); 5791 /* attach_ill is already refheld */ 5792 } 5793 /* 5794 * Pick a source address which matches the scope of the 5795 * destination address. 5796 * For RTF_SETSRC routes, the source address is imposed by the 5797 * parent ire (fire). 5798 */ 5799 ASSERT(src_ipif == NULL); 5800 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5801 /* 5802 * Check that the ipif matching the requested source 5803 * address still exists. 
5804 */ 5805 src_ipif = 5806 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5807 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5808 } 5809 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5810 ip6_asp_table_held = B_TRUE; 5811 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5812 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5813 } 5814 5815 if (src_ipif == NULL) { 5816 if (!unspec_src) { 5817 if (ip_debug > 2) { 5818 /* ip1dbg */ 5819 pr_addr_dbg("ip_newroute_ipif_v6: " 5820 "no src for dst %s\n,", 5821 AF_INET6, v6dstp); 5822 printf(" through interface %s\n", 5823 dst_ill->ill_name); 5824 } 5825 goto err_ret; 5826 } 5827 src_ipif = ipif; 5828 ipif_refhold(src_ipif); 5829 } 5830 ire = ipif_to_ire_v6(ipif); 5831 if (ire == NULL) { 5832 if (ip_debug > 2) { 5833 /* ip1dbg */ 5834 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5835 AF_INET6, &ipif->ipif_v6lcl_addr); 5836 printf("ip_newroute_ipif_v6: " 5837 "if %s\n", dst_ill->ill_name); 5838 } 5839 goto err_ret; 5840 } 5841 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5842 goto err_ret; 5843 5844 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5845 5846 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5847 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5848 if (ip_debug > 2) { 5849 /* ip1dbg */ 5850 pr_addr_dbg(" address %s\n", 5851 AF_INET6, &ire->ire_src_addr_v6); 5852 } 5853 save_ire = ire; 5854 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5855 (void *)ire, (void *)ipif)); 5856 5857 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5858 /* 5859 * an IRE_OFFSUBET was looked up 5860 * on that interface. 5861 * this ire has RTF_MULTIRT flag, 5862 * so the resolution loop 5863 * will be re-entered to resolve 5864 * additional routes on other 5865 * interfaces. For that purpose, 5866 * a copy of the packet is 5867 * made at this point. 
5868 */ 5869 fire->ire_last_used_time = lbolt; 5870 copy_mp = copymsg(first_mp); 5871 if (copy_mp) { 5872 MULTIRT_DEBUG_TAG(copy_mp); 5873 } 5874 } 5875 5876 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5877 switch (ire->ire_type) { 5878 case IRE_IF_NORESOLVER: { 5879 /* 5880 * We have what we need to build an IRE_CACHE. 5881 * 5882 * handle the Gated case, where we create 5883 * a NORESOLVER route for loopback. 5884 */ 5885 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5886 break; 5887 /* 5888 * The newly created ire will inherit the flags of the 5889 * parent ire, if any. 5890 */ 5891 ire = ire_create_v6( 5892 v6dstp, /* dest address */ 5893 &ipv6_all_ones, /* mask */ 5894 &src_ipif->ipif_v6src_addr, /* source address */ 5895 NULL, /* gateway address */ 5896 &save_ire->ire_max_frag, 5897 NULL, /* no src nce */ 5898 dst_ill->ill_rq, /* recv-from queue */ 5899 dst_ill->ill_wq, /* send-to queue */ 5900 IRE_CACHE, 5901 src_ipif, 5902 NULL, 5903 (fire != NULL) ? /* Parent handle */ 5904 fire->ire_phandle : 0, 5905 save_ire->ire_ihandle, /* Interface handle */ 5906 (fire != NULL) ? 5907 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5908 0, 5909 &ire_uinfo_null, 5910 NULL, 5911 NULL, 5912 ipst); 5913 5914 if (ire == NULL) { 5915 ire_refrele(save_ire); 5916 break; 5917 } 5918 5919 ire->ire_marks |= ire_marks; 5920 5921 err = ndp_noresolver(dst_ill, v6dstp); 5922 if (err != 0) { 5923 ire_refrele(save_ire); 5924 break; 5925 } 5926 5927 /* Prevent save_ire from getting deleted */ 5928 IRB_REFHOLD(save_ire->ire_bucket); 5929 /* Has it been removed already ? */ 5930 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5931 IRB_REFRELE(save_ire->ire_bucket); 5932 ire_refrele(save_ire); 5933 break; 5934 } 5935 5936 ire_add_then_send(q, ire, first_mp); 5937 if (ip6_asp_table_held) { 5938 ip6_asp_table_refrele(ipst); 5939 ip6_asp_table_held = B_FALSE; 5940 } 5941 5942 /* Assert that it is not deleted yet. 
*/ 5943 ASSERT(save_ire->ire_ptpn != NULL); 5944 IRB_REFRELE(save_ire->ire_bucket); 5945 ire_refrele(save_ire); 5946 if (fire != NULL) { 5947 ire_refrele(fire); 5948 fire = NULL; 5949 } 5950 5951 /* 5952 * The resolution loop is re-entered if we 5953 * actually are in a multirouting case. 5954 */ 5955 if (copy_mp != NULL) { 5956 boolean_t need_resolve = 5957 ire_multirt_need_resolve_v6(v6dstp, 5958 MBLK_GETLABEL(copy_mp), ipst); 5959 if (!need_resolve) { 5960 MULTIRT_DEBUG_UNTAG(copy_mp); 5961 freemsg(copy_mp); 5962 copy_mp = NULL; 5963 } else { 5964 /* 5965 * ipif_lookup_group_v6() calls 5966 * ire_lookup_multi_v6() that uses 5967 * ire_ftable_lookup_v6() to find 5968 * an IRE_INTERFACE for the group. 5969 * In the multirt case, 5970 * ire_lookup_multi_v6() then invokes 5971 * ire_multirt_lookup_v6() to find 5972 * the next resolvable ire. 5973 * As a result, we obtain a new 5974 * interface, derived from the 5975 * next ire. 5976 */ 5977 if (ipif_held) { 5978 ipif_refrele(ipif); 5979 ipif_held = B_FALSE; 5980 } 5981 ipif = ipif_lookup_group_v6(v6dstp, 5982 zoneid, ipst); 5983 ip2dbg(("ip_newroute_ipif: " 5984 "multirt dst %08x, ipif %p\n", 5985 ntohl(V4_PART_OF_V6((*v6dstp))), 5986 (void *)ipif)); 5987 if (ipif != NULL) { 5988 ipif_held = B_TRUE; 5989 mp = copy_mp; 5990 copy_mp = NULL; 5991 multirt_resolve_next = 5992 B_TRUE; 5993 continue; 5994 } else { 5995 freemsg(copy_mp); 5996 } 5997 } 5998 } 5999 ill_refrele(dst_ill); 6000 if (ipif_held) { 6001 ipif_refrele(ipif); 6002 ipif_held = B_FALSE; 6003 } 6004 if (src_ipif != NULL) 6005 ipif_refrele(src_ipif); 6006 return; 6007 } 6008 case IRE_IF_RESOLVER: { 6009 6010 ASSERT(dst_ill->ill_isv6); 6011 6012 /* 6013 * We obtain a partial IRE_CACHE which we will pass 6014 * along with the resolver query. When the response 6015 * comes back it will be there ready for us to add. 6016 */ 6017 /* 6018 * the newly created ire will inherit the flags of the 6019 * parent ire, if any. 
6020 */ 6021 ire = ire_create_v6( 6022 v6dstp, /* dest address */ 6023 &ipv6_all_ones, /* mask */ 6024 &src_ipif->ipif_v6src_addr, /* source address */ 6025 NULL, /* gateway address */ 6026 &save_ire->ire_max_frag, 6027 NULL, /* src nce */ 6028 dst_ill->ill_rq, /* recv-from queue */ 6029 dst_ill->ill_wq, /* send-to queue */ 6030 IRE_CACHE, 6031 src_ipif, 6032 NULL, 6033 (fire != NULL) ? /* Parent handle */ 6034 fire->ire_phandle : 0, 6035 save_ire->ire_ihandle, /* Interface handle */ 6036 (fire != NULL) ? 6037 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6038 0, 6039 &ire_uinfo_null, 6040 NULL, 6041 NULL, 6042 ipst); 6043 6044 if (ire == NULL) { 6045 ire_refrele(save_ire); 6046 break; 6047 } 6048 6049 ire->ire_marks |= ire_marks; 6050 6051 /* Resolve and add ire to the ctable */ 6052 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6053 switch (err) { 6054 case 0: 6055 /* Prevent save_ire from getting deleted */ 6056 IRB_REFHOLD(save_ire->ire_bucket); 6057 /* Has it been removed already ? */ 6058 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6059 IRB_REFRELE(save_ire->ire_bucket); 6060 ire_refrele(save_ire); 6061 break; 6062 } 6063 /* 6064 * We have a resolved cache entry, 6065 * add in the IRE. 6066 */ 6067 ire_add_then_send(q, ire, first_mp); 6068 if (ip6_asp_table_held) { 6069 ip6_asp_table_refrele(ipst); 6070 ip6_asp_table_held = B_FALSE; 6071 } 6072 6073 /* Assert that it is not deleted yet. */ 6074 ASSERT(save_ire->ire_ptpn != NULL); 6075 IRB_REFRELE(save_ire->ire_bucket); 6076 ire_refrele(save_ire); 6077 if (fire != NULL) { 6078 ire_refrele(fire); 6079 fire = NULL; 6080 } 6081 6082 /* 6083 * The resolution loop is re-entered if we 6084 * actually are in a multirouting case. 
6085 */ 6086 if (copy_mp != NULL) { 6087 boolean_t need_resolve = 6088 ire_multirt_need_resolve_v6(v6dstp, 6089 MBLK_GETLABEL(copy_mp), ipst); 6090 if (!need_resolve) { 6091 MULTIRT_DEBUG_UNTAG(copy_mp); 6092 freemsg(copy_mp); 6093 copy_mp = NULL; 6094 } else { 6095 /* 6096 * ipif_lookup_group_v6() calls 6097 * ire_lookup_multi_v6() that 6098 * uses ire_ftable_lookup_v6() 6099 * to find an IRE_INTERFACE for 6100 * the group. In the multirt 6101 * case, ire_lookup_multi_v6() 6102 * then invokes 6103 * ire_multirt_lookup_v6() to 6104 * find the next resolvable ire. 6105 * As a result, we obtain a new 6106 * interface, derived from the 6107 * next ire. 6108 */ 6109 if (ipif_held) { 6110 ipif_refrele(ipif); 6111 ipif_held = B_FALSE; 6112 } 6113 ipif = ipif_lookup_group_v6( 6114 v6dstp, zoneid, ipst); 6115 ip2dbg(("ip_newroute_ipif: " 6116 "multirt dst %08x, " 6117 "ipif %p\n", 6118 ntohl(V4_PART_OF_V6( 6119 (*v6dstp))), 6120 (void *)ipif)); 6121 if (ipif != NULL) { 6122 ipif_held = B_TRUE; 6123 mp = copy_mp; 6124 copy_mp = NULL; 6125 multirt_resolve_next = 6126 B_TRUE; 6127 continue; 6128 } else { 6129 freemsg(copy_mp); 6130 } 6131 } 6132 } 6133 ill_refrele(dst_ill); 6134 if (ipif_held) { 6135 ipif_refrele(ipif); 6136 ipif_held = B_FALSE; 6137 } 6138 if (src_ipif != NULL) 6139 ipif_refrele(src_ipif); 6140 return; 6141 6142 case EINPROGRESS: 6143 /* 6144 * mp was consumed - presumably queued. 6145 * No need for ire, presumably resolution is 6146 * in progress, and ire will be added when the 6147 * address is resolved. 6148 */ 6149 if (ip6_asp_table_held) { 6150 ip6_asp_table_refrele(ipst); 6151 ip6_asp_table_held = B_FALSE; 6152 } 6153 ire_delete(ire); 6154 ire_refrele(save_ire); 6155 if (fire != NULL) { 6156 ire_refrele(fire); 6157 fire = NULL; 6158 } 6159 6160 /* 6161 * The resolution loop is re-entered if we 6162 * actually are in a multirouting case. 
6163 */ 6164 if (copy_mp != NULL) { 6165 boolean_t need_resolve = 6166 ire_multirt_need_resolve_v6(v6dstp, 6167 MBLK_GETLABEL(copy_mp), ipst); 6168 if (!need_resolve) { 6169 MULTIRT_DEBUG_UNTAG(copy_mp); 6170 freemsg(copy_mp); 6171 copy_mp = NULL; 6172 } else { 6173 /* 6174 * ipif_lookup_group_v6() calls 6175 * ire_lookup_multi_v6() that 6176 * uses ire_ftable_lookup_v6() 6177 * to find an IRE_INTERFACE for 6178 * the group. In the multirt 6179 * case, ire_lookup_multi_v6() 6180 * then invokes 6181 * ire_multirt_lookup_v6() to 6182 * find the next resolvable ire. 6183 * As a result, we obtain a new 6184 * interface, derived from the 6185 * next ire. 6186 */ 6187 if (ipif_held) { 6188 ipif_refrele(ipif); 6189 ipif_held = B_FALSE; 6190 } 6191 ipif = ipif_lookup_group_v6( 6192 v6dstp, zoneid, ipst); 6193 ip2dbg(("ip_newroute_ipif: " 6194 "multirt dst %08x, " 6195 "ipif %p\n", 6196 ntohl(V4_PART_OF_V6( 6197 (*v6dstp))), 6198 (void *)ipif)); 6199 if (ipif != NULL) { 6200 ipif_held = B_TRUE; 6201 mp = copy_mp; 6202 copy_mp = NULL; 6203 multirt_resolve_next = 6204 B_TRUE; 6205 continue; 6206 } else { 6207 freemsg(copy_mp); 6208 } 6209 } 6210 } 6211 ill_refrele(dst_ill); 6212 if (ipif_held) { 6213 ipif_refrele(ipif); 6214 ipif_held = B_FALSE; 6215 } 6216 if (src_ipif != NULL) 6217 ipif_refrele(src_ipif); 6218 return; 6219 default: 6220 /* Some transient error */ 6221 ire_refrele(save_ire); 6222 break; 6223 } 6224 break; 6225 } 6226 default: 6227 break; 6228 } 6229 if (ip6_asp_table_held) { 6230 ip6_asp_table_refrele(ipst); 6231 ip6_asp_table_held = B_FALSE; 6232 } 6233 } while (multirt_resolve_next); 6234 6235 err_ret: 6236 if (ip6_asp_table_held) 6237 ip6_asp_table_refrele(ipst); 6238 if (ire != NULL) 6239 ire_refrele(ire); 6240 if (fire != NULL) 6241 ire_refrele(fire); 6242 if (ipif != NULL && ipif_held) 6243 ipif_refrele(ipif); 6244 if (src_ipif != NULL) 6245 ipif_refrele(src_ipif); 6246 /* Multicast - no point in trying to generate ICMP error */ 6247 ASSERT((attach_ill 
== NULL) || (dst_ill == attach_ill)); 6248 if (dst_ill != NULL) { 6249 ill = dst_ill; 6250 ill_held = B_TRUE; 6251 } 6252 if (mp->b_prev || mp->b_next) { 6253 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6254 } else { 6255 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6256 } 6257 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6258 mp->b_next = NULL; 6259 mp->b_prev = NULL; 6260 freemsg(first_mp); 6261 if (ill_held) 6262 ill_refrele(ill); 6263 } 6264 6265 /* 6266 * Parse and process any hop-by-hop or destination options. 6267 * 6268 * Assumes that q is an ill read queue so that ICMP errors for link-local 6269 * destinations are sent out the correct interface. 6270 * 6271 * Returns -1 if there was an error and mp has been consumed. 6272 * Returns 0 if no special action is needed. 6273 * Returns 1 if the packet contained a router alert option for this node 6274 * which is verified to be "interesting/known" for our implementation. 6275 * 6276 * XXX Note: In future as more hbh or dest options are defined, 6277 * it may be better to have different routines for hbh and dest 6278 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6279 * may have same value in different namespaces. Or is it same namespace ?? 
 * Current code checks for each opt_type (other than pads) if it is in
 * the expected nexthdr (hbh or dest)
 *
 * optptr/optlen describe the option area that is walked; both are advanced
 * as each option is consumed. hdr_type is compared against
 * IPPROTO_HOPOPTS / IPPROTO_DSTOPTS to verify that an option appears in
 * the extension header it is expected in. If mp is an M_CTL the packet
 * proper is mp->b_cont; the whole chain (first_mp) is what gets freed or
 * handed to the ICMP error routines.
 */
static int
ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h,
    uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst)
{
	uint8_t opt_type;
	uint_t optused;		/* bytes consumed by the current option */
	int ret = 0;
	mblk_t *first_mp;
	const char *errtype;	/* only used in debug output */
	zoneid_t zoneid;
	ill_t *ill = q->q_ptr;

	first_mp = mp;
	if (mp->b_datap->db_type == M_CTL) {
		mp = mp->b_cont;
	}

	while (optlen != 0) {
		opt_type = *optptr;
		if (opt_type == IP6OPT_PAD1) {
			/* Pad1 is a single byte with no length field. */
			optused = 1;
		} else {
			/* Every other option has a type and a length byte. */
			if (optlen < 2)
				goto bad_opt;
			errtype = "malformed";
			if (opt_type == ip6opt_ls) {
				/* Only length-checked here, then skipped. */
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
			} else switch (opt_type) {
			case IP6OPT_PADN:
				/*
				 * Note:We don't verify that (N-2) pad octets
				 * are zero as required by spec. Adhere to
				 * "be liberal in what you accept..." part of
				 * implementation philosophy (RFC791,RFC1122)
				 */
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
				break;

			case IP6OPT_JUMBO:
				if (hdr_type != IPPROTO_HOPOPTS)
					goto opt_error;
				goto opt_error; /* XXX Not implemented! */

			case IP6OPT_ROUTER_ALERT: {
				struct ip6_opt_router *or;

				if (hdr_type != IPPROTO_HOPOPTS)
					goto opt_error;
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
				or = (struct ip6_opt_router *)optptr;
				/* Check total length and alignment */
				if (optused != sizeof (*or) ||
				    ((uintptr_t)or->ip6or_value & 0x1) != 0)
					goto opt_error;
				/*
				 * Check value; only the alert values listed
				 * below make the function return 1, any
				 * other value is silently accepted.
				 */
				switch (*((uint16_t *)or->ip6or_value)) {
				case IP6_ALERT_MLD:
				case IP6_ALERT_RSVP:
					ret = 1;
				}
				break;
			}
			case IP6OPT_HOME_ADDRESS: {
				/*
				 * Minimal support for the home address option
				 * (which is required by all IPv6 nodes).
				 * Implement by just swapping the home address
				 * and source address.
				 * XXX Note: this has IPsec implications since
				 * AH needs to take this into account.
				 * Also, when IPsec is used we need to ensure
				 * that this is only processed once
				 * in the received packet (to avoid swapping
				 * back and forth).
				 * NOTE:This option processing is considered
				 * to be unsafe and prone to a denial of
				 * service attack.
				 * The current processing is not safe even with
				 * IPsec secured IP packets. Since the home
				 * address option processing requirement still
				 * is in the IETF draft and in the process of
				 * being redefined for its usage, it has been
				 * decided to turn off the option by default.
				 * If this section of code needs to be executed,
				 * ndd variable ip6_ignore_home_address_opt
				 * should be set to 0 at the user's own risk.
				 */
				struct ip6_opt_home_address *oh;
				in6_addr_t tmp;

				if (ipst->ips_ipv6_ignore_home_address_opt)
					goto opt_error;

				if (hdr_type != IPPROTO_DSTOPTS)
					goto opt_error;
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;

				/*
				 * We did this dest. opt the first time
				 * around (i.e. before AH processing).
				 * If we've done AH... stop now.
				 */
				if (first_mp != mp) {
					ipsec_in_t *ii;

					ii = (ipsec_in_t *)first_mp->b_rptr;
					/*
					 * This break leaves the option
					 * switch; optused is already set so
					 * the option is simply stepped over.
					 */
					if (ii->ipsec_in_ah_sa != NULL)
						break;
				}

				oh = (struct ip6_opt_home_address *)optptr;
				/* Check total length and alignment */
				if (optused < sizeof (*oh) ||
				    ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
					goto opt_error;
				/* Swap ip6_src and the home address */
				tmp = ip6h->ip6_src;
				/* XXX Note: only 8 byte alignment option */
				ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
				*(in6_addr_t *)oh->ip6oh_addr = tmp;
				break;
			}

			case IP6OPT_TUNNEL_LIMIT:
				if (hdr_type != IPPROTO_DSTOPTS) {
					goto opt_error;
				}
				optused = 2 + optptr[1];
				if (optused > optlen) {
					goto bad_opt;
				}
				/* Type, length and one data byte. */
				if (optused != 3) {
					goto opt_error;
				}
				break;

			default:
				errtype = "unknown";
				/* FALLTHROUGH */
			opt_error:
				/*
				 * Reached for unknown options and, through
				 * goto, for known options that failed a
				 * check above. What happens next is selected
				 * by IP6OPT_TYPE(opt_type).
				 */
				/* Determine which zone should send error */
				zoneid = ipif_lookup_addr_zoneid_v6(
				    &ip6h->ip6_dst, ill, ipst);
				switch (IP6OPT_TYPE(opt_type)) {
				case IP6OPT_TYPE_SKIP:
					/* Step over the option and go on. */
					optused = 2 + optptr[1];
					if (optused > optlen)
						goto bad_opt;
					ip1dbg(("ip_process_options_v6: %s "
					    "opt 0x%x skipped\n",
					    errtype, opt_type));
					break;
				case IP6OPT_TYPE_DISCARD:
					ip1dbg(("ip_process_options_v6: %s "
					    "opt 0x%x; packet dropped\n",
					    errtype, opt_type));
					freemsg(first_mp);
					return (-1);
				case IP6OPT_TYPE_ICMP:
					/* No zone to send from: just drop. */
					if (zoneid == ALL_ZONES) {
						freemsg(first_mp);
						return (-1);
					}
					icmp_param_problem_v6(WR(q), first_mp,
					    ICMP6_PARAMPROB_OPTION,
					    (uint32_t)(optptr -
					    (uint8_t *)ip6h),
					    B_FALSE, B_FALSE, zoneid, ipst);
					return (-1);
				case IP6OPT_TYPE_FORCEICMP:
					if (zoneid == ALL_ZONES) {
						freemsg(first_mp);
						return (-1);
					}
					/*
					 * Same as IP6OPT_TYPE_ICMP except
					 * that the second boolean argument
					 * is B_TRUE.
					 */
					icmp_param_problem_v6(WR(q), first_mp,
					    ICMP6_PARAMPROB_OPTION,
					    (uint32_t)(optptr -
					    (uint8_t *)ip6h),
					    B_FALSE, B_TRUE, zoneid, ipst);
					return (-1);
				default:
					ASSERT(0);
				}
			}
		}
		/* Advance past the option just handled. */
		optlen -= optused;
		optptr += optused;
	}
	return (ret);

bad_opt:
	/*
	 * The option does not fit in the remaining option area. The error
	 * pointer passed below is the offset of the option from the start
	 * of the IPv6 header.
	 */
	/* Determine which zone should send error */
	zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst);
	if (zoneid == ALL_ZONES) {
		freemsg(first_mp);
	} else {
		icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION,
		    (uint32_t)(optptr - (uint8_t *)ip6h),
		    B_FALSE, B_FALSE, zoneid, ipst);
	}
	return (-1);
}

/*
 * Process a routing header that is not yet empty.
 * Only handles type 0 routing headers.
 *
 * The next address listed in the header is swapped with ip6_dst, segleft
 * is decremented, and the packet is passed back to ip_rput_data_v6() once
 * ip_check_v6_mblk() returns 0. If forwarding of source routed packets is
 * disabled (ips_ipv6_forward_src_routed is zero) the packet is dropped.
 * For a malformed header an ICMPv6 error is sent, unless hada_mp is
 * non-NULL, in which case both hada_mp and mp are freed instead.
 */
static void
ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
    ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp)
{
	ip6_rthdr0_t *rthdr;
	uint_t ehdrlen;
	uint_t numaddr;
	in6_addr_t *addrptr;
	in6_addr_t tmp;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(rth->ip6r_segleft != 0);

	if (!ipst->ips_ipv6_forward_src_routed) {
		/* XXX Check for source routed out same interface? */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
		freemsg(hada_mp);
		freemsg(mp);
		return;
	}

	if (rth->ip6r_type != 0) {
		if (hada_mp != NULL)
			goto hada_drop;
		/* Sent by forwarding path, and router is global zone */
		icmp_param_problem_v6(WR(q), mp,
		    ICMP6_PARAMPROB_HEADER,
		    (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
		    B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst);
		return;
	}
	rthdr = (ip6_rthdr0_t *)rth;
	/* Header length in bytes; only used by the ASSERT below. */
	ehdrlen = 8 * (rthdr->ip6r0_len + 1);
	ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr);
	/* The address list starts right after the fixed part. */
	addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
	/* rthdr->ip6r0_len is twice the number of addresses in the header */
	if (rthdr->ip6r0_len & 0x1) {
		/* An odd length is impossible */
		if (hada_mp != NULL)
			goto hada_drop;
		/* Sent by forwarding path, and router is global zone */
		icmp_param_problem_v6(WR(q), mp,
		    ICMP6_PARAMPROB_HEADER,
		    (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h),
		    B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst);
		return;
	}
	numaddr = rthdr->ip6r0_len / 2;
	if (rthdr->ip6r0_segleft > numaddr) {
		/* segleft exceeds number of addresses in routing header */
		if (hada_mp != NULL)
			goto hada_drop;
		/* Sent by forwarding path, and router is global zone */
		icmp_param_problem_v6(WR(q), mp,
		    ICMP6_PARAMPROB_HEADER,
		    (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft -
		    (uchar_t *)ip6h),
		    B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst);
		return;
	}
	/* Step to the entry at index (numaddr - segleft). */
	addrptr += (numaddr - rthdr->ip6r0_segleft);
	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) ||
	    IN6_IS_ADDR_MULTICAST(addrptr)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		freemsg(hada_mp);
		freemsg(mp);
		return;
	}
	/* Swap */
	tmp = *addrptr;
	*addrptr = ip6h->ip6_dst;
	ip6h->ip6_dst = tmp;
	rthdr->ip6r0_segleft--;
	/* Don't allow any mapped addresses - ip_wput_v6 can't handle them */
	if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
		if (hada_mp != NULL)
			goto hada_drop;
		/* Sent by forwarding path, and router is global zone */
		icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE,
		    B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst);
		return;
	}
	/*
	 * NOTE(review): nothing is done here when ip_check_v6_mblk()
	 * returns non-zero; presumably it disposes of mp itself - confirm,
	 * and confirm what happens to hada_mp in that case.
	 */
	if (ip_check_v6_mblk(mp, ill) == 0) {
		/* Reload ip6h from mp before handing the packet on. */
		ip6h = (ip6_t *)mp->b_rptr;
		ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp);
	}
	return;
hada_drop:
	/* IPsec kstats: bean counter? */
	freemsg(hada_mp);
	freemsg(mp);
}

/*
 * Read side put procedure for IPv6 module.
 */
void
ip_rput_v6(queue_t *q, mblk_t *mp)
{
	mblk_t		*first_mp;
	mblk_t		*hada_mp = NULL;
	ip6_t		*ip6h;
	boolean_t	ll_multicast = B_FALSE;
	boolean_t	mctl_present = B_FALSE;
	ill_t		*ill;
	struct iocblk	*iocp;
	uint_t 		flags = 0;
	mblk_t		*dl_mp;
	ip_stack_t	*ipst;

	ill = (ill_t *)q->q_ptr;
	ipst = ill->ill_ipst;
	if (ill->ill_state_flags & ILL_CONDEMNED) {
		union DL_primitives *dl;

		dl = (union DL_primitives *)mp->b_rptr;
		/*
		 * Things are opening or closing - only accept DLPI
		 * ack messages.  If the stream is closing and ip_wsrv
		 * has completed, ip_close is out of the qwait, but has
		 * not yet completed qprocsoff. Don't proceed any further
		 * because the ill has been cleaned up and things hanging
		 * off the ill have been freed.
		 */
		if ((mp->b_datap->db_type != M_PCPROTO) ||
		    (dl->dl_primitive == DL_UNITDATA_IND)) {
			inet_freemsg(mp);
			return;
		}
	}

	dl_mp = NULL;
	switch (mp->b_datap->db_type) {
	case M_DATA: {
		int hlen;
		uchar_t *ucp;
		struct ether_header *eh;
		dl_unitdata_ind_t *dui;

		/*
		 * This is a work-around for CR 6451644, a bug in Nemo.  It
		 * should be removed when that problem is fixed.
6641 */ 6642 if (ill->ill_mactype == DL_ETHER && 6643 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6644 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6645 ucp[-2] == (IP6_DL_SAP >> 8)) { 6646 if (hlen >= sizeof (struct ether_vlan_header) && 6647 ucp[-5] == 0 && ucp[-6] == 0x81) 6648 ucp -= sizeof (struct ether_vlan_header); 6649 else 6650 ucp -= sizeof (struct ether_header); 6651 /* 6652 * If it's a group address, then fabricate a 6653 * DL_UNITDATA_IND message. 6654 */ 6655 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6656 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6657 BPRI_HI)) != NULL) { 6658 eh = (struct ether_header *)ucp; 6659 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6660 DB_TYPE(dl_mp) = M_PROTO; 6661 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6662 dui->dl_primitive = DL_UNITDATA_IND; 6663 dui->dl_dest_addr_length = 8; 6664 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6665 dui->dl_src_addr_length = 8; 6666 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6667 8; 6668 dui->dl_group_address = 1; 6669 ucp = (uchar_t *)(dui + 1); 6670 if (ill->ill_sap_length > 0) 6671 ucp += ill->ill_sap_length; 6672 bcopy(&eh->ether_dhost, ucp, 6); 6673 bcopy(&eh->ether_shost, ucp + 8, 6); 6674 ucp = (uchar_t *)(dui + 1); 6675 if (ill->ill_sap_length < 0) 6676 ucp += 8 + ill->ill_sap_length; 6677 bcopy(&eh->ether_type, ucp, 2); 6678 bcopy(&eh->ether_type, ucp + 8, 2); 6679 } 6680 } 6681 break; 6682 } 6683 6684 case M_PROTO: 6685 case M_PCPROTO: 6686 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6687 DL_UNITDATA_IND) { 6688 /* Go handle anything other than data elsewhere. */ 6689 ip_rput_dlpi(q, mp); 6690 return; 6691 } 6692 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6693 ll_multicast = dlur->dl_group_address; 6694 #undef dlur 6695 /* Save the DLPI header. 
*/ 6696 dl_mp = mp; 6697 mp = mp->b_cont; 6698 dl_mp->b_cont = NULL; 6699 break; 6700 case M_BREAK: 6701 panic("ip_rput_v6: got an M_BREAK"); 6702 /*NOTREACHED*/ 6703 case M_IOCACK: 6704 iocp = (struct iocblk *)mp->b_rptr; 6705 switch (iocp->ioc_cmd) { 6706 case DL_IOC_HDR_INFO: 6707 ill = (ill_t *)q->q_ptr; 6708 ill_fastpath_ack(ill, mp); 6709 return; 6710 6711 case SIOCGTUNPARAM: 6712 case OSIOCGTUNPARAM: 6713 ip_rput_other(NULL, q, mp, NULL); 6714 return; 6715 6716 case SIOCSTUNPARAM: 6717 case OSIOCSTUNPARAM: 6718 /* Go through qwriter */ 6719 break; 6720 default: 6721 putnext(q, mp); 6722 return; 6723 } 6724 /* FALLTHRU */ 6725 case M_ERROR: 6726 case M_HANGUP: 6727 mutex_enter(&ill->ill_lock); 6728 if (ill->ill_state_flags & ILL_CONDEMNED) { 6729 mutex_exit(&ill->ill_lock); 6730 freemsg(mp); 6731 return; 6732 } 6733 ill_refhold_locked(ill); 6734 mutex_exit(&ill->ill_lock); 6735 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6736 return; 6737 case M_CTL: 6738 if ((MBLKL(mp) > sizeof (int)) && 6739 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6740 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6741 mctl_present = B_TRUE; 6742 break; 6743 } 6744 putnext(q, mp); 6745 return; 6746 case M_IOCNAK: 6747 iocp = (struct iocblk *)mp->b_rptr; 6748 switch (iocp->ioc_cmd) { 6749 case DL_IOC_HDR_INFO: 6750 case SIOCGTUNPARAM: 6751 case OSIOCGTUNPARAM: 6752 ip_rput_other(NULL, q, mp, NULL); 6753 return; 6754 6755 case SIOCSTUNPARAM: 6756 case OSIOCSTUNPARAM: 6757 mutex_enter(&ill->ill_lock); 6758 if (ill->ill_state_flags & ILL_CONDEMNED) { 6759 mutex_exit(&ill->ill_lock); 6760 freemsg(mp); 6761 return; 6762 } 6763 ill_refhold_locked(ill); 6764 mutex_exit(&ill->ill_lock); 6765 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6766 return; 6767 default: 6768 break; 6769 } 6770 /* FALLTHRU */ 6771 default: 6772 putnext(q, mp); 6773 return; 6774 } 6775 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6776 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6777 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6778 /* 6779 * if db_ref > 1 then copymsg and free original. Packet may be 6780 * changed and do not want other entity who has a reference to this 6781 * message to trip over the changes. This is a blind change because 6782 * trying to catch all places that might change packet is too 6783 * difficult (since it may be a module above this one). 6784 */ 6785 if (mp->b_datap->db_ref > 1) { 6786 mblk_t *mp1; 6787 6788 mp1 = copymsg(mp); 6789 freemsg(mp); 6790 if (mp1 == NULL) { 6791 first_mp = NULL; 6792 goto discard; 6793 } 6794 mp = mp1; 6795 } 6796 first_mp = mp; 6797 if (mctl_present) { 6798 hada_mp = first_mp; 6799 mp = first_mp->b_cont; 6800 } 6801 6802 if (ip_check_v6_mblk(mp, ill) == -1) 6803 return; 6804 6805 ip6h = (ip6_t *)mp->b_rptr; 6806 6807 DTRACE_PROBE4(ip6__physical__in__start, 6808 ill_t *, ill, ill_t *, NULL, 6809 ip6_t *, ip6h, mblk_t *, first_mp); 6810 6811 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6812 ipst->ips_ipv6firewall_physical_in, 6813 ill, NULL, ip6h, first_mp, mp, ipst); 6814 6815 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6816 6817 if (first_mp == NULL) 6818 return; 6819 6820 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6821 IPV6_DEFAULT_VERS_AND_FLOW) { 6822 /* 6823 * It may be a bit too expensive to do this mapped address 6824 * check here, but in the interest of robustness, it seems 6825 * like the correct place. 6826 * TODO: Avoid this check for e.g. connected TCP sockets 6827 */ 6828 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6829 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6830 goto discard; 6831 } 6832 6833 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6834 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6835 goto discard; 6836 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6837 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6838 goto discard; 6839 } 6840 6841 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6842 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6843 } else { 6844 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6845 goto discard; 6846 } 6847 freemsg(dl_mp); 6848 return; 6849 6850 discard: 6851 if (dl_mp != NULL) 6852 freeb(dl_mp); 6853 freemsg(first_mp); 6854 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6855 } 6856 6857 /* 6858 * Walk through the IPv6 packet in mp and see if there's an AH header 6859 * in it. See if the AH header needs to get done before other headers in 6860 * the packet. (Worker function for ipsec_early_ah_v6().) 6861 */ 6862 #define IPSEC_HDR_DONT_PROCESS 0 6863 #define IPSEC_HDR_PROCESS 1 6864 #define IPSEC_MEMORY_ERROR 2 6865 static int 6866 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6867 { 6868 uint_t length; 6869 uint_t ehdrlen; 6870 uint8_t *whereptr; 6871 uint8_t *endptr; 6872 uint8_t *nexthdrp; 6873 ip6_dest_t *desthdr; 6874 ip6_rthdr_t *rthdr; 6875 ip6_t *ip6h; 6876 6877 /* 6878 * For now just pullup everything. In general, the less pullups, 6879 * the better, but there's so much squirrelling through anyway, 6880 * it's just easier this way. 6881 */ 6882 if (!pullupmsg(mp, -1)) { 6883 return (IPSEC_MEMORY_ERROR); 6884 } 6885 6886 ip6h = (ip6_t *)mp->b_rptr; 6887 length = IPV6_HDR_LEN; 6888 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6889 endptr = mp->b_wptr; 6890 6891 /* 6892 * We can't just use the argument nexthdr in the place 6893 * of nexthdrp becaue we don't dereference nexthdrp 6894 * till we confirm whether it is a valid address. 6895 */ 6896 nexthdrp = &ip6h->ip6_nxt; 6897 while (whereptr < endptr) { 6898 /* Is there enough left for len + nexthdr? 
*/ 6899 if (whereptr + MIN_EHDR_LEN > endptr) 6900 return (IPSEC_MEMORY_ERROR); 6901 6902 switch (*nexthdrp) { 6903 case IPPROTO_HOPOPTS: 6904 case IPPROTO_DSTOPTS: 6905 /* Assumes the headers are identical for hbh and dst */ 6906 desthdr = (ip6_dest_t *)whereptr; 6907 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6908 if ((uchar_t *)desthdr + ehdrlen > endptr) 6909 return (IPSEC_MEMORY_ERROR); 6910 /* 6911 * Return DONT_PROCESS because the destination 6912 * options header may be for each hop in a 6913 * routing-header, and we only want AH if we're 6914 * finished with routing headers. 6915 */ 6916 if (*nexthdrp == IPPROTO_DSTOPTS) 6917 return (IPSEC_HDR_DONT_PROCESS); 6918 nexthdrp = &desthdr->ip6d_nxt; 6919 break; 6920 case IPPROTO_ROUTING: 6921 rthdr = (ip6_rthdr_t *)whereptr; 6922 6923 /* 6924 * If there's more hops left on the routing header, 6925 * return now with DON'T PROCESS. 6926 */ 6927 if (rthdr->ip6r_segleft > 0) 6928 return (IPSEC_HDR_DONT_PROCESS); 6929 6930 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6931 if ((uchar_t *)rthdr + ehdrlen > endptr) 6932 return (IPSEC_MEMORY_ERROR); 6933 nexthdrp = &rthdr->ip6r_nxt; 6934 break; 6935 case IPPROTO_FRAGMENT: 6936 /* Wait for reassembly */ 6937 return (IPSEC_HDR_DONT_PROCESS); 6938 case IPPROTO_AH: 6939 *nexthdr = IPPROTO_AH; 6940 return (IPSEC_HDR_PROCESS); 6941 case IPPROTO_NONE: 6942 /* No next header means we're finished */ 6943 default: 6944 return (IPSEC_HDR_DONT_PROCESS); 6945 } 6946 length += ehdrlen; 6947 whereptr += ehdrlen; 6948 } 6949 panic("ipsec_needs_processing_v6"); 6950 /*NOTREACHED*/ 6951 } 6952 6953 /* 6954 * Path for AH if options are present. If this is the first time we are 6955 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6956 * Otherwise, just fanout. Return value answers the boolean question: 6957 * "Did I consume the mblk you sent me?" 6958 * 6959 * Sometimes AH needs to be done before other IPv6 headers for security 6960 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 6961 * indicates if that is so, and fans out to the appropriate IPsec protocol 6962 * for the datagram passed in. 6963 */ 6964 static boolean_t 6965 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6966 ill_t *ill, mblk_t *hada_mp, zoneid_t zoneid) 6967 { 6968 mblk_t *mp; 6969 uint8_t nexthdr; 6970 ipsec_in_t *ii = NULL; 6971 ah_t *ah; 6972 ipsec_status_t ipsec_rc; 6973 ip_stack_t *ipst = ill->ill_ipst; 6974 netstack_t *ns = ipst->ips_netstack; 6975 ipsec_stack_t *ipss = ns->netstack_ipsec; 6976 6977 ASSERT((hada_mp == NULL) || (!mctl_present)); 6978 6979 switch (ipsec_needs_processing_v6( 6980 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6981 case IPSEC_MEMORY_ERROR: 6982 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6983 freemsg(hada_mp); 6984 freemsg(first_mp); 6985 return (B_TRUE); 6986 case IPSEC_HDR_DONT_PROCESS: 6987 return (B_FALSE); 6988 } 6989 6990 /* Default means send it to AH! */ 6991 ASSERT(nexthdr == IPPROTO_AH); 6992 if (!mctl_present) { 6993 mp = first_mp; 6994 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 6995 if (first_mp == NULL) { 6996 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6997 "allocation failure.\n")); 6998 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6999 freemsg(hada_mp); 7000 freemsg(mp); 7001 return (B_TRUE); 7002 } 7003 /* 7004 * Store the ill_index so that when we come back 7005 * from IPSEC we ride on the same queue. 7006 */ 7007 ii = (ipsec_in_t *)first_mp->b_rptr; 7008 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7009 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7010 first_mp->b_cont = mp; 7011 } 7012 /* 7013 * Cache hardware acceleration info. 
7014 */ 7015 if (hada_mp != NULL) { 7016 ASSERT(ii != NULL); 7017 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7018 "caching data attr.\n")); 7019 ii->ipsec_in_accelerated = B_TRUE; 7020 ii->ipsec_in_da = hada_mp; 7021 } 7022 7023 if (!ipsec_loaded(ipss)) { 7024 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 7025 return (B_TRUE); 7026 } 7027 7028 ah = ipsec_inbound_ah_sa(first_mp, ns); 7029 if (ah == NULL) 7030 return (B_TRUE); 7031 ASSERT(ii->ipsec_in_ah_sa != NULL); 7032 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7033 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7034 7035 switch (ipsec_rc) { 7036 case IPSEC_STATUS_SUCCESS: 7037 /* we're done with IPsec processing, send it up */ 7038 ip_fanout_proto_again(first_mp, ill, ill, NULL); 7039 break; 7040 case IPSEC_STATUS_FAILED: 7041 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 7042 break; 7043 case IPSEC_STATUS_PENDING: 7044 /* no action needed */ 7045 break; 7046 } 7047 return (B_TRUE); 7048 } 7049 7050 /* 7051 * Validate the IPv6 mblk for alignment. 7052 */ 7053 int 7054 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7055 { 7056 int pkt_len, ip6_len; 7057 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7058 7059 /* check for alignment and full IPv6 header */ 7060 if (!OK_32PTR((uchar_t *)ip6h) || 7061 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7062 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7063 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7064 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7065 freemsg(mp); 7066 return (-1); 7067 } 7068 ip6h = (ip6_t *)mp->b_rptr; 7069 } 7070 7071 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7072 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7073 7074 if (mp->b_cont == NULL) 7075 pkt_len = mp->b_wptr - mp->b_rptr; 7076 else 7077 pkt_len = msgdsize(mp); 7078 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7079 7080 /* 7081 * Check for bogus (too short packet) and packet which 7082 * was padded by the link layer. 
7083 */ 7084 if (ip6_len != pkt_len) { 7085 ssize_t diff; 7086 7087 if (ip6_len > pkt_len) { 7088 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7089 ip6_len, pkt_len)); 7090 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7091 freemsg(mp); 7092 return (-1); 7093 } 7094 diff = (ssize_t)(pkt_len - ip6_len); 7095 7096 if (!adjmsg(mp, -diff)) { 7097 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7098 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7099 freemsg(mp); 7100 return (-1); 7101 } 7102 } 7103 return (0); 7104 } 7105 7106 /* 7107 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7108 * ip_rput_v6 has already verified alignment, the min length, the version, 7109 * and db_ref = 1. 7110 * 7111 * The ill passed in (the arg named inill) is the ill that the packet 7112 * actually arrived on. We need to remember this when saving the 7113 * input interface index into potential IPV6_PKTINFO data in 7114 * ip_add_info_v6(). 7115 * 7116 * This routine doesn't free dl_mp; that's the caller's responsibility on 7117 * return. (Note that the callers are complex enough that there's no tail 7118 * recursion here anyway.) 
7119 */ 7120 void 7121 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7122 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7123 { 7124 ire_t *ire = NULL; 7125 ill_t *ill = inill; 7126 ill_t *outill; 7127 ipif_t *ipif; 7128 uint8_t *whereptr; 7129 uint8_t nexthdr; 7130 uint16_t remlen; 7131 uint_t prev_nexthdr_offset; 7132 uint_t used; 7133 size_t old_pkt_len; 7134 size_t pkt_len; 7135 uint16_t ip6_len; 7136 uint_t hdr_len; 7137 boolean_t mctl_present; 7138 mblk_t *first_mp; 7139 mblk_t *first_mp1; 7140 boolean_t no_forward; 7141 ip6_hbh_t *hbhhdr; 7142 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7143 conn_t *connp; 7144 ilm_t *ilm; 7145 uint32_t ports; 7146 zoneid_t zoneid = GLOBAL_ZONEID; 7147 uint16_t hck_flags, reass_hck_flags; 7148 uint32_t reass_sum; 7149 boolean_t cksum_err; 7150 mblk_t *mp1; 7151 ip_stack_t *ipst = inill->ill_ipst; 7152 7153 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7154 7155 if (hada_mp != NULL) { 7156 /* 7157 * It's an IPsec accelerated packet. 7158 * Keep a pointer to the data attributes around until 7159 * we allocate the ipsecinfo structure. 7160 */ 7161 IPSECHW_DEBUG(IPSECHW_PKT, 7162 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7163 hada_mp->b_cont = NULL; 7164 /* 7165 * Since it is accelerated, it came directly from 7166 * the ill. 
7167 */ 7168 ASSERT(mctl_present == B_FALSE); 7169 ASSERT(mp->b_datap->db_type != M_CTL); 7170 } 7171 7172 ip6h = (ip6_t *)mp->b_rptr; 7173 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7174 old_pkt_len = pkt_len = ip6_len; 7175 7176 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7177 hck_flags = DB_CKSUMFLAGS(mp); 7178 else 7179 hck_flags = 0; 7180 7181 /* Clear checksum flags in case we need to forward */ 7182 DB_CKSUMFLAGS(mp) = 0; 7183 reass_sum = reass_hck_flags = 0; 7184 7185 nexthdr = ip6h->ip6_nxt; 7186 7187 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7188 (uchar_t *)ip6h); 7189 whereptr = (uint8_t *)&ip6h[1]; 7190 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7191 7192 /* Process hop by hop header options */ 7193 if (nexthdr == IPPROTO_HOPOPTS) { 7194 uint_t ehdrlen; 7195 uint8_t *optptr; 7196 7197 if (remlen < MIN_EHDR_LEN) 7198 goto pkt_too_short; 7199 if (mp->b_cont != NULL && 7200 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7201 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7202 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7203 freemsg(hada_mp); 7204 freemsg(first_mp); 7205 return; 7206 } 7207 ip6h = (ip6_t *)mp->b_rptr; 7208 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7209 } 7210 hbhhdr = (ip6_hbh_t *)whereptr; 7211 nexthdr = hbhhdr->ip6h_nxt; 7212 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7213 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7214 7215 if (remlen < ehdrlen) 7216 goto pkt_too_short; 7217 if (mp->b_cont != NULL && 7218 whereptr + ehdrlen > mp->b_wptr) { 7219 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7220 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7221 freemsg(hada_mp); 7222 freemsg(first_mp); 7223 return; 7224 } 7225 ip6h = (ip6_t *)mp->b_rptr; 7226 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7227 hbhhdr = (ip6_hbh_t *)whereptr; 7228 } 7229 7230 optptr = whereptr + 2; 7231 whereptr += ehdrlen; 7232 remlen -= ehdrlen; 7233 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7234 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7235 case -1: 7236 /* 7237 * Packet has been consumed and any 7238 * needed ICMP messages sent. 7239 */ 7240 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7241 freemsg(hada_mp); 7242 return; 7243 case 0: 7244 /* no action needed */ 7245 break; 7246 case 1: 7247 /* Known router alert */ 7248 goto ipv6forus; 7249 } 7250 } 7251 7252 /* 7253 * Attach any necessary label information to this packet. 7254 */ 7255 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7256 if (ip6opt_ls != 0) 7257 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7258 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7259 freemsg(hada_mp); 7260 freemsg(first_mp); 7261 return; 7262 } 7263 7264 /* 7265 * On incoming v6 multicast packets we will bypass the ire table, 7266 * and assume that the read queue corresponds to the targetted 7267 * interface. 7268 * 7269 * The effect of this is the same as the IPv4 original code, but is 7270 * much cleaner I think. See ip_rput for how that was done. 7271 */ 7272 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7273 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7274 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7275 /* 7276 * XXX TODO Give to mrouted to for multicast forwarding. 
7277 */ 7278 ILM_WALKER_HOLD(ill); 7279 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7280 ILM_WALKER_RELE(ill); 7281 if (ilm == NULL) { 7282 if (ip_debug > 3) { 7283 /* ip2dbg */ 7284 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7285 " which is not for us: %s\n", AF_INET6, 7286 &ip6h->ip6_dst); 7287 } 7288 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7289 freemsg(hada_mp); 7290 freemsg(first_mp); 7291 return; 7292 } 7293 if (ip_debug > 3) { 7294 /* ip2dbg */ 7295 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7296 AF_INET6, &ip6h->ip6_dst); 7297 } 7298 zoneid = GLOBAL_ZONEID; 7299 goto ipv6forus; 7300 } 7301 7302 ipif = ill->ill_ipif; 7303 7304 /* 7305 * If a packet was received on an interface that is a 6to4 tunnel, 7306 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7307 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7308 * the 6to4 prefix of the address configured on the receiving interface. 7309 * Otherwise, the packet was delivered to this interface in error and 7310 * the packet must be dropped. 7311 */ 7312 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7313 7314 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7315 &ip6h->ip6_dst)) { 7316 if (ip_debug > 2) { 7317 /* ip1dbg */ 7318 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7319 "addressed packet which is not for us: " 7320 "%s\n", AF_INET6, &ip6h->ip6_dst); 7321 } 7322 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7323 freemsg(first_mp); 7324 return; 7325 } 7326 } 7327 7328 /* 7329 * Find an ire that matches destination. For link-local addresses 7330 * we have to match the ill. 7331 * TBD for site local addresses. 
7332 */ 7333 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7334 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7335 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7336 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 7337 } else { 7338 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7339 MBLK_GETLABEL(mp), ipst); 7340 } 7341 if (ire == NULL) { 7342 /* 7343 * No matching IRE found. Mark this packet as having 7344 * originated externally. 7345 */ 7346 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7347 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7348 if (!(ill->ill_flags & ILLF_ROUTER)) { 7349 BUMP_MIB(ill->ill_ip_mib, 7350 ipIfStatsInAddrErrors); 7351 } 7352 freemsg(hada_mp); 7353 freemsg(first_mp); 7354 return; 7355 } 7356 if (ip6h->ip6_hops <= 1) { 7357 if (hada_mp != NULL) 7358 goto hada_drop; 7359 /* Sent by forwarding path, and router is global zone */ 7360 icmp_time_exceeded_v6(WR(q), first_mp, 7361 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7362 GLOBAL_ZONEID, ipst); 7363 return; 7364 } 7365 /* 7366 * Per RFC 3513 section 2.5.2, we must not forward packets with 7367 * an unspecified source address. 7368 */ 7369 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7370 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7371 freemsg(hada_mp); 7372 freemsg(first_mp); 7373 return; 7374 } 7375 mp->b_prev = (mblk_t *)(uintptr_t) 7376 ill->ill_phyint->phyint_ifindex; 7377 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7378 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7379 ALL_ZONES, ipst); 7380 return; 7381 } 7382 /* we have a matching IRE */ 7383 if (ire->ire_stq != NULL) { 7384 ill_group_t *ill_group; 7385 ill_group_t *ire_group; 7386 7387 /* 7388 * To be quicker, we may wish not to chase pointers 7389 * (ire->ire_ipif->ipif_ill...) and instead store the 7390 * forwarding policy in the ire. An unfortunate side- 7391 * effect of this would be requiring an ire flush whenever 7392 * the ILLF_ROUTER flag changes. 
For now, chase pointers 7393 * once and store in the boolean no_forward. 7394 * 7395 * This appears twice to keep it out of the non-forwarding, 7396 * yes-it's-for-us-on-the-right-interface case. 7397 */ 7398 no_forward = ((ill->ill_flags & 7399 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7400 7401 7402 ASSERT(first_mp == mp); 7403 /* 7404 * This ire has a send-to queue - forward the packet. 7405 */ 7406 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7407 freemsg(hada_mp); 7408 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7409 if (no_forward) { 7410 BUMP_MIB(ill->ill_ip_mib, 7411 ipIfStatsInAddrErrors); 7412 } 7413 freemsg(mp); 7414 ire_refrele(ire); 7415 return; 7416 } 7417 /* 7418 * ipIfStatsHCInForwDatagrams should only be increment if there 7419 * will be an attempt to forward the packet, which is why we 7420 * increment after the above condition has been checked. 7421 */ 7422 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7423 if (ip6h->ip6_hops <= 1) { 7424 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7425 /* Sent by forwarding path, and router is global zone */ 7426 icmp_time_exceeded_v6(WR(q), mp, 7427 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7428 GLOBAL_ZONEID, ipst); 7429 ire_refrele(ire); 7430 return; 7431 } 7432 /* 7433 * Per RFC 3513 section 2.5.2, we must not forward packets with 7434 * an unspecified source address. 
7435 */ 7436 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7437 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7438 freemsg(mp); 7439 ire_refrele(ire); 7440 return; 7441 } 7442 7443 if (is_system_labeled()) { 7444 mblk_t *mp1; 7445 7446 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7447 BUMP_MIB(ill->ill_ip_mib, 7448 ipIfStatsForwProhibits); 7449 freemsg(mp); 7450 ire_refrele(ire); 7451 return; 7452 } 7453 /* Size may have changed */ 7454 mp = mp1; 7455 ip6h = (ip6_t *)mp->b_rptr; 7456 pkt_len = msgdsize(mp); 7457 } 7458 7459 if (pkt_len > ire->ire_max_frag) { 7460 int max_frag = ire->ire_max_frag; 7461 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7462 /* 7463 * Handle labeled packet resizing. 7464 */ 7465 if (is_system_labeled()) { 7466 max_frag = tsol_pmtu_adjust(mp, max_frag, 7467 pkt_len - old_pkt_len, AF_INET6); 7468 } 7469 7470 /* Sent by forwarding path, and router is global zone */ 7471 icmp_pkt2big_v6(WR(q), mp, max_frag, 7472 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7473 ire_refrele(ire); 7474 return; 7475 } 7476 7477 /* 7478 * Check to see if we're forwarding the packet to a 7479 * different link from which it came. If so, check the 7480 * source and destination addresses since routers must not 7481 * forward any packets with link-local source or 7482 * destination addresses to other links. Otherwise (if 7483 * we're forwarding onto the same link), conditionally send 7484 * a redirect message. 7485 */ 7486 ill_group = ill->ill_group; 7487 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7488 if (ire->ire_rfq != q && (ill_group == NULL || 7489 ill_group != ire_group)) { 7490 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7491 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7492 BUMP_MIB(ill->ill_ip_mib, 7493 ipIfStatsInAddrErrors); 7494 freemsg(mp); 7495 ire_refrele(ire); 7496 return; 7497 } 7498 /* TBD add site-local check at site boundary? 
*/ 7499 } else if (ipst->ips_ipv6_send_redirects) { 7500 in6_addr_t *v6targ; 7501 in6_addr_t gw_addr_v6; 7502 ire_t *src_ire_v6 = NULL; 7503 7504 /* 7505 * Don't send a redirect when forwarding a source 7506 * routed packet. 7507 */ 7508 if (ip_source_routed_v6(ip6h, mp, ipst)) 7509 goto forward; 7510 7511 mutex_enter(&ire->ire_lock); 7512 gw_addr_v6 = ire->ire_gateway_addr_v6; 7513 mutex_exit(&ire->ire_lock); 7514 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7515 v6targ = &gw_addr_v6; 7516 /* 7517 * We won't send redirects to a router 7518 * that doesn't have a link local 7519 * address, but will forward. 7520 */ 7521 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7522 BUMP_MIB(ill->ill_ip_mib, 7523 ipIfStatsInAddrErrors); 7524 goto forward; 7525 } 7526 } else { 7527 v6targ = &ip6h->ip6_dst; 7528 } 7529 7530 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7531 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7532 ALL_ZONES, 0, NULL, 7533 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7534 ipst); 7535 7536 if (src_ire_v6 != NULL) { 7537 /* 7538 * The source is directly connected. 
7539 */ 7540 mp1 = copymsg(mp); 7541 if (mp1 != NULL) { 7542 icmp_send_redirect_v6(WR(q), 7543 mp1, v6targ, &ip6h->ip6_dst, 7544 ill, B_FALSE); 7545 } 7546 ire_refrele(src_ire_v6); 7547 } 7548 } 7549 7550 forward: 7551 /* Hoplimit verified above */ 7552 ip6h->ip6_hops--; 7553 7554 outill = ire->ire_ipif->ipif_ill; 7555 7556 DTRACE_PROBE4(ip6__forwarding__start, 7557 ill_t *, inill, ill_t *, outill, 7558 ip6_t *, ip6h, mblk_t *, mp); 7559 7560 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7561 ipst->ips_ipv6firewall_forwarding, 7562 inill, outill, ip6h, mp, mp, ipst); 7563 7564 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7565 7566 if (mp != NULL) { 7567 UPDATE_IB_PKT_COUNT(ire); 7568 ire->ire_last_used_time = lbolt; 7569 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7570 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7571 } 7572 IRE_REFRELE(ire); 7573 return; 7574 } 7575 7576 /* 7577 * Need to put on correct queue for reassembly to find it. 7578 * No need to use put() since reassembly has its own locks. 7579 * Note: multicast packets and packets destined to addresses 7580 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7581 * the arriving ill. Unlike the IPv4 case, enabling strict 7582 * destination multihoming will prevent accepting packets 7583 * addressed to an IRE_LOCAL on lo0. 7584 */ 7585 if (ire->ire_rfq != q) { 7586 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7587 == NULL) { 7588 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7589 freemsg(hada_mp); 7590 freemsg(first_mp); 7591 return; 7592 } 7593 if (ire->ire_rfq != NULL) { 7594 q = ire->ire_rfq; 7595 ill = (ill_t *)q->q_ptr; 7596 ASSERT(ill != NULL); 7597 } 7598 } 7599 7600 zoneid = ire->ire_zoneid; 7601 UPDATE_IB_PKT_COUNT(ire); 7602 ire->ire_last_used_time = lbolt; 7603 /* Don't use the ire after this point, we'll NULL it out to be sure. */ 7604 ire_refrele(ire); 7605 ire = NULL; 7606 ipv6forus: 7607 /* 7608 * Looks like this packet is for us one way or another. 
7609 * This is where we'll process destination headers etc. 7610 */ 7611 for (; ; ) { 7612 switch (nexthdr) { 7613 case IPPROTO_TCP: { 7614 uint16_t *up; 7615 uint32_t sum; 7616 int offset; 7617 7618 hdr_len = pkt_len - remlen; 7619 7620 if (hada_mp != NULL) { 7621 ip0dbg(("tcp hada drop\n")); 7622 goto hada_drop; 7623 } 7624 7625 7626 /* TCP needs all of the TCP header */ 7627 if (remlen < TCP_MIN_HEADER_LENGTH) 7628 goto pkt_too_short; 7629 if (mp->b_cont != NULL && 7630 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7631 if (!pullupmsg(mp, 7632 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7633 BUMP_MIB(ill->ill_ip_mib, 7634 ipIfStatsInDiscards); 7635 freemsg(first_mp); 7636 return; 7637 } 7638 hck_flags = 0; 7639 ip6h = (ip6_t *)mp->b_rptr; 7640 whereptr = (uint8_t *)ip6h + hdr_len; 7641 } 7642 /* 7643 * Extract the offset field from the TCP header. 7644 */ 7645 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7646 if (offset != 5) { 7647 if (offset < 5) { 7648 ip1dbg(("ip_rput_data_v6: short " 7649 "TCP data offset")); 7650 BUMP_MIB(ill->ill_ip_mib, 7651 ipIfStatsInDiscards); 7652 freemsg(first_mp); 7653 return; 7654 } 7655 /* 7656 * There must be TCP options. 7657 * Make sure we can grab them. 7658 */ 7659 offset <<= 2; 7660 if (remlen < offset) 7661 goto pkt_too_short; 7662 if (mp->b_cont != NULL && 7663 whereptr + offset > mp->b_wptr) { 7664 if (!pullupmsg(mp, 7665 hdr_len + offset)) { 7666 BUMP_MIB(ill->ill_ip_mib, 7667 ipIfStatsInDiscards); 7668 freemsg(first_mp); 7669 return; 7670 } 7671 hck_flags = 0; 7672 ip6h = (ip6_t *)mp->b_rptr; 7673 whereptr = (uint8_t *)ip6h + hdr_len; 7674 } 7675 } 7676 7677 up = (uint16_t *)&ip6h->ip6_src; 7678 /* 7679 * TCP checksum calculation. 
First sum up the 7680 * pseudo-header fields: 7681 * - Source IPv6 address 7682 * - Destination IPv6 address 7683 * - TCP payload length 7684 * - TCP protocol ID 7685 */ 7686 sum = htons(IPPROTO_TCP + remlen) + 7687 up[0] + up[1] + up[2] + up[3] + 7688 up[4] + up[5] + up[6] + up[7] + 7689 up[8] + up[9] + up[10] + up[11] + 7690 up[12] + up[13] + up[14] + up[15]; 7691 7692 /* Fold initial sum */ 7693 sum = (sum & 0xffff) + (sum >> 16); 7694 7695 mp1 = mp->b_cont; 7696 7697 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7698 IP6_STAT(ipst, ip6_in_sw_cksum); 7699 7700 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7701 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7702 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7703 mp, mp1, cksum_err); 7704 7705 if (cksum_err) { 7706 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7707 7708 if (hck_flags & HCK_FULLCKSUM) { 7709 IP6_STAT(ipst, 7710 ip6_tcp_in_full_hw_cksum_err); 7711 } else if (hck_flags & HCK_PARTIALCKSUM) { 7712 IP6_STAT(ipst, 7713 ip6_tcp_in_part_hw_cksum_err); 7714 } else { 7715 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7716 } 7717 freemsg(first_mp); 7718 return; 7719 } 7720 tcp_fanout: 7721 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7722 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7723 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7724 return; 7725 } 7726 case IPPROTO_SCTP: 7727 { 7728 sctp_hdr_t *sctph; 7729 uint32_t calcsum, pktsum; 7730 uint_t hdr_len = pkt_len - remlen; 7731 sctp_stack_t *sctps; 7732 7733 sctps = inill->ill_ipst->ips_netstack->netstack_sctp; 7734 7735 /* SCTP needs all of the SCTP header */ 7736 if (remlen < sizeof (*sctph)) { 7737 goto pkt_too_short; 7738 } 7739 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7740 ASSERT(mp->b_cont != NULL); 7741 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7742 BUMP_MIB(ill->ill_ip_mib, 7743 ipIfStatsInDiscards); 7744 freemsg(mp); 7745 return; 7746 } 7747 ip6h = (ip6_t *)mp->b_rptr; 7748 whereptr = (uint8_t *)ip6h + hdr_len; 7749 } 7750 7751 sctph = 
(sctp_hdr_t *)(mp->b_rptr + hdr_len); 7752 /* checksum */ 7753 pktsum = sctph->sh_chksum; 7754 sctph->sh_chksum = 0; 7755 calcsum = sctp_cksum(mp, hdr_len); 7756 if (calcsum != pktsum) { 7757 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7758 freemsg(mp); 7759 return; 7760 } 7761 sctph->sh_chksum = pktsum; 7762 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7763 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7764 ports, zoneid, mp, sctps)) == NULL) { 7765 ip_fanout_sctp_raw(first_mp, ill, 7766 (ipha_t *)ip6h, B_FALSE, ports, 7767 mctl_present, 7768 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7769 B_TRUE, zoneid); 7770 return; 7771 } 7772 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7773 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7774 B_FALSE, mctl_present); 7775 return; 7776 } 7777 case IPPROTO_UDP: { 7778 uint16_t *up; 7779 uint32_t sum; 7780 7781 hdr_len = pkt_len - remlen; 7782 7783 if (hada_mp != NULL) { 7784 ip0dbg(("udp hada drop\n")); 7785 goto hada_drop; 7786 } 7787 7788 /* Verify that at least the ports are present */ 7789 if (remlen < UDPH_SIZE) 7790 goto pkt_too_short; 7791 if (mp->b_cont != NULL && 7792 whereptr + UDPH_SIZE > mp->b_wptr) { 7793 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7794 BUMP_MIB(ill->ill_ip_mib, 7795 ipIfStatsInDiscards); 7796 freemsg(first_mp); 7797 return; 7798 } 7799 hck_flags = 0; 7800 ip6h = (ip6_t *)mp->b_rptr; 7801 whereptr = (uint8_t *)ip6h + hdr_len; 7802 } 7803 7804 /* 7805 * Before going through the regular checksum 7806 * calculation, make sure the received checksum 7807 * is non-zero. RFC 2460 says, a 0x0000 checksum 7808 * in a UDP packet (within IPv6 packet) is invalid 7809 * and should be replaced by 0xffff. This makes 7810 * sense as regular checksum calculation will 7811 * pass for both the cases i.e. 0x0000 and 0xffff. 7812 * Removing one of the case makes error detection 7813 * stronger. 
7814 */ 7815 7816 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7817 /* 0x0000 checksum is invalid */ 7818 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7819 "checksum value 0x0000\n")); 7820 BUMP_MIB(ill->ill_ip_mib, 7821 udpIfStatsInCksumErrs); 7822 freemsg(first_mp); 7823 return; 7824 } 7825 7826 up = (uint16_t *)&ip6h->ip6_src; 7827 7828 /* 7829 * UDP checksum calculation. First sum up the 7830 * pseudo-header fields: 7831 * - Source IPv6 address 7832 * - Destination IPv6 address 7833 * - UDP payload length 7834 * - UDP protocol ID 7835 */ 7836 7837 sum = htons(IPPROTO_UDP + remlen) + 7838 up[0] + up[1] + up[2] + up[3] + 7839 up[4] + up[5] + up[6] + up[7] + 7840 up[8] + up[9] + up[10] + up[11] + 7841 up[12] + up[13] + up[14] + up[15]; 7842 7843 /* Fold initial sum */ 7844 sum = (sum & 0xffff) + (sum >> 16); 7845 7846 if (reass_hck_flags != 0) { 7847 hck_flags = reass_hck_flags; 7848 7849 IP_CKSUM_RECV_REASS(hck_flags, 7850 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7851 sum, reass_sum, cksum_err); 7852 } else { 7853 mp1 = mp->b_cont; 7854 7855 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7856 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7857 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7858 mp, mp1, cksum_err); 7859 } 7860 7861 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7862 IP6_STAT(ipst, ip6_in_sw_cksum); 7863 7864 if (cksum_err) { 7865 BUMP_MIB(ill->ill_ip_mib, 7866 udpIfStatsInCksumErrs); 7867 7868 if (hck_flags & HCK_FULLCKSUM) 7869 IP6_STAT(ipst, 7870 ip6_udp_in_full_hw_cksum_err); 7871 else if (hck_flags & HCK_PARTIALCKSUM) 7872 IP6_STAT(ipst, 7873 ip6_udp_in_part_hw_cksum_err); 7874 else 7875 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7876 7877 freemsg(first_mp); 7878 return; 7879 } 7880 goto udp_fanout; 7881 } 7882 case IPPROTO_ICMPV6: { 7883 uint16_t *up; 7884 uint32_t sum; 7885 uint_t hdr_len = pkt_len - remlen; 7886 7887 if (hada_mp != NULL) { 7888 ip0dbg(("icmp hada drop\n")); 7889 goto hada_drop; 7890 } 7891 7892 up = (uint16_t 
*)&ip6h->ip6_src; 7893 sum = htons(IPPROTO_ICMPV6 + remlen) + 7894 up[0] + up[1] + up[2] + up[3] + 7895 up[4] + up[5] + up[6] + up[7] + 7896 up[8] + up[9] + up[10] + up[11] + 7897 up[12] + up[13] + up[14] + up[15]; 7898 sum = (sum & 0xffff) + (sum >> 16); 7899 sum = IP_CSUM(mp, hdr_len, sum); 7900 if (sum != 0) { 7901 /* IPv6 ICMP checksum failed */ 7902 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7903 "failed %x\n", 7904 sum)); 7905 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7906 BUMP_MIB(ill->ill_icmp6_mib, 7907 ipv6IfIcmpInErrors); 7908 freemsg(first_mp); 7909 return; 7910 } 7911 7912 icmp_fanout: 7913 /* Check variable for testing applications */ 7914 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7915 freemsg(first_mp); 7916 return; 7917 } 7918 /* 7919 * Assume that there is always at least one conn for 7920 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7921 * where there is no conn. 7922 */ 7923 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7924 ASSERT(!IS_LOOPBACK((ill))); 7925 /* 7926 * In the multicast case, applications may have 7927 * joined the group from different zones, so we 7928 * need to deliver the packet to each of them. 7929 * Loop through the multicast memberships 7930 * structures (ilm) on the receive ill and send 7931 * a copy of the packet up each matching one. 
7932 */ 7933 ILM_WALKER_HOLD(ill); 7934 for (ilm = ill->ill_ilm; ilm != NULL; 7935 ilm = ilm->ilm_next) { 7936 if (ilm->ilm_flags & ILM_DELETED) 7937 continue; 7938 if (!IN6_ARE_ADDR_EQUAL( 7939 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7940 continue; 7941 if (!ipif_lookup_zoneid(ill, 7942 ilm->ilm_zoneid, IPIF_UP, NULL)) 7943 continue; 7944 7945 first_mp1 = ip_copymsg(first_mp); 7946 if (first_mp1 == NULL) 7947 continue; 7948 icmp_inbound_v6(q, first_mp1, ill, 7949 hdr_len, mctl_present, 0, 7950 ilm->ilm_zoneid, dl_mp); 7951 } 7952 ILM_WALKER_RELE(ill); 7953 } else { 7954 first_mp1 = ip_copymsg(first_mp); 7955 if (first_mp1 != NULL) 7956 icmp_inbound_v6(q, first_mp1, ill, 7957 hdr_len, mctl_present, 0, zoneid, 7958 dl_mp); 7959 } 7960 /* FALLTHRU */ 7961 default: { 7962 /* 7963 * Handle protocols with which IPv6 is less intimate. 7964 */ 7965 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 7966 7967 if (hada_mp != NULL) { 7968 ip0dbg(("default hada drop\n")); 7969 goto hada_drop; 7970 } 7971 7972 /* 7973 * Enable sending ICMP for "Unknown" nexthdr 7974 * case. i.e. where we did not FALLTHRU from 7975 * IPPROTO_ICMPV6 processing case above. 7976 * If we did FALLTHRU, then the packet has already been 7977 * processed for IPPF, don't process it again in 7978 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7979 * flags 7980 */ 7981 if (nexthdr != IPPROTO_ICMPV6) 7982 proto_flags |= IP_FF_SEND_ICMP; 7983 else 7984 proto_flags |= IP6_NO_IPPOLICY; 7985 7986 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7987 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7988 mctl_present, zoneid); 7989 return; 7990 } 7991 7992 case IPPROTO_DSTOPTS: { 7993 uint_t ehdrlen; 7994 uint8_t *optptr; 7995 ip6_dest_t *desthdr; 7996 7997 /* Check if AH is present. 
*/ 7998 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7999 hada_mp, zoneid)) { 8000 ip0dbg(("dst early hada drop\n")); 8001 return; 8002 } 8003 8004 /* 8005 * Reinitialize pointers, as ipsec_early_ah_v6() does 8006 * complete pullups. We don't have to do more pullups 8007 * as a result. 8008 */ 8009 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8010 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8011 ip6h = (ip6_t *)mp->b_rptr; 8012 8013 if (remlen < MIN_EHDR_LEN) 8014 goto pkt_too_short; 8015 8016 desthdr = (ip6_dest_t *)whereptr; 8017 nexthdr = desthdr->ip6d_nxt; 8018 prev_nexthdr_offset = (uint_t)(whereptr - 8019 (uint8_t *)ip6h); 8020 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8021 if (remlen < ehdrlen) 8022 goto pkt_too_short; 8023 optptr = whereptr + 2; 8024 /* 8025 * Note: XXX This code does not seem to make 8026 * distinction between Destination Options Header 8027 * being before/after Routing Header which can 8028 * happen if we are at the end of source route. 8029 * This may become significant in future. 8030 * (No real significant Destination Options are 8031 * defined/implemented yet ). 8032 */ 8033 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8034 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 8035 case -1: 8036 /* 8037 * Packet has been consumed and any needed 8038 * ICMP errors sent. 
8039 */ 8040 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8041 freemsg(hada_mp); 8042 return; 8043 case 0: 8044 /* No action needed continue */ 8045 break; 8046 case 1: 8047 /* 8048 * Unnexpected return value 8049 * (Router alert is a Hop-by-Hop option) 8050 */ 8051 #ifdef DEBUG 8052 panic("ip_rput_data_v6: router " 8053 "alert hbh opt indication in dest opt"); 8054 /*NOTREACHED*/ 8055 #else 8056 freemsg(hada_mp); 8057 freemsg(first_mp); 8058 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8059 return; 8060 #endif 8061 } 8062 used = ehdrlen; 8063 break; 8064 } 8065 case IPPROTO_FRAGMENT: { 8066 ip6_frag_t *fraghdr; 8067 size_t no_frag_hdr_len; 8068 8069 if (hada_mp != NULL) { 8070 ip0dbg(("frag hada drop\n")); 8071 goto hada_drop; 8072 } 8073 8074 ASSERT(first_mp == mp); 8075 if (remlen < sizeof (ip6_frag_t)) 8076 goto pkt_too_short; 8077 8078 if (mp->b_cont != NULL && 8079 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8080 if (!pullupmsg(mp, 8081 pkt_len - remlen + sizeof (ip6_frag_t))) { 8082 BUMP_MIB(ill->ill_ip_mib, 8083 ipIfStatsInDiscards); 8084 freemsg(mp); 8085 return; 8086 } 8087 hck_flags = 0; 8088 ip6h = (ip6_t *)mp->b_rptr; 8089 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8090 } 8091 8092 fraghdr = (ip6_frag_t *)whereptr; 8093 used = (uint_t)sizeof (ip6_frag_t); 8094 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8095 8096 /* 8097 * Invoke the CGTP (multirouting) filtering module to 8098 * process the incoming packet. Packets identified as 8099 * duplicates must be discarded. Filtering is active 8100 * only if the the ip_cgtp_filter ndd variable is 8101 * non-zero. 
8102 */ 8103 if (ipst->ips_ip_cgtp_filter && 8104 ipst->ips_ip_cgtp_filter_ops != NULL) { 8105 int cgtp_flt_pkt; 8106 netstackid_t stackid; 8107 8108 stackid = ipst->ips_netstack->netstack_stackid; 8109 8110 cgtp_flt_pkt = 8111 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 8112 stackid, inill->ill_phyint->phyint_ifindex, 8113 ip6h, fraghdr); 8114 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8115 freemsg(mp); 8116 return; 8117 } 8118 } 8119 8120 /* Restore the flags */ 8121 DB_CKSUMFLAGS(mp) = hck_flags; 8122 8123 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8124 remlen - used, &prev_nexthdr_offset, 8125 &reass_sum, &reass_hck_flags); 8126 if (mp == NULL) { 8127 /* Reassembly is still pending */ 8128 return; 8129 } 8130 /* The first mblk are the headers before the frag hdr */ 8131 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8132 8133 first_mp = mp; /* mp has most likely changed! */ 8134 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8135 ip6h = (ip6_t *)mp->b_rptr; 8136 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8137 whereptr = mp->b_rptr + no_frag_hdr_len; 8138 remlen = ntohs(ip6h->ip6_plen) + 8139 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8140 pkt_len = msgdsize(mp); 8141 used = 0; 8142 break; 8143 } 8144 case IPPROTO_HOPOPTS: 8145 if (hada_mp != NULL) { 8146 ip0dbg(("hop hada drop\n")); 8147 goto hada_drop; 8148 } 8149 /* 8150 * Illegal header sequence. 8151 * (Hop-by-hop headers are processed above 8152 * and required to immediately follow IPv6 header) 8153 */ 8154 icmp_param_problem_v6(WR(q), first_mp, 8155 ICMP6_PARAMPROB_NEXTHEADER, 8156 prev_nexthdr_offset, 8157 B_FALSE, B_FALSE, zoneid, ipst); 8158 return; 8159 } 8160 case IPPROTO_ROUTING: { 8161 uint_t ehdrlen; 8162 ip6_rthdr_t *rthdr; 8163 8164 /* Check if AH is present. 
*/ 8165 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8166 hada_mp, zoneid)) { 8167 ip0dbg(("routing hada drop\n")); 8168 return; 8169 } 8170 8171 /* 8172 * Reinitialize pointers, as ipsec_early_ah_v6() does 8173 * complete pullups. We don't have to do more pullups 8174 * as a result. 8175 */ 8176 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8177 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8178 ip6h = (ip6_t *)mp->b_rptr; 8179 8180 if (remlen < MIN_EHDR_LEN) 8181 goto pkt_too_short; 8182 rthdr = (ip6_rthdr_t *)whereptr; 8183 nexthdr = rthdr->ip6r_nxt; 8184 prev_nexthdr_offset = (uint_t)(whereptr - 8185 (uint8_t *)ip6h); 8186 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8187 if (remlen < ehdrlen) 8188 goto pkt_too_short; 8189 if (rthdr->ip6r_segleft != 0) { 8190 /* Not end of source route */ 8191 if (ll_multicast) { 8192 BUMP_MIB(ill->ill_ip_mib, 8193 ipIfStatsForwProhibits); 8194 freemsg(hada_mp); 8195 freemsg(mp); 8196 return; 8197 } 8198 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8199 flags, hada_mp, dl_mp); 8200 return; 8201 } 8202 used = ehdrlen; 8203 break; 8204 } 8205 case IPPROTO_AH: 8206 case IPPROTO_ESP: { 8207 /* 8208 * Fast path for AH/ESP. If this is the first time 8209 * we are sending a datagram to AH/ESP, allocate 8210 * a IPSEC_IN message and prepend it. Otherwise, 8211 * just fanout. 8212 */ 8213 8214 ipsec_in_t *ii; 8215 int ipsec_rc; 8216 ipsec_stack_t *ipss; 8217 8218 ipss = ipst->ips_netstack->netstack_ipsec; 8219 if (!mctl_present) { 8220 ASSERT(first_mp == mp); 8221 first_mp = ipsec_in_alloc(B_FALSE, 8222 ipst->ips_netstack); 8223 if (first_mp == NULL) { 8224 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8225 "allocation failure.\n")); 8226 BUMP_MIB(ill->ill_ip_mib, 8227 ipIfStatsInDiscards); 8228 freemsg(mp); 8229 return; 8230 } 8231 /* 8232 * Store the ill_index so that when we come back 8233 * from IPSEC we ride on the same queue. 
8234 */ 8235 ii = (ipsec_in_t *)first_mp->b_rptr; 8236 ii->ipsec_in_ill_index = 8237 ill->ill_phyint->phyint_ifindex; 8238 ii->ipsec_in_rill_index = 8239 ii->ipsec_in_ill_index; 8240 first_mp->b_cont = mp; 8241 /* 8242 * Cache hardware acceleration info. 8243 */ 8244 if (hada_mp != NULL) { 8245 IPSECHW_DEBUG(IPSECHW_PKT, 8246 ("ip_rput_data_v6: " 8247 "caching data attr.\n")); 8248 ii->ipsec_in_accelerated = B_TRUE; 8249 ii->ipsec_in_da = hada_mp; 8250 hada_mp = NULL; 8251 } 8252 } else { 8253 ii = (ipsec_in_t *)first_mp->b_rptr; 8254 } 8255 8256 if (!ipsec_loaded(ipss)) { 8257 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8258 zoneid, ipst); 8259 return; 8260 } 8261 8262 /* select inbound SA and have IPsec process the pkt */ 8263 if (nexthdr == IPPROTO_ESP) { 8264 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8265 ipst->ips_netstack); 8266 if (esph == NULL) 8267 return; 8268 ASSERT(ii->ipsec_in_esp_sa != NULL); 8269 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8270 NULL); 8271 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8272 first_mp, esph); 8273 } else { 8274 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8275 ipst->ips_netstack); 8276 if (ah == NULL) 8277 return; 8278 ASSERT(ii->ipsec_in_ah_sa != NULL); 8279 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8280 NULL); 8281 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8282 first_mp, ah); 8283 } 8284 8285 switch (ipsec_rc) { 8286 case IPSEC_STATUS_SUCCESS: 8287 break; 8288 case IPSEC_STATUS_FAILED: 8289 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8290 /* FALLTHRU */ 8291 case IPSEC_STATUS_PENDING: 8292 return; 8293 } 8294 /* we're done with IPsec processing, send it up */ 8295 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8296 return; 8297 } 8298 case IPPROTO_NONE: 8299 /* All processing is done. Count as "delivered". 
*/ 8300 freemsg(hada_mp); 8301 freemsg(first_mp); 8302 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8303 return; 8304 } 8305 whereptr += used; 8306 ASSERT(remlen >= used); 8307 remlen -= used; 8308 } 8309 /* NOTREACHED */ 8310 8311 pkt_too_short: 8312 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8313 ip6_len, pkt_len, remlen)); 8314 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8315 freemsg(hada_mp); 8316 freemsg(first_mp); 8317 return; 8318 udp_fanout: 8319 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8320 connp = NULL; 8321 } else { 8322 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8323 ipst); 8324 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8325 CONN_DEC_REF(connp); 8326 connp = NULL; 8327 } 8328 } 8329 8330 if (connp == NULL) { 8331 uint32_t ports; 8332 8333 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8334 UDP_PORTS_OFFSET); 8335 IP6_STAT(ipst, ip6_udp_slow_path); 8336 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8337 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8338 zoneid); 8339 return; 8340 } 8341 8342 if (CONN_UDP_FLOWCTLD(connp)) { 8343 freemsg(first_mp); 8344 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8345 CONN_DEC_REF(connp); 8346 return; 8347 } 8348 8349 /* Initiate IPPF processing */ 8350 if (IP6_IN_IPP(flags, ipst)) { 8351 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8352 if (mp == NULL) { 8353 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8354 CONN_DEC_REF(connp); 8355 return; 8356 } 8357 } 8358 8359 if (connp->conn_ip_recvpktinfo || 8360 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8361 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8362 if (mp == NULL) { 8363 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8364 CONN_DEC_REF(connp); 8365 return; 8366 } 8367 } 8368 8369 IP6_STAT(ipst, ip6_udp_fast_path); 8370 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8371 8372 /* Send it upstream */ 8373 (connp->conn_recv)(connp, mp, NULL); 8374 8375 
CONN_DEC_REF(connp); 8376 freemsg(hada_mp); 8377 return; 8378 8379 hada_drop: 8380 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8381 /* IPsec kstats: bump counter here */ 8382 freemsg(hada_mp); 8383 freemsg(first_mp); 8384 } 8385 8386 /* 8387 * Reassemble fragment. 8388 * When it returns a completed message the first mblk will only contain 8389 * the headers prior to the fragment header. 8390 * 8391 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8392 * of the preceding header. This is needed to patch the previous header's 8393 * nexthdr field when reassembly completes. 8394 */ 8395 static mblk_t * 8396 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8397 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8398 uint32_t *cksum_val, uint16_t *cksum_flags) 8399 { 8400 ill_t *ill = (ill_t *)q->q_ptr; 8401 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8402 uint16_t offset; 8403 boolean_t more_frags; 8404 uint8_t nexthdr = fraghdr->ip6f_nxt; 8405 in6_addr_t *v6dst_ptr; 8406 in6_addr_t *v6src_ptr; 8407 uint_t end; 8408 uint_t hdr_length; 8409 size_t count; 8410 ipf_t *ipf; 8411 ipf_t **ipfp; 8412 ipfb_t *ipfb; 8413 mblk_t *mp1; 8414 uint8_t ecn_info = 0; 8415 size_t msg_len; 8416 mblk_t *tail_mp; 8417 mblk_t *t_mp; 8418 boolean_t pruned = B_FALSE; 8419 uint32_t sum_val; 8420 uint16_t sum_flags; 8421 ip_stack_t *ipst = ill->ill_ipst; 8422 8423 if (cksum_val != NULL) 8424 *cksum_val = 0; 8425 if (cksum_flags != NULL) 8426 *cksum_flags = 0; 8427 8428 /* 8429 * We utilize hardware computed checksum info only for UDP since 8430 * IP fragmentation is a normal occurence for the protocol. In 8431 * addition, checksum offload support for IP fragments carrying 8432 * UDP payload is commonly implemented across network adapters. 
8433 */ 8434 ASSERT(ill != NULL); 8435 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8436 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8437 mblk_t *mp1 = mp->b_cont; 8438 int32_t len; 8439 8440 /* Record checksum information from the packet */ 8441 sum_val = (uint32_t)DB_CKSUM16(mp); 8442 sum_flags = DB_CKSUMFLAGS(mp); 8443 8444 /* fragmented payload offset from beginning of mblk */ 8445 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8446 8447 if ((sum_flags & HCK_PARTIALCKSUM) && 8448 (mp1 == NULL || mp1->b_cont == NULL) && 8449 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8450 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8451 uint32_t adj; 8452 /* 8453 * Partial checksum has been calculated by hardware 8454 * and attached to the packet; in addition, any 8455 * prepended extraneous data is even byte aligned. 8456 * If any such data exists, we adjust the checksum; 8457 * this would also handle any postpended data. 8458 */ 8459 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8460 mp, mp1, len, adj); 8461 8462 /* One's complement subtract extraneous checksum */ 8463 if (adj >= sum_val) 8464 sum_val = ~(adj - sum_val) & 0xFFFF; 8465 else 8466 sum_val -= adj; 8467 } 8468 } else { 8469 sum_val = 0; 8470 sum_flags = 0; 8471 } 8472 8473 /* Clear hardware checksumming flag */ 8474 DB_CKSUMFLAGS(mp) = 0; 8475 8476 /* 8477 * Note: Fragment offset in header is in 8-octet units. 8478 * Clearing least significant 3 bits not only extracts 8479 * it but also gets it in units of octets. 8480 */ 8481 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8482 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8483 8484 /* 8485 * Is the more frags flag on and the payload length not a multiple 8486 * of eight? 
8487 */ 8488 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8489 zoneid_t zoneid; 8490 8491 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8492 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8493 if (zoneid == ALL_ZONES) { 8494 freemsg(mp); 8495 return (NULL); 8496 } 8497 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8498 (uint32_t)((char *)&ip6h->ip6_plen - 8499 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8500 return (NULL); 8501 } 8502 8503 v6src_ptr = &ip6h->ip6_src; 8504 v6dst_ptr = &ip6h->ip6_dst; 8505 end = remlen; 8506 8507 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8508 end += offset; 8509 8510 /* 8511 * Would fragment cause reassembled packet to have a payload length 8512 * greater than IP_MAXPACKET - the max payload size? 8513 */ 8514 if (end > IP_MAXPACKET) { 8515 zoneid_t zoneid; 8516 8517 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8518 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8519 if (zoneid == ALL_ZONES) { 8520 freemsg(mp); 8521 return (NULL); 8522 } 8523 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8524 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8525 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8526 return (NULL); 8527 } 8528 8529 /* 8530 * This packet just has one fragment. Reassembly not 8531 * needed. 8532 */ 8533 if (!more_frags && offset == 0) { 8534 goto reass_done; 8535 } 8536 8537 /* 8538 * Drop the fragmented as early as possible, if 8539 * we don't have resource(s) to re-assemble. 8540 */ 8541 if (ipst->ips_ip_reass_queue_bytes == 0) { 8542 freemsg(mp); 8543 return (NULL); 8544 } 8545 8546 /* Record the ECN field info. */ 8547 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8548 /* 8549 * If this is not the first fragment, dump the unfragmentable 8550 * portion of the packet. 8551 */ 8552 if (offset) 8553 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8554 8555 /* 8556 * Fragmentation reassembly. 
Each ILL has a hash table for 8557 * queueing packets undergoing reassembly for all IPIFs 8558 * associated with the ILL. The hash is based on the packet 8559 * IP ident field. The ILL frag hash table was allocated 8560 * as a timer block at the time the ILL was created. Whenever 8561 * there is anything on the reassembly queue, the timer will 8562 * be running. 8563 */ 8564 msg_len = MBLKSIZE(mp); 8565 tail_mp = mp; 8566 while (tail_mp->b_cont != NULL) { 8567 tail_mp = tail_mp->b_cont; 8568 msg_len += MBLKSIZE(tail_mp); 8569 } 8570 /* 8571 * If the reassembly list for this ILL will get too big 8572 * prune it. 8573 */ 8574 8575 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8576 ipst->ips_ip_reass_queue_bytes) { 8577 ill_frag_prune(ill, 8578 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8579 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8580 pruned = B_TRUE; 8581 } 8582 8583 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8584 mutex_enter(&ipfb->ipfb_lock); 8585 8586 ipfp = &ipfb->ipfb_ipf; 8587 /* Try to find an existing fragment queue for this packet. */ 8588 for (;;) { 8589 ipf = ipfp[0]; 8590 if (ipf) { 8591 /* 8592 * It has to match on ident, source address, and 8593 * dest address. 8594 */ 8595 if (ipf->ipf_ident == ident && 8596 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8597 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8598 8599 /* 8600 * If we have received too many 8601 * duplicate fragments for this packet 8602 * free it. 8603 */ 8604 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8605 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8606 freemsg(mp); 8607 mutex_exit(&ipfb->ipfb_lock); 8608 return (NULL); 8609 } 8610 8611 break; 8612 } 8613 ipfp = &ipf->ipf_hash_next; 8614 continue; 8615 } 8616 8617 8618 /* 8619 * If we pruned the list, do we want to store this new 8620 * fragment?. We apply an optimization here based on the 8621 * fact that most fragments will be received in order. 
8622 * So if the offset of this incoming fragment is zero, 8623 * it is the first fragment of a new packet. We will 8624 * keep it. Otherwise drop the fragment, as we have 8625 * probably pruned the packet already (since the 8626 * packet cannot be found). 8627 */ 8628 8629 if (pruned && offset != 0) { 8630 mutex_exit(&ipfb->ipfb_lock); 8631 freemsg(mp); 8632 return (NULL); 8633 } 8634 8635 /* New guy. Allocate a frag message. */ 8636 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8637 if (!mp1) { 8638 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8639 freemsg(mp); 8640 partial_reass_done: 8641 mutex_exit(&ipfb->ipfb_lock); 8642 return (NULL); 8643 } 8644 8645 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8646 /* 8647 * Too many fragmented packets in this hash bucket. 8648 * Free the oldest. 8649 */ 8650 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8651 } 8652 8653 mp1->b_cont = mp; 8654 8655 /* Initialize the fragment header. */ 8656 ipf = (ipf_t *)mp1->b_rptr; 8657 ipf->ipf_mp = mp1; 8658 ipf->ipf_ptphn = ipfp; 8659 ipfp[0] = ipf; 8660 ipf->ipf_hash_next = NULL; 8661 ipf->ipf_ident = ident; 8662 ipf->ipf_v6src = *v6src_ptr; 8663 ipf->ipf_v6dst = *v6dst_ptr; 8664 /* Record reassembly start time. 
*/ 8665 ipf->ipf_timestamp = gethrestime_sec(); 8666 /* Record ipf generation and account for frag header */ 8667 ipf->ipf_gen = ill->ill_ipf_gen++; 8668 ipf->ipf_count = MBLKSIZE(mp1); 8669 ipf->ipf_protocol = nexthdr; 8670 ipf->ipf_nf_hdr_len = 0; 8671 ipf->ipf_prev_nexthdr_offset = 0; 8672 ipf->ipf_last_frag_seen = B_FALSE; 8673 ipf->ipf_ecn = ecn_info; 8674 ipf->ipf_num_dups = 0; 8675 ipfb->ipfb_frag_pkts++; 8676 ipf->ipf_checksum = 0; 8677 ipf->ipf_checksum_flags = 0; 8678 8679 /* Store checksum value in fragment header */ 8680 if (sum_flags != 0) { 8681 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8682 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8683 ipf->ipf_checksum = sum_val; 8684 ipf->ipf_checksum_flags = sum_flags; 8685 } 8686 8687 /* 8688 * We handle reassembly two ways. In the easy case, 8689 * where all the fragments show up in order, we do 8690 * minimal bookkeeping, and just clip new pieces on 8691 * the end. If we ever see a hole, then we go off 8692 * to ip_reassemble which has to mark the pieces and 8693 * keep track of the number of holes, etc. Obviously, 8694 * the point of having both mechanisms is so we can 8695 * handle the easy case as efficiently as possible. 8696 */ 8697 if (offset == 0) { 8698 /* Easy case, in-order reassembly so far. */ 8699 /* Update the byte count */ 8700 ipf->ipf_count += msg_len; 8701 ipf->ipf_tail_mp = tail_mp; 8702 /* 8703 * Keep track of next expected offset in 8704 * ipf_end. 8705 */ 8706 ipf->ipf_end = end; 8707 ipf->ipf_nf_hdr_len = hdr_length; 8708 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8709 } else { 8710 /* Hard case, hole at the beginning. */ 8711 ipf->ipf_tail_mp = NULL; 8712 /* 8713 * ipf_end == 0 means that we have given up 8714 * on easy reassembly. 8715 */ 8716 ipf->ipf_end = 0; 8717 8718 /* Forget checksum offload from now on */ 8719 ipf->ipf_checksum_flags = 0; 8720 8721 /* 8722 * ipf_hole_cnt is set by ip_reassemble. 8723 * ipf_count is updated by ip_reassemble. 
8724 * No need to check for return value here 8725 * as we don't expect reassembly to complete or 8726 * fail for the first fragment itself. 8727 */ 8728 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8729 msg_len); 8730 } 8731 /* Update per ipfb and ill byte counts */ 8732 ipfb->ipfb_count += ipf->ipf_count; 8733 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8734 ill->ill_frag_count += ipf->ipf_count; 8735 /* If the frag timer wasn't already going, start it. */ 8736 mutex_enter(&ill->ill_lock); 8737 ill_frag_timer_start(ill); 8738 mutex_exit(&ill->ill_lock); 8739 goto partial_reass_done; 8740 } 8741 8742 /* 8743 * If the packet's flag has changed (it could be coming up 8744 * from an interface different than the previous, therefore 8745 * possibly different checksum capability), then forget about 8746 * any stored checksum states. Otherwise add the value to 8747 * the existing one stored in the fragment header. 8748 */ 8749 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8750 sum_val += ipf->ipf_checksum; 8751 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8752 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8753 ipf->ipf_checksum = sum_val; 8754 } else if (ipf->ipf_checksum_flags != 0) { 8755 /* Forget checksum offload from now on */ 8756 ipf->ipf_checksum_flags = 0; 8757 } 8758 8759 /* 8760 * We have a new piece of a datagram which is already being 8761 * reassembled. Update the ECN info if all IP fragments 8762 * are ECN capable. If there is one which is not, clear 8763 * all the info. If there is at least one which has CE 8764 * code point, IP needs to report that up to transport. 
8765 */ 8766 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8767 if (ecn_info == IPH_ECN_CE) 8768 ipf->ipf_ecn = IPH_ECN_CE; 8769 } else { 8770 ipf->ipf_ecn = IPH_ECN_NECT; 8771 } 8772 8773 if (offset && ipf->ipf_end == offset) { 8774 /* The new fragment fits at the end */ 8775 ipf->ipf_tail_mp->b_cont = mp; 8776 /* Update the byte count */ 8777 ipf->ipf_count += msg_len; 8778 /* Update per ipfb and ill byte counts */ 8779 ipfb->ipfb_count += msg_len; 8780 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8781 ill->ill_frag_count += msg_len; 8782 if (more_frags) { 8783 /* More to come. */ 8784 ipf->ipf_end = end; 8785 ipf->ipf_tail_mp = tail_mp; 8786 goto partial_reass_done; 8787 } 8788 } else { 8789 /* 8790 * Go do the hard cases. 8791 * Call ip_reassemble(). 8792 */ 8793 int ret; 8794 8795 if (offset == 0) { 8796 if (ipf->ipf_prev_nexthdr_offset == 0) { 8797 ipf->ipf_nf_hdr_len = hdr_length; 8798 ipf->ipf_prev_nexthdr_offset = 8799 *prev_nexthdr_offset; 8800 } 8801 } 8802 /* Save current byte count */ 8803 count = ipf->ipf_count; 8804 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8805 8806 /* Count of bytes added and subtracted (freeb()ed) */ 8807 count = ipf->ipf_count - count; 8808 if (count) { 8809 /* Update per ipfb and ill byte counts */ 8810 ipfb->ipfb_count += count; 8811 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8812 ill->ill_frag_count += count; 8813 } 8814 if (ret == IP_REASS_PARTIAL) { 8815 goto partial_reass_done; 8816 } else if (ret == IP_REASS_FAILED) { 8817 /* Reassembly failed. Free up all resources */ 8818 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8819 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8820 IP_REASS_SET_START(t_mp, 0); 8821 IP_REASS_SET_END(t_mp, 0); 8822 } 8823 freemsg(mp); 8824 goto partial_reass_done; 8825 } 8826 8827 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8828 } 8829 /* 8830 * We have completed reassembly. Unhook the frag header from 8831 * the reassembly list. 
8832 * 8833 * Grab the unfragmentable header length next header value out 8834 * of the first fragment 8835 */ 8836 ASSERT(ipf->ipf_nf_hdr_len != 0); 8837 hdr_length = ipf->ipf_nf_hdr_len; 8838 8839 /* 8840 * Before we free the frag header, record the ECN info 8841 * to report back to the transport. 8842 */ 8843 ecn_info = ipf->ipf_ecn; 8844 8845 /* 8846 * Store the nextheader field in the header preceding the fragment 8847 * header 8848 */ 8849 nexthdr = ipf->ipf_protocol; 8850 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8851 ipfp = ipf->ipf_ptphn; 8852 8853 /* We need to supply these to caller */ 8854 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8855 sum_val = ipf->ipf_checksum; 8856 else 8857 sum_val = 0; 8858 8859 mp1 = ipf->ipf_mp; 8860 count = ipf->ipf_count; 8861 ipf = ipf->ipf_hash_next; 8862 if (ipf) 8863 ipf->ipf_ptphn = ipfp; 8864 ipfp[0] = ipf; 8865 ill->ill_frag_count -= count; 8866 ASSERT(ipfb->ipfb_count >= count); 8867 ipfb->ipfb_count -= count; 8868 ipfb->ipfb_frag_pkts--; 8869 mutex_exit(&ipfb->ipfb_lock); 8870 /* Ditch the frag header. */ 8871 mp = mp1->b_cont; 8872 freeb(mp1); 8873 8874 /* 8875 * Make sure the packet is good by doing some sanity 8876 * check. If bad we can silentely drop the packet. 8877 */ 8878 reass_done: 8879 if (hdr_length < sizeof (ip6_frag_t)) { 8880 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8881 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8882 freemsg(mp); 8883 return (NULL); 8884 } 8885 8886 /* 8887 * Remove the fragment header from the initial header by 8888 * splitting the mblk into the non-fragmentable header and 8889 * everthing after the fragment extension header. This has the 8890 * side effect of putting all the headers that need destination 8891 * processing into the b_cont block-- on return this fact is 8892 * used in order to avoid having to look at the extensions 8893 * already processed. 
8894 * 8895 * Note that this code assumes that the unfragmentable portion 8896 * of the header is in the first mblk and increments 8897 * the read pointer past it. If this assumption is broken 8898 * this code fails badly. 8899 */ 8900 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8901 mblk_t *nmp; 8902 8903 if (!(nmp = dupb(mp))) { 8904 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8905 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8906 freemsg(mp); 8907 return (NULL); 8908 } 8909 nmp->b_cont = mp->b_cont; 8910 mp->b_cont = nmp; 8911 nmp->b_rptr += hdr_length; 8912 } 8913 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8914 8915 ip6h = (ip6_t *)mp->b_rptr; 8916 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8917 8918 /* Restore original IP length in header. */ 8919 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8920 /* Record the ECN info. */ 8921 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8922 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8923 8924 /* Reassembly is successful; return checksum information if needed */ 8925 if (cksum_val != NULL) 8926 *cksum_val = sum_val; 8927 if (cksum_flags != NULL) 8928 *cksum_flags = sum_flags; 8929 8930 return (mp); 8931 } 8932 8933 /* 8934 * Walk through the options to see if there is a routing header. 8935 * If present get the destination which is the last address of 8936 * the option. 
8937 */ 8938 in6_addr_t 8939 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8940 { 8941 uint8_t nexthdr; 8942 uint8_t *whereptr; 8943 ip6_hbh_t *hbhhdr; 8944 ip6_dest_t *dsthdr; 8945 ip6_rthdr0_t *rthdr; 8946 ip6_frag_t *fraghdr; 8947 int ehdrlen; 8948 int left; 8949 in6_addr_t *ap, rv; 8950 8951 if (is_fragment != NULL) 8952 *is_fragment = B_FALSE; 8953 8954 rv = ip6h->ip6_dst; 8955 8956 nexthdr = ip6h->ip6_nxt; 8957 whereptr = (uint8_t *)&ip6h[1]; 8958 for (;;) { 8959 8960 ASSERT(nexthdr != IPPROTO_RAW); 8961 switch (nexthdr) { 8962 case IPPROTO_HOPOPTS: 8963 hbhhdr = (ip6_hbh_t *)whereptr; 8964 nexthdr = hbhhdr->ip6h_nxt; 8965 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8966 break; 8967 case IPPROTO_DSTOPTS: 8968 dsthdr = (ip6_dest_t *)whereptr; 8969 nexthdr = dsthdr->ip6d_nxt; 8970 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8971 break; 8972 case IPPROTO_ROUTING: 8973 rthdr = (ip6_rthdr0_t *)whereptr; 8974 nexthdr = rthdr->ip6r0_nxt; 8975 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 8976 8977 left = rthdr->ip6r0_segleft; 8978 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 8979 rv = *(ap + left - 1); 8980 /* 8981 * If the caller doesn't care whether the packet 8982 * is a fragment or not, we can stop here since 8983 * we have our destination. 8984 */ 8985 if (is_fragment == NULL) 8986 goto done; 8987 break; 8988 case IPPROTO_FRAGMENT: 8989 fraghdr = (ip6_frag_t *)whereptr; 8990 nexthdr = fraghdr->ip6f_nxt; 8991 ehdrlen = sizeof (ip6_frag_t); 8992 if (is_fragment != NULL) 8993 *is_fragment = B_TRUE; 8994 goto done; 8995 default : 8996 goto done; 8997 } 8998 whereptr += ehdrlen; 8999 } 9000 9001 done: 9002 return (rv); 9003 } 9004 9005 /* 9006 * ip_source_routed_v6: 9007 * This function is called by redirect code in ip_rput_data_v6 to 9008 * know whether this packet is source routed through this node i.e 9009 * whether this node (router) is part of the journey. 
This 9010 * function is called under two cases : 9011 * 9012 * case 1 : Routing header was processed by this node and 9013 * ip_process_rthdr replaced ip6_dst with the next hop 9014 * and we are forwarding the packet to the next hop. 9015 * 9016 * case 2 : Routing header was not processed by this node and we 9017 * are just forwarding the packet. 9018 * 9019 * For case (1) we don't want to send redirects. For case(2) we 9020 * want to send redirects. 9021 */ 9022 static boolean_t 9023 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 9024 { 9025 uint8_t nexthdr; 9026 in6_addr_t *addrptr; 9027 ip6_rthdr0_t *rthdr; 9028 uint8_t numaddr; 9029 ip6_hbh_t *hbhhdr; 9030 uint_t ehdrlen; 9031 uint8_t *byteptr; 9032 9033 ip2dbg(("ip_source_routed_v6\n")); 9034 nexthdr = ip6h->ip6_nxt; 9035 ehdrlen = IPV6_HDR_LEN; 9036 9037 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9038 while (nexthdr == IPPROTO_HOPOPTS || 9039 nexthdr == IPPROTO_DSTOPTS) { 9040 byteptr = (uint8_t *)ip6h + ehdrlen; 9041 /* 9042 * Check if we have already processed 9043 * packets or we are just a forwarding 9044 * router which only pulled up msgs up 9045 * to IPV6HDR and one HBH ext header 9046 */ 9047 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9048 ip2dbg(("ip_source_routed_v6: Extension" 9049 " headers not processed\n")); 9050 return (B_FALSE); 9051 } 9052 hbhhdr = (ip6_hbh_t *)byteptr; 9053 nexthdr = hbhhdr->ip6h_nxt; 9054 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9055 } 9056 switch (nexthdr) { 9057 case IPPROTO_ROUTING: 9058 byteptr = (uint8_t *)ip6h + ehdrlen; 9059 /* 9060 * If for some reason, we haven't pulled up 9061 * the routing hdr data mblk, then we must 9062 * not have processed it at all. So for sure 9063 * we are not part of the source routed journey. 
9064 */ 9065 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9066 ip2dbg(("ip_source_routed_v6: Routing" 9067 " header not processed\n")); 9068 return (B_FALSE); 9069 } 9070 rthdr = (ip6_rthdr0_t *)byteptr; 9071 /* 9072 * Either we are an intermediate router or the 9073 * last hop before destination and we have 9074 * already processed the routing header. 9075 * If segment_left is greater than or equal to zero, 9076 * then we must be the (numaddr - segleft) entry 9077 * of the routing header. Although ip6r0_segleft 9078 * is a unit8_t variable, we still check for zero 9079 * or greater value, if in case the data type 9080 * is changed someday in future. 9081 */ 9082 if (rthdr->ip6r0_segleft > 0 || 9083 rthdr->ip6r0_segleft == 0) { 9084 ire_t *ire = NULL; 9085 9086 numaddr = rthdr->ip6r0_len / 2; 9087 addrptr = (in6_addr_t *)((char *)rthdr + 9088 sizeof (*rthdr)); 9089 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9090 if (addrptr != NULL) { 9091 ire = ire_ctable_lookup_v6(addrptr, NULL, 9092 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9093 MATCH_IRE_TYPE, 9094 ipst); 9095 if (ire != NULL) { 9096 ire_refrele(ire); 9097 return (B_TRUE); 9098 } 9099 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9100 } 9101 } 9102 /* FALLTHRU */ 9103 default: 9104 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9105 return (B_FALSE); 9106 } 9107 } 9108 9109 /* 9110 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9111 * Assumes that the following set of headers appear in the first 9112 * mblk: 9113 * ip6i_t (if present) CAN also appear as a separate mblk. 9114 * ip6_t 9115 * Any extension headers 9116 * TCP/UDP/SCTP header (if present) 9117 * The routine can handle an ICMPv6 header that is not in the first mblk. 9118 * 9119 * The order to determine the outgoing interface is as follows: 9120 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9121 * 2. If conn_nofailover_ill is set then use that ill. 9122 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9123 * 4. If q is an ill queue and (link local or multicast destination) then 9124 * use that ill. 9125 * 5. If IPV6_BOUND_IF has been set use that ill. 9126 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9127 * look for the best IRE match for the unspecified group to determine 9128 * the ill. 9129 * 7. For unicast: Just do an IRE lookup for the best match. 9130 * 9131 * arg2 is always a queue_t *. 9132 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9133 * the zoneid. 9134 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9135 */ 9136 void 9137 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9138 { 9139 conn_t *connp = NULL; 9140 queue_t *q = (queue_t *)arg2; 9141 ire_t *ire = NULL; 9142 ire_t *sctp_ire = NULL; 9143 ip6_t *ip6h; 9144 in6_addr_t *v6dstp; 9145 ill_t *ill = NULL; 9146 ipif_t *ipif; 9147 ip6i_t *ip6i; 9148 int cksum_request; /* -1 => normal. */ 9149 /* 1 => Skip TCP/UDP/SCTP checksum */ 9150 /* Otherwise contains insert offset for checksum */ 9151 int unspec_src; 9152 boolean_t do_outrequests; /* Increment OutRequests? 
*/ 9153 mib2_ipIfStatsEntry_t *mibptr; 9154 int match_flags = MATCH_IRE_ILL_GROUP; 9155 boolean_t attach_if = B_FALSE; 9156 mblk_t *first_mp; 9157 boolean_t mctl_present; 9158 ipsec_out_t *io; 9159 boolean_t drop_if_delayed = B_FALSE; 9160 boolean_t multirt_need_resolve = B_FALSE; 9161 mblk_t *copy_mp = NULL; 9162 int err; 9163 int ip6i_flags = 0; 9164 zoneid_t zoneid; 9165 ill_t *saved_ill = NULL; 9166 boolean_t conn_lock_held; 9167 boolean_t need_decref = B_FALSE; 9168 ip_stack_t *ipst; 9169 9170 if (q->q_next != NULL) { 9171 ill = (ill_t *)q->q_ptr; 9172 ipst = ill->ill_ipst; 9173 } else { 9174 connp = (conn_t *)arg; 9175 ASSERT(connp != NULL); 9176 ipst = connp->conn_netstack->netstack_ip; 9177 } 9178 9179 /* 9180 * Highest bit in version field is Reachability Confirmation bit 9181 * used by NUD in ip_xmit_v6(). 9182 */ 9183 #ifdef _BIG_ENDIAN 9184 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9185 #else 9186 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9187 #endif 9188 9189 /* 9190 * M_CTL comes from 6 places 9191 * 9192 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9193 * both V4 and V6 datagrams. 9194 * 9195 * 2) AH/ESP sends down M_CTL after doing their job with both 9196 * V4 and V6 datagrams. 9197 * 9198 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9199 * attached. 9200 * 9201 * 4) Notifications from an external resolver (for XRESOLV ifs) 9202 * 9203 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9204 * IPsec hardware acceleration support. 9205 * 9206 * 6) TUN_HELLO. 9207 * 9208 * We need to handle (1)'s IPv6 case and (3) here. For the 9209 * IPv4 case in (1), and (2), IPSEC processing has already 9210 * started. The code in ip_wput() already knows how to handle 9211 * continuing IPSEC processing (for IPv4 and IPv6). All other 9212 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9213 * for handling. 
9214 */ 9215 first_mp = mp; 9216 mctl_present = B_FALSE; 9217 io = NULL; 9218 9219 /* Multidata transmit? */ 9220 if (DB_TYPE(mp) == M_MULTIDATA) { 9221 /* 9222 * We should never get here, since all Multidata messages 9223 * originating from tcp should have been directed over to 9224 * tcp_multisend() in the first place. 9225 */ 9226 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9227 freemsg(mp); 9228 return; 9229 } else if (DB_TYPE(mp) == M_CTL) { 9230 uint32_t mctltype = 0; 9231 uint32_t mlen = MBLKL(first_mp); 9232 9233 mp = mp->b_cont; 9234 mctl_present = B_TRUE; 9235 io = (ipsec_out_t *)first_mp->b_rptr; 9236 9237 /* 9238 * Validate this M_CTL message. The only three types of 9239 * M_CTL messages we expect to see in this code path are 9240 * ipsec_out_t or ipsec_in_t structures (allocated as 9241 * ipsec_info_t unions), or ipsec_ctl_t structures. 9242 * The ipsec_out_type and ipsec_in_type overlap in the two 9243 * data structures, and they are either set to IPSEC_OUT 9244 * or IPSEC_IN depending on which data structure it is. 9245 * ipsec_ctl_t is an IPSEC_CTL. 9246 * 9247 * All other M_CTL messages are sent to ip_wput_nondata() 9248 * for handling. 9249 */ 9250 if (mlen >= sizeof (io->ipsec_out_type)) 9251 mctltype = io->ipsec_out_type; 9252 9253 if ((mlen == sizeof (ipsec_ctl_t)) && 9254 (mctltype == IPSEC_CTL)) { 9255 ip_output(arg, first_mp, arg2, caller); 9256 return; 9257 } 9258 9259 if ((mlen < sizeof (ipsec_info_t)) || 9260 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9261 mp == NULL) { 9262 ip_wput_nondata(NULL, q, first_mp, NULL); 9263 return; 9264 } 9265 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9266 if (q->q_next == NULL) { 9267 ip6h = (ip6_t *)mp->b_rptr; 9268 /* 9269 * For a freshly-generated TCP dgram that needs IPV6 9270 * processing, don't call ip_wput immediately. We can 9271 * tell this by the ipsec_out_proc_begin. 
In-progress 9272 * IPSEC_OUT messages have proc_begin set to TRUE, 9273 * and we want to send all IPSEC_IN messages to 9274 * ip_wput() for IPsec processing or finishing. 9275 */ 9276 if (mctltype == IPSEC_IN || 9277 IPVER(ip6h) != IPV6_VERSION || 9278 io->ipsec_out_proc_begin) { 9279 mibptr = &ipst->ips_ip6_mib; 9280 goto notv6; 9281 } 9282 } 9283 } else if (DB_TYPE(mp) != M_DATA) { 9284 ip_wput_nondata(NULL, q, mp, NULL); 9285 return; 9286 } 9287 9288 ip6h = (ip6_t *)mp->b_rptr; 9289 9290 if (IPVER(ip6h) != IPV6_VERSION) { 9291 mibptr = &ipst->ips_ip6_mib; 9292 goto notv6; 9293 } 9294 9295 if (q->q_next != NULL) { 9296 /* 9297 * We don't know if this ill will be used for IPv6 9298 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9299 * ipif_set_values() sets the ill_isv6 flag to true if 9300 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9301 * just drop the packet. 9302 */ 9303 if (!ill->ill_isv6) { 9304 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9305 "ILLF_IPV6 was set\n")); 9306 freemsg(first_mp); 9307 return; 9308 } 9309 /* For uniformity do a refhold */ 9310 mutex_enter(&ill->ill_lock); 9311 if (!ILL_CAN_LOOKUP(ill)) { 9312 mutex_exit(&ill->ill_lock); 9313 freemsg(first_mp); 9314 return; 9315 } 9316 ill_refhold_locked(ill); 9317 mutex_exit(&ill->ill_lock); 9318 mibptr = ill->ill_ip_mib; 9319 9320 ASSERT(mibptr != NULL); 9321 unspec_src = 0; 9322 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9323 do_outrequests = B_FALSE; 9324 zoneid = (zoneid_t)(uintptr_t)arg; 9325 } else { 9326 connp = (conn_t *)arg; 9327 ASSERT(connp != NULL); 9328 zoneid = connp->conn_zoneid; 9329 9330 /* is queue flow controlled? */ 9331 if ((q->q_first || connp->conn_draining) && 9332 (caller == IP_WPUT)) { 9333 /* 9334 * 1) TCP sends down M_CTL for detached connections. 9335 * 2) AH/ESP sends down M_CTL. 9336 * 9337 * We don't flow control either of the above. Only 9338 * UDP and others are flow controlled for which we 9339 * can't have a M_CTL. 
9340 */ 9341 ASSERT(first_mp == mp); 9342 (void) putq(q, mp); 9343 return; 9344 } 9345 mibptr = &ipst->ips_ip6_mib; 9346 unspec_src = connp->conn_unspec_src; 9347 do_outrequests = B_TRUE; 9348 if (mp->b_flag & MSGHASREF) { 9349 mp->b_flag &= ~MSGHASREF; 9350 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9351 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9352 need_decref = B_TRUE; 9353 } 9354 9355 /* 9356 * If there is a policy, try to attach an ipsec_out in 9357 * the front. At the end, first_mp either points to a 9358 * M_DATA message or IPSEC_OUT message linked to a 9359 * M_DATA message. We have to do it now as we might 9360 * lose the "conn" if we go through ip_newroute. 9361 */ 9362 if (!mctl_present && 9363 (connp->conn_out_enforce_policy || 9364 connp->conn_latch != NULL)) { 9365 ASSERT(first_mp == mp); 9366 /* XXX Any better way to get the protocol fast ? */ 9367 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9368 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9369 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9370 if (need_decref) 9371 CONN_DEC_REF(connp); 9372 return; 9373 } else { 9374 ASSERT(mp->b_datap->db_type == M_CTL); 9375 first_mp = mp; 9376 mp = mp->b_cont; 9377 mctl_present = B_TRUE; 9378 io = (ipsec_out_t *)first_mp->b_rptr; 9379 } 9380 } 9381 } 9382 9383 /* check for alignment and full IPv6 header */ 9384 if (!OK_32PTR((uchar_t *)ip6h) || 9385 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9386 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9387 if (do_outrequests) 9388 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9389 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9390 freemsg(first_mp); 9391 if (ill != NULL) 9392 ill_refrele(ill); 9393 if (need_decref) 9394 CONN_DEC_REF(connp); 9395 return; 9396 } 9397 v6dstp = &ip6h->ip6_dst; 9398 cksum_request = -1; 9399 ip6i = NULL; 9400 9401 /* 9402 * Once neighbor discovery has completed, ndp_process() will provide 9403 * locally generated packets for which processing can be reattempted. 
9404 * In these cases, connp is NULL and the original zone is part of a 9405 * prepended ipsec_out_t. 9406 */ 9407 if (io != NULL) { 9408 /* 9409 * When coming from icmp_input_v6, the zoneid might not match 9410 * for the loopback case, because inside icmp_input_v6 the 9411 * queue_t is a conn queue from the sending side. 9412 */ 9413 zoneid = io->ipsec_out_zoneid; 9414 ASSERT(zoneid != ALL_ZONES); 9415 } 9416 9417 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9418 /* 9419 * This is an ip6i_t header followed by an ip6_hdr. 9420 * Check which fields are set. 9421 * 9422 * When the packet comes from a transport we should have 9423 * all needed headers in the first mblk. However, when 9424 * going through ip_newroute*_v6 the ip6i might be in 9425 * a separate mblk when we return here. In that case 9426 * we pullup everything to ensure that extension and transport 9427 * headers "stay" in the first mblk. 9428 */ 9429 ip6i = (ip6i_t *)ip6h; 9430 ip6i_flags = ip6i->ip6i_flags; 9431 9432 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9433 ((mp->b_wptr - (uchar_t *)ip6i) >= 9434 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9435 9436 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9437 if (!pullupmsg(mp, -1)) { 9438 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9439 if (do_outrequests) { 9440 BUMP_MIB(mibptr, 9441 ipIfStatsHCOutRequests); 9442 } 9443 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9444 freemsg(first_mp); 9445 if (ill != NULL) 9446 ill_refrele(ill); 9447 if (need_decref) 9448 CONN_DEC_REF(connp); 9449 return; 9450 } 9451 ip6h = (ip6_t *)mp->b_rptr; 9452 v6dstp = &ip6h->ip6_dst; 9453 ip6i = (ip6i_t *)ip6h; 9454 } 9455 ip6h = (ip6_t *)&ip6i[1]; 9456 9457 /* 9458 * Advance rptr past the ip6i_t to get ready for 9459 * transmitting the packet. However, if the packet gets 9460 * passed to ip_newroute*_v6 then rptr is moved back so 9461 * that the ip6i_t header can be inspected when the 9462 * packet comes back here after passing through 9463 * ire_add_then_send. 
9464 */ 9465 mp->b_rptr = (uchar_t *)ip6h; 9466 9467 /* 9468 * IP6I_ATTACH_IF is set in this function when we had a 9469 * conn and it was either bound to the IPFF_NOFAILOVER address 9470 * or IPV6_BOUND_PIF was set. These options override other 9471 * options that set the ifindex. We come here with 9472 * IP6I_ATTACH_IF set when we can't find the ire and 9473 * ip_newroute_v6 is feeding the packet for second time. 9474 */ 9475 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9476 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9477 ASSERT(ip6i->ip6i_ifindex != 0); 9478 if (ill != NULL) 9479 ill_refrele(ill); 9480 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9481 NULL, NULL, NULL, NULL, ipst); 9482 if (ill == NULL) { 9483 if (do_outrequests) { 9484 BUMP_MIB(mibptr, 9485 ipIfStatsHCOutRequests); 9486 } 9487 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9488 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9489 ip6i->ip6i_ifindex)); 9490 if (need_decref) 9491 CONN_DEC_REF(connp); 9492 freemsg(first_mp); 9493 return; 9494 } 9495 mibptr = ill->ill_ip_mib; 9496 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9497 /* 9498 * Preserve the index so that when we return 9499 * from IPSEC processing, we know where to 9500 * send the packet. 9501 */ 9502 if (mctl_present) { 9503 ASSERT(io != NULL); 9504 io->ipsec_out_ill_index = 9505 ip6i->ip6i_ifindex; 9506 } 9507 } 9508 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9509 /* 9510 * This is a multipathing probe packet that has 9511 * been delayed in ND resolution. 
Drop the 9512 * packet for the reasons mentioned in 9513 * nce_queue_mp() 9514 */ 9515 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9516 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9517 freemsg(first_mp); 9518 ill_refrele(ill); 9519 if (need_decref) 9520 CONN_DEC_REF(connp); 9521 return; 9522 } 9523 } 9524 } 9525 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9526 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9527 9528 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9529 if (secpolicy_net_rawaccess(cr) != 0) { 9530 /* 9531 * Use IPCL_ZONEID to honor SO_ALLZONES. 9532 */ 9533 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9534 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9535 NULL, connp != NULL ? 9536 IPCL_ZONEID(connp) : zoneid, NULL, 9537 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9538 if (ire == NULL) { 9539 if (do_outrequests) 9540 BUMP_MIB(mibptr, 9541 ipIfStatsHCOutRequests); 9542 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9543 ip1dbg(("ip_wput_v6: bad source " 9544 "addr\n")); 9545 freemsg(first_mp); 9546 if (ill != NULL) 9547 ill_refrele(ill); 9548 if (need_decref) 9549 CONN_DEC_REF(connp); 9550 return; 9551 } 9552 ire_refrele(ire); 9553 } 9554 /* No need to verify again when using ip_newroute */ 9555 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9556 } 9557 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9558 /* 9559 * Make sure they match since ip_newroute*_v6 etc might 9560 * (unknown to them) inspect ip6i_nexthop when 9561 * they think they access ip6_dst. 9562 */ 9563 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9564 } 9565 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9566 cksum_request = 1; 9567 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9568 cksum_request = ip6i->ip6i_checksum_off; 9569 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9570 unspec_src = 1; 9571 9572 if (do_outrequests && ill != NULL) { 9573 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9574 do_outrequests = B_FALSE; 9575 } 9576 /* 9577 * Store ip6i_t info that we need after we come back 9578 * from IPSEC processing. 
9579 */ 9580 if (mctl_present) { 9581 ASSERT(io != NULL); 9582 io->ipsec_out_unspec_src = unspec_src; 9583 } 9584 } 9585 if (connp != NULL && connp->conn_dontroute) 9586 ip6h->ip6_hops = 1; 9587 9588 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9589 goto ipv6multicast; 9590 9591 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9592 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9593 ill_t *conn_outgoing_pill; 9594 9595 conn_outgoing_pill = conn_get_held_ill(connp, 9596 &connp->conn_outgoing_pill, &err); 9597 if (err == ILL_LOOKUP_FAILED) { 9598 if (ill != NULL) 9599 ill_refrele(ill); 9600 if (need_decref) 9601 CONN_DEC_REF(connp); 9602 freemsg(first_mp); 9603 return; 9604 } 9605 if (conn_outgoing_pill != NULL) { 9606 if (ill != NULL) 9607 ill_refrele(ill); 9608 ill = conn_outgoing_pill; 9609 attach_if = B_TRUE; 9610 match_flags = MATCH_IRE_ILL; 9611 mibptr = ill->ill_ip_mib; 9612 9613 /* 9614 * Check if we need an ire that will not be 9615 * looked up by anybody else i.e. HIDDEN. 9616 */ 9617 if (ill_is_probeonly(ill)) 9618 match_flags |= MATCH_IRE_MARK_HIDDEN; 9619 goto send_from_ill; 9620 } 9621 } 9622 9623 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9624 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9625 ill_t *conn_nofailover_ill; 9626 9627 conn_nofailover_ill = conn_get_held_ill(connp, 9628 &connp->conn_nofailover_ill, &err); 9629 if (err == ILL_LOOKUP_FAILED) { 9630 if (ill != NULL) 9631 ill_refrele(ill); 9632 if (need_decref) 9633 CONN_DEC_REF(connp); 9634 freemsg(first_mp); 9635 return; 9636 } 9637 if (conn_nofailover_ill != NULL) { 9638 if (ill != NULL) 9639 ill_refrele(ill); 9640 ill = conn_nofailover_ill; 9641 attach_if = B_TRUE; 9642 /* 9643 * Assumes that ipc_nofailover_ill is used only for 9644 * multipathing probe packets. These packets are better 9645 * dropped, if they are delayed in ND resolution, for 9646 * the reasons described in nce_queue_mp(). 
9647 * IP6I_DROP_IFDELAYED will be set later on in this 9648 * function for this packet. 9649 */ 9650 drop_if_delayed = B_TRUE; 9651 match_flags = MATCH_IRE_ILL; 9652 mibptr = ill->ill_ip_mib; 9653 9654 /* 9655 * Check if we need an ire that will not be 9656 * looked up by anybody else i.e. HIDDEN. 9657 */ 9658 if (ill_is_probeonly(ill)) 9659 match_flags |= MATCH_IRE_MARK_HIDDEN; 9660 goto send_from_ill; 9661 } 9662 } 9663 9664 /* 9665 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9666 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9667 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9668 */ 9669 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9670 ASSERT(ip6i->ip6i_ifindex != 0); 9671 attach_if = B_TRUE; 9672 ASSERT(ill != NULL); 9673 match_flags = MATCH_IRE_ILL; 9674 9675 /* 9676 * Check if we need an ire that will not be 9677 * looked up by anybody else i.e. HIDDEN. 9678 */ 9679 if (ill_is_probeonly(ill)) 9680 match_flags |= MATCH_IRE_MARK_HIDDEN; 9681 goto send_from_ill; 9682 } 9683 9684 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9685 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9686 ASSERT(ill != NULL); 9687 goto send_from_ill; 9688 } 9689 9690 /* 9691 * 4. If q is an ill queue and (link local or multicast destination) 9692 * then use that ill. 9693 */ 9694 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9695 goto send_from_ill; 9696 } 9697 9698 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9699 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9700 ill_t *conn_outgoing_ill; 9701 9702 conn_outgoing_ill = conn_get_held_ill(connp, 9703 &connp->conn_outgoing_ill, &err); 9704 if (err == ILL_LOOKUP_FAILED) { 9705 if (ill != NULL) 9706 ill_refrele(ill); 9707 if (need_decref) 9708 CONN_DEC_REF(connp); 9709 freemsg(first_mp); 9710 return; 9711 } 9712 if (ill != NULL) 9713 ill_refrele(ill); 9714 ill = conn_outgoing_ill; 9715 mibptr = ill->ill_ip_mib; 9716 goto send_from_ill; 9717 } 9718 9719 /* 9720 * 6. For unicast: Just do an IRE lookup for the best match. 9721 * If we get here for a link-local address it is rather random 9722 * what interface we pick on a multihomed host. 9723 * *If* there is an IRE_CACHE (and the link-local address 9724 * isn't duplicated on multi links) this will find the IRE_CACHE. 9725 * Otherwise it will use one of the matching IRE_INTERFACE routes 9726 * for the link-local prefix. Hence, applications 9727 * *should* be encouraged to specify an outgoing interface when sending 9728 * to a link local address. 9729 */ 9730 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9731 !connp->conn_fully_bound)) { 9732 /* 9733 * We cache IRE_CACHEs to avoid lookups. We don't do 9734 * this for the tcp global queue and listen end point 9735 * as it does not really have a real destination to 9736 * talk to. 9737 */ 9738 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), 9739 ipst); 9740 } else { 9741 /* 9742 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9743 * grab a lock here to check for CONDEMNED as it is okay 9744 * to send a packet or two with the IRE_CACHE that is going 9745 * away. 9746 */ 9747 mutex_enter(&connp->conn_lock); 9748 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9749 if (ire != NULL && 9750 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9751 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9752 9753 IRE_REFHOLD(ire); 9754 mutex_exit(&connp->conn_lock); 9755 9756 } else { 9757 boolean_t cached = B_FALSE; 9758 9759 connp->conn_ire_cache = NULL; 9760 mutex_exit(&connp->conn_lock); 9761 /* Release the old ire */ 9762 if (ire != NULL && sctp_ire == NULL) 9763 IRE_REFRELE_NOTR(ire); 9764 9765 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9766 MBLK_GETLABEL(mp), ipst); 9767 if (ire != NULL) { 9768 IRE_REFHOLD_NOTR(ire); 9769 9770 mutex_enter(&connp->conn_lock); 9771 if (CONN_CACHE_IRE(connp) && 9772 (connp->conn_ire_cache == NULL)) { 9773 rw_enter(&ire->ire_bucket->irb_lock, 9774 RW_READER); 9775 if (!(ire->ire_marks & 9776 IRE_MARK_CONDEMNED)) { 9777 connp->conn_ire_cache = ire; 9778 cached = B_TRUE; 9779 } 9780 rw_exit(&ire->ire_bucket->irb_lock); 9781 } 9782 mutex_exit(&connp->conn_lock); 9783 9784 /* 9785 * We can continue to use the ire but since it 9786 * was not cached, we should drop the extra 9787 * reference. 9788 */ 9789 if (!cached) 9790 IRE_REFRELE_NOTR(ire); 9791 } 9792 } 9793 } 9794 9795 if (ire != NULL) { 9796 if (do_outrequests) { 9797 /* Handle IRE_LOCAL's that might appear here */ 9798 if (ire->ire_type == IRE_CACHE) { 9799 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9800 ill_ip_mib; 9801 } else { 9802 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9803 } 9804 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9805 } 9806 ASSERT(!attach_if); 9807 9808 /* 9809 * Check if the ire has the RTF_MULTIRT flag, inherited 9810 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9811 */ 9812 if (ire->ire_flags & RTF_MULTIRT) { 9813 /* 9814 * Force hop limit of multirouted packets if required. 9815 * The hop limit of such packets is bounded by the 9816 * ip_multirt_ttl ndd variable. 9817 * NDP packets must have a hop limit of 255; don't 9818 * change the hop limit in that case. 
9819 */ 9820 if ((ipst->ips_ip_multirt_ttl > 0) && 9821 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9822 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9823 if (ip_debug > 3) { 9824 ip2dbg(("ip_wput_v6: forcing multirt " 9825 "hop limit to %d (was %d) ", 9826 ipst->ips_ip_multirt_ttl, 9827 ip6h->ip6_hops)); 9828 pr_addr_dbg("v6dst %s\n", AF_INET6, 9829 &ire->ire_addr_v6); 9830 } 9831 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9832 } 9833 9834 /* 9835 * We look at this point if there are pending 9836 * unresolved routes. ire_multirt_need_resolve_v6() 9837 * checks in O(n) that all IRE_OFFSUBNET ire 9838 * entries for the packet's destination and 9839 * flagged RTF_MULTIRT are currently resolved. 9840 * If some remain unresolved, we do a copy 9841 * of the current message. It will be used 9842 * to initiate additional route resolutions. 9843 */ 9844 multirt_need_resolve = 9845 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9846 MBLK_GETLABEL(first_mp), ipst); 9847 ip2dbg(("ip_wput_v6: ire %p, " 9848 "multirt_need_resolve %d, first_mp %p\n", 9849 (void *)ire, multirt_need_resolve, 9850 (void *)first_mp)); 9851 if (multirt_need_resolve) { 9852 copy_mp = copymsg(first_mp); 9853 if (copy_mp != NULL) { 9854 MULTIRT_DEBUG_TAG(copy_mp); 9855 } 9856 } 9857 } 9858 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9859 connp, caller, 0, ip6i_flags, zoneid); 9860 if (need_decref) { 9861 CONN_DEC_REF(connp); 9862 connp = NULL; 9863 } 9864 IRE_REFRELE(ire); 9865 9866 /* 9867 * Try to resolve another multiroute if 9868 * ire_multirt_need_resolve_v6() deemed it necessary. 9869 * copy_mp will be consumed (sent or freed) by 9870 * ip_newroute_v6(). 
9871 */ 9872 if (copy_mp != NULL) { 9873 if (mctl_present) { 9874 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9875 } else { 9876 ip6h = (ip6_t *)copy_mp->b_rptr; 9877 } 9878 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9879 &ip6h->ip6_src, NULL, zoneid, ipst); 9880 } 9881 if (ill != NULL) 9882 ill_refrele(ill); 9883 return; 9884 } 9885 9886 /* 9887 * No full IRE for this destination. Send it to 9888 * ip_newroute_v6 to see if anything else matches. 9889 * Mark this packet as having originated on this 9890 * machine. 9891 * Update rptr if there was an ip6i_t header. 9892 */ 9893 mp->b_prev = NULL; 9894 mp->b_next = NULL; 9895 if (ip6i != NULL) 9896 mp->b_rptr -= sizeof (ip6i_t); 9897 9898 if (unspec_src) { 9899 if (ip6i == NULL) { 9900 /* 9901 * Add ip6i_t header to carry unspec_src 9902 * until the packet comes back in ip_wput_v6. 9903 */ 9904 mp = ip_add_info_v6(mp, NULL, v6dstp); 9905 if (mp == NULL) { 9906 if (do_outrequests) 9907 BUMP_MIB(mibptr, 9908 ipIfStatsHCOutRequests); 9909 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9910 if (mctl_present) 9911 freeb(first_mp); 9912 if (ill != NULL) 9913 ill_refrele(ill); 9914 if (need_decref) 9915 CONN_DEC_REF(connp); 9916 return; 9917 } 9918 ip6i = (ip6i_t *)mp->b_rptr; 9919 9920 if (mctl_present) { 9921 ASSERT(first_mp != mp); 9922 first_mp->b_cont = mp; 9923 } else { 9924 first_mp = mp; 9925 } 9926 9927 if ((mp->b_wptr - (uchar_t *)ip6i) == 9928 sizeof (ip6i_t)) { 9929 /* 9930 * ndp_resolver called from ip_newroute_v6 9931 * expects pulled up message. 
9932 */ 9933 if (!pullupmsg(mp, -1)) { 9934 ip1dbg(("ip_wput_v6: pullupmsg" 9935 " failed\n")); 9936 if (do_outrequests) { 9937 BUMP_MIB(mibptr, 9938 ipIfStatsHCOutRequests); 9939 } 9940 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9941 freemsg(first_mp); 9942 if (ill != NULL) 9943 ill_refrele(ill); 9944 if (need_decref) 9945 CONN_DEC_REF(connp); 9946 return; 9947 } 9948 ip6i = (ip6i_t *)mp->b_rptr; 9949 } 9950 ip6h = (ip6_t *)&ip6i[1]; 9951 v6dstp = &ip6h->ip6_dst; 9952 } 9953 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9954 if (mctl_present) { 9955 ASSERT(io != NULL); 9956 io->ipsec_out_unspec_src = unspec_src; 9957 } 9958 } 9959 if (do_outrequests) 9960 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9961 if (need_decref) 9962 CONN_DEC_REF(connp); 9963 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 9964 if (ill != NULL) 9965 ill_refrele(ill); 9966 return; 9967 9968 9969 /* 9970 * Handle multicast packets with or without an conn. 9971 * Assumes that the transports set ip6_hops taking 9972 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9973 * into account. 9974 */ 9975 ipv6multicast: 9976 ip2dbg(("ip_wput_v6: multicast\n")); 9977 9978 /* 9979 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 9980 * 2. If conn_nofailover_ill is set then use that ill. 9981 * 9982 * Hold the conn_lock till we refhold the ill of interest that is 9983 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9984 * while holding any locks, postpone the refrele until after the 9985 * conn_lock is dropped. 
9986 */ 9987 if (connp != NULL) { 9988 mutex_enter(&connp->conn_lock); 9989 conn_lock_held = B_TRUE; 9990 } else { 9991 conn_lock_held = B_FALSE; 9992 } 9993 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9994 err = ill_check_and_refhold(connp->conn_outgoing_pill); 9995 if (err == ILL_LOOKUP_FAILED) { 9996 ip1dbg(("ip_output_v6: multicast" 9997 " conn_outgoing_pill no ipif\n")); 9998 multicast_discard: 9999 ASSERT(saved_ill == NULL); 10000 if (conn_lock_held) 10001 mutex_exit(&connp->conn_lock); 10002 if (ill != NULL) 10003 ill_refrele(ill); 10004 freemsg(first_mp); 10005 if (do_outrequests) 10006 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10007 if (need_decref) 10008 CONN_DEC_REF(connp); 10009 return; 10010 } 10011 saved_ill = ill; 10012 ill = connp->conn_outgoing_pill; 10013 attach_if = B_TRUE; 10014 match_flags = MATCH_IRE_ILL; 10015 mibptr = ill->ill_ip_mib; 10016 10017 /* 10018 * Check if we need an ire that will not be 10019 * looked up by anybody else i.e. HIDDEN. 10020 */ 10021 if (ill_is_probeonly(ill)) 10022 match_flags |= MATCH_IRE_MARK_HIDDEN; 10023 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10024 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10025 if (err == ILL_LOOKUP_FAILED) { 10026 ip1dbg(("ip_output_v6: multicast" 10027 " conn_nofailover_ill no ipif\n")); 10028 goto multicast_discard; 10029 } 10030 saved_ill = ill; 10031 ill = connp->conn_nofailover_ill; 10032 attach_if = B_TRUE; 10033 match_flags = MATCH_IRE_ILL; 10034 10035 /* 10036 * Check if we need an ire that will not be 10037 * looked up by anybody else i.e. HIDDEN. 10038 */ 10039 if (ill_is_probeonly(ill)) 10040 match_flags |= MATCH_IRE_MARK_HIDDEN; 10041 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10042 /* 10043 * Redo 1. If we did not find an IRE_CACHE the first time, 10044 * we should have an ip6i_t with IP6I_ATTACH_IF if 10045 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10046 * used on this endpoint. 
10047 */ 10048 ASSERT(ip6i->ip6i_ifindex != 0); 10049 attach_if = B_TRUE; 10050 ASSERT(ill != NULL); 10051 match_flags = MATCH_IRE_ILL; 10052 10053 /* 10054 * Check if we need an ire that will not be 10055 * looked up by anybody else i.e. HIDDEN. 10056 */ 10057 if (ill_is_probeonly(ill)) 10058 match_flags |= MATCH_IRE_MARK_HIDDEN; 10059 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10060 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10061 10062 ASSERT(ill != NULL); 10063 } else if (ill != NULL) { 10064 /* 10065 * 4. If q is an ill queue and (link local or multicast 10066 * destination) then use that ill. 10067 * We don't need the ipif initialization here. 10068 * This useless assert below is just to prevent lint from 10069 * reporting a null body if statement. 10070 */ 10071 ASSERT(ill != NULL); 10072 } else if (connp != NULL) { 10073 /* 10074 * 5. If IPV6_BOUND_IF has been set use that ill. 10075 * 10076 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10077 * Otherwise look for the best IRE match for the unspecified 10078 * group to determine the ill. 10079 * 10080 * conn_multicast_ill is used for only IPv6 packets. 10081 * conn_multicast_ipif is used for only IPv4 packets. 10082 * Thus a PF_INET6 socket send both IPv4 and IPv6 10083 * multicast packets using different IP*_MULTICAST_IF 10084 * interfaces. 
10085 */ 10086 if (connp->conn_outgoing_ill != NULL) { 10087 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10088 if (err == ILL_LOOKUP_FAILED) { 10089 ip1dbg(("ip_output_v6: multicast" 10090 " conn_outgoing_ill no ipif\n")); 10091 goto multicast_discard; 10092 } 10093 ill = connp->conn_outgoing_ill; 10094 } else if (connp->conn_multicast_ill != NULL) { 10095 err = ill_check_and_refhold(connp->conn_multicast_ill); 10096 if (err == ILL_LOOKUP_FAILED) { 10097 ip1dbg(("ip_output_v6: multicast" 10098 " conn_multicast_ill no ipif\n")); 10099 goto multicast_discard; 10100 } 10101 ill = connp->conn_multicast_ill; 10102 } else { 10103 mutex_exit(&connp->conn_lock); 10104 conn_lock_held = B_FALSE; 10105 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 10106 if (ipif == NULL) { 10107 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10108 goto multicast_discard; 10109 } 10110 /* 10111 * We have a ref to this ipif, so we can safely 10112 * access ipif_ill. 10113 */ 10114 ill = ipif->ipif_ill; 10115 mutex_enter(&ill->ill_lock); 10116 if (!ILL_CAN_LOOKUP(ill)) { 10117 mutex_exit(&ill->ill_lock); 10118 ipif_refrele(ipif); 10119 ill = NULL; 10120 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10121 goto multicast_discard; 10122 } 10123 ill_refhold_locked(ill); 10124 mutex_exit(&ill->ill_lock); 10125 ipif_refrele(ipif); 10126 /* 10127 * Save binding until IPV6_MULTICAST_IF 10128 * changes it 10129 */ 10130 mutex_enter(&connp->conn_lock); 10131 connp->conn_multicast_ill = ill; 10132 connp->conn_orig_multicast_ifindex = 10133 ill->ill_phyint->phyint_ifindex; 10134 mutex_exit(&connp->conn_lock); 10135 } 10136 } 10137 if (conn_lock_held) 10138 mutex_exit(&connp->conn_lock); 10139 10140 if (saved_ill != NULL) 10141 ill_refrele(saved_ill); 10142 10143 ASSERT(ill != NULL); 10144 /* 10145 * For multicast loopback interfaces replace the multicast address 10146 * with a unicast address for the ire lookup. 
10147 */ 10148 if (IS_LOOPBACK(ill)) 10149 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10150 10151 mibptr = ill->ill_ip_mib; 10152 if (do_outrequests) { 10153 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10154 do_outrequests = B_FALSE; 10155 } 10156 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10157 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10158 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10159 10160 /* 10161 * As we may lose the conn by the time we reach ip_wput_ire_v6 10162 * we copy conn_multicast_loop and conn_dontroute on to an 10163 * ipsec_out. In case if this datagram goes out secure, 10164 * we need the ill_index also. Copy that also into the 10165 * ipsec_out. 10166 */ 10167 if (mctl_present) { 10168 io = (ipsec_out_t *)first_mp->b_rptr; 10169 ASSERT(first_mp->b_datap->db_type == M_CTL); 10170 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10171 } else { 10172 ASSERT(mp == first_mp); 10173 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 10174 NULL) { 10175 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10176 freemsg(mp); 10177 if (ill != NULL) 10178 ill_refrele(ill); 10179 if (need_decref) 10180 CONN_DEC_REF(connp); 10181 return; 10182 } 10183 io = (ipsec_out_t *)first_mp->b_rptr; 10184 /* This is not a secure packet */ 10185 io->ipsec_out_secure = B_FALSE; 10186 io->ipsec_out_use_global_policy = B_TRUE; 10187 io->ipsec_out_zoneid = 10188 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10189 first_mp->b_cont = mp; 10190 mctl_present = B_TRUE; 10191 } 10192 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10193 io->ipsec_out_unspec_src = unspec_src; 10194 if (connp != NULL) 10195 io->ipsec_out_dontroute = connp->conn_dontroute; 10196 10197 send_from_ill: 10198 ASSERT(ill != NULL); 10199 ASSERT(mibptr == ill->ill_ip_mib); 10200 if (do_outrequests) { 10201 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10202 do_outrequests = B_FALSE; 10203 } 10204 10205 if (io != NULL) 10206 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10207 10208 /* 10209 * When a specific ill is specified (using IPV6_PKTINFO, 10210 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10211 * on routing entries (ftable and ctable) that have a matching 10212 * ire->ire_ipif->ipif_ill. Thus this can only be used 10213 * for destinations that are on-link for the specific ill 10214 * and that can appear on multiple links. Thus it is useful 10215 * for multicast destinations, link-local destinations, and 10216 * at some point perhaps for site-local destinations (if the 10217 * node sits at a site boundary). 10218 * We create the cache entries in the regular ctable since 10219 * it can not "confuse" things for other destinations. 10220 * table. 10221 * 10222 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10223 * It is used only when ire_cache_lookup is used above. 10224 */ 10225 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10226 zoneid, MBLK_GETLABEL(mp), match_flags, ipst); 10227 if (ire != NULL) { 10228 /* 10229 * Check if the ire has the RTF_MULTIRT flag, inherited 10230 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10231 */ 10232 if (ire->ire_flags & RTF_MULTIRT) { 10233 /* 10234 * Force hop limit of multirouted packets if required. 10235 * The hop limit of such packets is bounded by the 10236 * ip_multirt_ttl ndd variable. 
10237 * NDP packets must have a hop limit of 255; don't 10238 * change the hop limit in that case. 10239 */ 10240 if ((ipst->ips_ip_multirt_ttl > 0) && 10241 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10242 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10243 if (ip_debug > 3) { 10244 ip2dbg(("ip_wput_v6: forcing multirt " 10245 "hop limit to %d (was %d) ", 10246 ipst->ips_ip_multirt_ttl, 10247 ip6h->ip6_hops)); 10248 pr_addr_dbg("v6dst %s\n", AF_INET6, 10249 &ire->ire_addr_v6); 10250 } 10251 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10252 } 10253 10254 /* 10255 * We look at this point if there are pending 10256 * unresolved routes. ire_multirt_need_resolve_v6() 10257 * checks in O(n) that all IRE_OFFSUBNET ire 10258 * entries for the packet's destination and 10259 * flagged RTF_MULTIRT are currently resolved. 10260 * If some remain unresolved, we make a copy 10261 * of the current message. It will be used 10262 * to initiate additional route resolutions. 10263 */ 10264 multirt_need_resolve = 10265 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10266 MBLK_GETLABEL(first_mp), ipst); 10267 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10268 "multirt_need_resolve %d, first_mp %p\n", 10269 (void *)ire, multirt_need_resolve, 10270 (void *)first_mp)); 10271 if (multirt_need_resolve) { 10272 copy_mp = copymsg(first_mp); 10273 if (copy_mp != NULL) { 10274 MULTIRT_DEBUG_TAG(copy_mp); 10275 } 10276 } 10277 } 10278 10279 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10280 ill->ill_name, (void *)ire, 10281 ill->ill_phyint->phyint_ifindex)); 10282 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10283 connp, caller, 10284 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10285 ip6i_flags, zoneid); 10286 ire_refrele(ire); 10287 if (need_decref) { 10288 CONN_DEC_REF(connp); 10289 connp = NULL; 10290 } 10291 10292 /* 10293 * Try to resolve another multiroute if 10294 * ire_multirt_need_resolve_v6() deemed it necessary. 
10295 * copy_mp will be consumed (sent or freed) by 10296 * ip_newroute_[ipif_]v6(). 10297 */ 10298 if (copy_mp != NULL) { 10299 if (mctl_present) { 10300 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10301 } else { 10302 ip6h = (ip6_t *)copy_mp->b_rptr; 10303 } 10304 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10305 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10306 zoneid, ipst); 10307 if (ipif == NULL) { 10308 ip1dbg(("ip_wput_v6: No ipif for " 10309 "multicast\n")); 10310 MULTIRT_DEBUG_UNTAG(copy_mp); 10311 freemsg(copy_mp); 10312 return; 10313 } 10314 ip_newroute_ipif_v6(q, copy_mp, ipif, 10315 ip6h->ip6_dst, unspec_src, zoneid); 10316 ipif_refrele(ipif); 10317 } else { 10318 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10319 &ip6h->ip6_src, ill, zoneid, ipst); 10320 } 10321 } 10322 ill_refrele(ill); 10323 return; 10324 } 10325 if (need_decref) { 10326 CONN_DEC_REF(connp); 10327 connp = NULL; 10328 } 10329 10330 /* Update rptr if there was an ip6i_t header. */ 10331 if (ip6i != NULL) 10332 mp->b_rptr -= sizeof (ip6i_t); 10333 if (unspec_src || attach_if) { 10334 if (ip6i == NULL) { 10335 /* 10336 * Add ip6i_t header to carry unspec_src 10337 * or attach_if until the packet comes back in 10338 * ip_wput_v6. 10339 */ 10340 if (mctl_present) { 10341 first_mp->b_cont = 10342 ip_add_info_v6(mp, NULL, v6dstp); 10343 mp = first_mp->b_cont; 10344 if (mp == NULL) 10345 freeb(first_mp); 10346 } else { 10347 first_mp = mp = ip_add_info_v6(mp, NULL, 10348 v6dstp); 10349 } 10350 if (mp == NULL) { 10351 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10352 ill_refrele(ill); 10353 return; 10354 } 10355 ip6i = (ip6i_t *)mp->b_rptr; 10356 if ((mp->b_wptr - (uchar_t *)ip6i) == 10357 sizeof (ip6i_t)) { 10358 /* 10359 * ndp_resolver called from ip_newroute_v6 10360 * expects a pulled up message. 
10361 */ 10362 if (!pullupmsg(mp, -1)) { 10363 ip1dbg(("ip_wput_v6: pullupmsg" 10364 " failed\n")); 10365 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10366 freemsg(first_mp); 10367 return; 10368 } 10369 ip6i = (ip6i_t *)mp->b_rptr; 10370 } 10371 ip6h = (ip6_t *)&ip6i[1]; 10372 v6dstp = &ip6h->ip6_dst; 10373 } 10374 if (unspec_src) 10375 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10376 if (attach_if) { 10377 /* 10378 * Bind to nofailover/BOUND_PIF overrides ifindex. 10379 */ 10380 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10381 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10382 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10383 if (drop_if_delayed) { 10384 /* This is a multipathing probe packet */ 10385 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10386 } 10387 } 10388 if (mctl_present) { 10389 ASSERT(io != NULL); 10390 io->ipsec_out_unspec_src = unspec_src; 10391 } 10392 } 10393 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10394 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10395 unspec_src, zoneid); 10396 } else { 10397 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10398 zoneid, ipst); 10399 } 10400 ill_refrele(ill); 10401 return; 10402 10403 notv6: 10404 /* FIXME?: assume the caller calls the right version of ip_output? */ 10405 if (q->q_next == NULL) { 10406 connp = Q_TO_CONN(q); 10407 10408 /* 10409 * We can change conn_send for all types of conn, even 10410 * though only TCP uses it right now. 10411 * FIXME: sctp could use conn_send but doesn't currently. 10412 */ 10413 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10414 } 10415 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10416 (void) ip_output(arg, first_mp, arg2, caller); 10417 if (ill != NULL) 10418 ill_refrele(ill); 10419 } 10420 10421 /* 10422 * If this is a conn_t queue, then we pass in the conn. This includes the 10423 * zoneid. 10424 * Otherwise, this is a message for an ill_t queue, 10425 * in which case we use the global zoneid since those are all part of 10426 * the global zone. 
10427 */ 10428 void 10429 ip_wput_v6(queue_t *q, mblk_t *mp) 10430 { 10431 if (CONN_Q(q)) 10432 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10433 else 10434 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10435 } 10436 10437 static void 10438 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10439 { 10440 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10441 io->ipsec_out_attach_if = B_TRUE; 10442 io->ipsec_out_ill_index = attach_index; 10443 } 10444 10445 /* 10446 * NULL send-to queue - packet is to be delivered locally. 10447 */ 10448 void 10449 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10450 ire_t *ire, int fanout_flags) 10451 { 10452 uint32_t ports; 10453 mblk_t *mp = first_mp, *first_mp1; 10454 boolean_t mctl_present; 10455 uint8_t nexthdr; 10456 uint16_t hdr_length; 10457 ipsec_out_t *io; 10458 mib2_ipIfStatsEntry_t *mibptr; 10459 ilm_t *ilm; 10460 uint_t nexthdr_offset; 10461 ip_stack_t *ipst = ill->ill_ipst; 10462 10463 if (DB_TYPE(mp) == M_CTL) { 10464 io = (ipsec_out_t *)mp->b_rptr; 10465 if (!io->ipsec_out_secure) { 10466 mp = mp->b_cont; 10467 freeb(first_mp); 10468 first_mp = mp; 10469 mctl_present = B_FALSE; 10470 } else { 10471 mctl_present = B_TRUE; 10472 mp = first_mp->b_cont; 10473 ipsec_out_to_in(first_mp); 10474 } 10475 } else { 10476 mctl_present = B_FALSE; 10477 } 10478 10479 /* 10480 * Remove reachability confirmation bit from version field 10481 * before passing the packet on to any firewall hooks or 10482 * looping back the packet. 
10483 */ 10484 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10485 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10486 10487 DTRACE_PROBE4(ip6__loopback__in__start, 10488 ill_t *, ill, ill_t *, NULL, 10489 ip6_t *, ip6h, mblk_t *, first_mp); 10490 10491 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10492 ipst->ips_ipv6firewall_loopback_in, 10493 ill, NULL, ip6h, first_mp, mp, ipst); 10494 10495 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10496 10497 if (first_mp == NULL) 10498 return; 10499 10500 nexthdr = ip6h->ip6_nxt; 10501 mibptr = ill->ill_ip_mib; 10502 10503 /* Fastpath */ 10504 switch (nexthdr) { 10505 case IPPROTO_TCP: 10506 case IPPROTO_UDP: 10507 case IPPROTO_ICMPV6: 10508 case IPPROTO_SCTP: 10509 hdr_length = IPV6_HDR_LEN; 10510 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10511 (uchar_t *)ip6h); 10512 break; 10513 default: { 10514 uint8_t *nexthdrp; 10515 10516 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10517 &hdr_length, &nexthdrp)) { 10518 /* Malformed packet */ 10519 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10520 freemsg(first_mp); 10521 return; 10522 } 10523 nexthdr = *nexthdrp; 10524 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10525 break; 10526 } 10527 } 10528 10529 UPDATE_OB_PKT_COUNT(ire); 10530 ire->ire_last_used_time = lbolt; 10531 10532 switch (nexthdr) { 10533 case IPPROTO_TCP: 10534 if (DB_TYPE(mp) == M_DATA) { 10535 /* 10536 * M_DATA mblk, so init mblk (chain) for 10537 * no struio(). 
10538 */ 10539 mblk_t *mp1 = mp; 10540 10541 do { 10542 mp1->b_datap->db_struioflag = 0; 10543 } while ((mp1 = mp1->b_cont) != NULL); 10544 } 10545 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10546 TCP_PORTS_OFFSET); 10547 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10548 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10549 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10550 hdr_length, mctl_present, ire->ire_zoneid); 10551 return; 10552 10553 case IPPROTO_UDP: 10554 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10555 UDP_PORTS_OFFSET); 10556 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10557 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10558 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10559 return; 10560 10561 case IPPROTO_SCTP: 10562 { 10563 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10564 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10565 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10566 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10567 return; 10568 } 10569 case IPPROTO_ICMPV6: { 10570 icmp6_t *icmp6; 10571 10572 /* check for full IPv6+ICMPv6 header */ 10573 if ((mp->b_wptr - mp->b_rptr) < 10574 (hdr_length + ICMP6_MINLEN)) { 10575 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10576 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10577 " failed\n")); 10578 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10579 freemsg(first_mp); 10580 return; 10581 } 10582 ip6h = (ip6_t *)mp->b_rptr; 10583 } 10584 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10585 10586 /* Update output mib stats */ 10587 icmp_update_out_mib_v6(ill, icmp6); 10588 10589 /* Check variable for testing applications */ 10590 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10591 freemsg(first_mp); 10592 return; 10593 } 10594 /* 10595 * Assume that there is always at least one conn for 10596 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10597 * where there is no conn. 
10598 */ 10599 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10600 !IS_LOOPBACK(ill)) { 10601 /* 10602 * In the multicast case, applications may have 10603 * joined the group from different zones, so we 10604 * need to deliver the packet to each of them. 10605 * Loop through the multicast memberships 10606 * structures (ilm) on the receive ill and send 10607 * a copy of the packet up each matching one. 10608 * However, we don't do this for multicasts sent 10609 * on the loopback interface (PHYI_LOOPBACK flag 10610 * set) as they must stay in the sender's zone. 10611 */ 10612 ILM_WALKER_HOLD(ill); 10613 for (ilm = ill->ill_ilm; ilm != NULL; 10614 ilm = ilm->ilm_next) { 10615 if (ilm->ilm_flags & ILM_DELETED) 10616 continue; 10617 if (!IN6_ARE_ADDR_EQUAL( 10618 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10619 continue; 10620 if ((fanout_flags & 10621 IP_FF_NO_MCAST_LOOP) && 10622 ilm->ilm_zoneid == ire->ire_zoneid) 10623 continue; 10624 if (!ipif_lookup_zoneid(ill, 10625 ilm->ilm_zoneid, IPIF_UP, NULL)) 10626 continue; 10627 10628 first_mp1 = ip_copymsg(first_mp); 10629 if (first_mp1 == NULL) 10630 continue; 10631 icmp_inbound_v6(q, first_mp1, ill, 10632 hdr_length, mctl_present, 10633 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10634 NULL); 10635 } 10636 ILM_WALKER_RELE(ill); 10637 } else { 10638 first_mp1 = ip_copymsg(first_mp); 10639 if (first_mp1 != NULL) 10640 icmp_inbound_v6(q, first_mp1, ill, 10641 hdr_length, mctl_present, 10642 IP6_NO_IPPOLICY, ire->ire_zoneid, 10643 NULL); 10644 } 10645 } 10646 /* FALLTHRU */ 10647 default: { 10648 /* 10649 * Handle protocols with which IPv6 is less intimate. 10650 */ 10651 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10652 10653 /* 10654 * Enable sending ICMP for "Unknown" nexthdr 10655 * case. i.e. where we did not FALLTHRU from 10656 * IPPROTO_ICMPV6 processing case above. 
10657 */ 10658 if (nexthdr != IPPROTO_ICMPV6) 10659 fanout_flags |= IP_FF_SEND_ICMP; 10660 /* 10661 * Note: There can be more than one stream bound 10662 * to a particular protocol. When this is the case, 10663 * each one gets a copy of any incoming packets. 10664 */ 10665 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10666 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10667 mctl_present, ire->ire_zoneid); 10668 return; 10669 } 10670 } 10671 } 10672 10673 /* 10674 * Send packet using IRE. 10675 * Checksumming is controlled by cksum_request: 10676 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10677 * 1 => Skip TCP/UDP/SCTP checksum 10678 * Otherwise => checksum_request contains insert offset for checksum 10679 * 10680 * Assumes that the following set of headers appear in the first 10681 * mblk: 10682 * ip6_t 10683 * Any extension headers 10684 * TCP/UDP/SCTP header (if present) 10685 * The routine can handle an ICMPv6 header that is not in the first mblk. 10686 * 10687 * NOTE : This function does not ire_refrele the ire passed in as the 10688 * argument unlike ip_wput_ire where the REFRELE is done. 10689 * Refer to ip_wput_ire for more on this. 10690 */ 10691 static void 10692 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10693 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10694 zoneid_t zoneid) 10695 { 10696 ip6_t *ip6h; 10697 uint8_t nexthdr; 10698 uint16_t hdr_length; 10699 uint_t reachable = 0x0; 10700 ill_t *ill; 10701 mib2_ipIfStatsEntry_t *mibptr; 10702 mblk_t *first_mp; 10703 boolean_t mctl_present; 10704 ipsec_out_t *io; 10705 boolean_t conn_dontroute; /* conn value for multicast */ 10706 boolean_t conn_multicast_loop; /* conn value for multicast */ 10707 boolean_t multicast_forward; /* Should we forward ? 
*/ 10708 int max_frag; 10709 ip_stack_t *ipst = ire->ire_ipst; 10710 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10711 10712 ill = ire_to_ill(ire); 10713 first_mp = mp; 10714 multicast_forward = B_FALSE; 10715 10716 if (mp->b_datap->db_type != M_CTL) { 10717 ip6h = (ip6_t *)first_mp->b_rptr; 10718 } else { 10719 io = (ipsec_out_t *)first_mp->b_rptr; 10720 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10721 /* 10722 * Grab the zone id now because the M_CTL can be discarded by 10723 * ip_wput_ire_parse_ipsec_out() below. 10724 */ 10725 ASSERT(zoneid == io->ipsec_out_zoneid); 10726 ASSERT(zoneid != ALL_ZONES); 10727 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10728 /* 10729 * For the multicast case, ipsec_out carries conn_dontroute and 10730 * conn_multicast_loop as conn may not be available here. We 10731 * need this for multicast loopback and forwarding which is done 10732 * later in the code. 10733 */ 10734 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10735 conn_dontroute = io->ipsec_out_dontroute; 10736 conn_multicast_loop = io->ipsec_out_multicast_loop; 10737 /* 10738 * If conn_dontroute is not set or conn_multicast_loop 10739 * is set, we need to do forwarding/loopback. For 10740 * datagrams from ip_wput_multicast, conn_dontroute is 10741 * set to B_TRUE and conn_multicast_loop is set to 10742 * B_FALSE so that we neither do forwarding nor 10743 * loopback. 10744 */ 10745 if (!conn_dontroute || conn_multicast_loop) 10746 multicast_forward = B_TRUE; 10747 } 10748 } 10749 10750 /* 10751 * If the sender didn't supply the hop limit and there is a default 10752 * unicast hop limit associated with the output interface, we use 10753 * that if the packet is unicast. Interface specific unicast hop 10754 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10755 */ 10756 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10757 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10758 ip6h->ip6_hops = ill->ill_max_hops; 10759 } 10760 10761 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10762 ire->ire_zoneid != ALL_ZONES) { 10763 /* 10764 * When a zone sends a packet to another zone, we try to deliver 10765 * the packet under the same conditions as if the destination 10766 * was a real node on the network. To do so, we look for a 10767 * matching route in the forwarding table. 10768 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10769 * ip_newroute_v6() does. 10770 * Note that IRE_LOCAL are special, since they are used 10771 * when the zoneid doesn't match in some cases. This means that 10772 * we need to handle ipha_src differently since ire_src_addr 10773 * belongs to the receiving zone instead of the sending zone. 10774 * When ip_restrict_interzone_loopback is set, then 10775 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10776 * for loopback between zones when the logical "Ethernet" would 10777 * have looped them back. 
10778 */ 10779 ire_t *src_ire; 10780 10781 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10782 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10783 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10784 if (src_ire != NULL && 10785 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10786 (!ipst->ips_ip_restrict_interzone_loopback || 10787 ire_local_same_ill_group(ire, src_ire))) { 10788 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10789 !unspec_src) { 10790 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10791 } 10792 ire_refrele(src_ire); 10793 } else { 10794 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10795 if (src_ire != NULL) { 10796 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10797 ire_refrele(src_ire); 10798 freemsg(first_mp); 10799 return; 10800 } 10801 ire_refrele(src_ire); 10802 } 10803 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10804 /* Failed */ 10805 freemsg(first_mp); 10806 return; 10807 } 10808 icmp_unreachable_v6(q, first_mp, 10809 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10810 zoneid, ipst); 10811 return; 10812 } 10813 } 10814 10815 if (mp->b_datap->db_type == M_CTL || 10816 ipss->ipsec_outbound_v6_policy_present) { 10817 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10818 connp, unspec_src, zoneid); 10819 if (mp == NULL) { 10820 return; 10821 } 10822 } 10823 10824 first_mp = mp; 10825 if (mp->b_datap->db_type == M_CTL) { 10826 io = (ipsec_out_t *)mp->b_rptr; 10827 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10828 mp = mp->b_cont; 10829 mctl_present = B_TRUE; 10830 } else { 10831 mctl_present = B_FALSE; 10832 } 10833 10834 ip6h = (ip6_t *)mp->b_rptr; 10835 nexthdr = ip6h->ip6_nxt; 10836 mibptr = ill->ill_ip_mib; 10837 10838 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10839 ipif_t *ipif; 10840 10841 /* 10842 * Select the source address using ipif_select_source_v6. 
10843 */ 10844 if (attach_index != 0) { 10845 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10846 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 10847 } else { 10848 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 10849 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 10850 } 10851 if (ipif == NULL) { 10852 if (ip_debug > 2) { 10853 /* ip1dbg */ 10854 pr_addr_dbg("ip_wput_ire_v6: no src for " 10855 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 10856 printf("ip_wput_ire_v6: interface name %s\n", 10857 ill->ill_name); 10858 } 10859 freemsg(first_mp); 10860 return; 10861 } 10862 ip6h->ip6_src = ipif->ipif_v6src_addr; 10863 ipif_refrele(ipif); 10864 } 10865 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10866 if ((connp != NULL && connp->conn_multicast_loop) || 10867 !IS_LOOPBACK(ill)) { 10868 ilm_t *ilm; 10869 10870 ILM_WALKER_HOLD(ill); 10871 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 10872 ILM_WALKER_RELE(ill); 10873 if (ilm != NULL) { 10874 mblk_t *nmp; 10875 int fanout_flags = 0; 10876 10877 if (connp != NULL && 10878 !connp->conn_multicast_loop) { 10879 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10880 } 10881 ip1dbg(("ip_wput_ire_v6: " 10882 "Loopback multicast\n")); 10883 nmp = ip_copymsg(first_mp); 10884 if (nmp != NULL) { 10885 ip6_t *nip6h; 10886 mblk_t *mp_ip6h; 10887 10888 if (mctl_present) { 10889 nip6h = (ip6_t *) 10890 nmp->b_cont->b_rptr; 10891 mp_ip6h = nmp->b_cont; 10892 } else { 10893 nip6h = (ip6_t *)nmp->b_rptr; 10894 mp_ip6h = nmp; 10895 } 10896 10897 DTRACE_PROBE4( 10898 ip6__loopback__out__start, 10899 ill_t *, NULL, 10900 ill_t *, ill, 10901 ip6_t *, nip6h, 10902 mblk_t *, nmp); 10903 10904 FW_HOOKS6( 10905 ipst->ips_ip6_loopback_out_event, 10906 ipst->ips_ipv6firewall_loopback_out, 10907 NULL, ill, nip6h, nmp, mp_ip6h, 10908 ipst); 10909 10910 DTRACE_PROBE1( 10911 ip6__loopback__out__end, 10912 mblk_t *, nmp); 10913 10914 if (nmp != NULL) { 10915 /* 10916 * Deliver locally and to 10917 * every local zone, except 10918 * the 
sending zone when 10919 * IPV6_MULTICAST_LOOP is 10920 * disabled. 10921 */ 10922 ip_wput_local_v6(RD(q), ill, 10923 nip6h, nmp, 10924 ire, fanout_flags); 10925 } 10926 } else { 10927 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10928 ip1dbg(("ip_wput_ire_v6: " 10929 "copymsg failed\n")); 10930 } 10931 } 10932 } 10933 if (ip6h->ip6_hops == 0 || 10934 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10935 IS_LOOPBACK(ill)) { 10936 /* 10937 * Local multicast or just loopback on loopback 10938 * interface. 10939 */ 10940 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10941 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10942 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10943 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10944 freemsg(first_mp); 10945 return; 10946 } 10947 } 10948 10949 if (ire->ire_stq != NULL) { 10950 uint32_t sum; 10951 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10952 ill_phyint->phyint_ifindex; 10953 queue_t *dev_q = ire->ire_stq->q_next; 10954 10955 /* 10956 * non-NULL send-to queue - packet is to be sent 10957 * out an interface. 10958 */ 10959 10960 /* Driver is flow-controlling? */ 10961 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10962 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 10963 /* 10964 * Queue packet if we have an conn to give back 10965 * pressure. We can't queue packets intended for 10966 * hardware acceleration since we've tossed that 10967 * state already. If the packet is being fed back 10968 * from ire_send_v6, we don't know the position in 10969 * the queue to enqueue the packet and we discard 10970 * the packet. 10971 */ 10972 if (ipst->ips_ip_output_queue && connp != NULL && 10973 !mctl_present && caller != IRE_SEND) { 10974 if (caller == IP_WSRV) { 10975 connp->conn_did_putbq = 1; 10976 (void) putbq(connp->conn_wq, mp); 10977 conn_drain_insert(connp); 10978 /* 10979 * caller == IP_WSRV implies we are 10980 * the service thread, and the 10981 * queue is already noenabled. 
10982 * The check for canput and 10983 * the putbq is not atomic. 10984 * So we need to check again. 10985 */ 10986 if (canput(dev_q)) 10987 connp->conn_did_putbq = 0; 10988 } else { 10989 (void) putq(connp->conn_wq, mp); 10990 } 10991 return; 10992 } 10993 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10994 freemsg(first_mp); 10995 return; 10996 } 10997 10998 /* 10999 * Look for reachability confirmations from the transport. 11000 */ 11001 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11002 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11003 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11004 if (mctl_present) 11005 io->ipsec_out_reachable = B_TRUE; 11006 } 11007 /* Fastpath */ 11008 switch (nexthdr) { 11009 case IPPROTO_TCP: 11010 case IPPROTO_UDP: 11011 case IPPROTO_ICMPV6: 11012 case IPPROTO_SCTP: 11013 hdr_length = IPV6_HDR_LEN; 11014 break; 11015 default: { 11016 uint8_t *nexthdrp; 11017 11018 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11019 &hdr_length, &nexthdrp)) { 11020 /* Malformed packet */ 11021 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11022 freemsg(first_mp); 11023 return; 11024 } 11025 nexthdr = *nexthdrp; 11026 break; 11027 } 11028 } 11029 11030 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11031 uint16_t *up; 11032 uint16_t *insp; 11033 11034 /* 11035 * The packet header is processed once for all, even 11036 * in the multirouting case. We disable hardware 11037 * checksum if the packet is multirouted, as it will be 11038 * replicated via several interfaces, and not all of 11039 * them may have this capability. 11040 */ 11041 if (cksum_request == 1 && 11042 !(ire->ire_flags & RTF_MULTIRT)) { 11043 /* Skip the transport checksum */ 11044 goto cksum_done; 11045 } 11046 /* 11047 * Do user-configured raw checksum. 
11048 * Compute checksum and insert at offset "cksum_request" 11049 */ 11050 11051 /* check for enough headers for checksum */ 11052 cksum_request += hdr_length; /* offset from rptr */ 11053 if ((mp->b_wptr - mp->b_rptr) < 11054 (cksum_request + sizeof (int16_t))) { 11055 if (!pullupmsg(mp, 11056 cksum_request + sizeof (int16_t))) { 11057 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11058 " failed\n")); 11059 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11060 freemsg(first_mp); 11061 return; 11062 } 11063 ip6h = (ip6_t *)mp->b_rptr; 11064 } 11065 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11066 ASSERT(((uintptr_t)insp & 0x1) == 0); 11067 up = (uint16_t *)&ip6h->ip6_src; 11068 /* 11069 * icmp has placed length and routing 11070 * header adjustment in *insp. 11071 */ 11072 sum = htons(nexthdr) + 11073 up[0] + up[1] + up[2] + up[3] + 11074 up[4] + up[5] + up[6] + up[7] + 11075 up[8] + up[9] + up[10] + up[11] + 11076 up[12] + up[13] + up[14] + up[15]; 11077 sum = (sum & 0xffff) + (sum >> 16); 11078 *insp = IP_CSUM(mp, hdr_length, sum); 11079 } else if (nexthdr == IPPROTO_TCP) { 11080 uint16_t *up; 11081 11082 /* 11083 * Check for full IPv6 header + enough TCP header 11084 * to get at the checksum field. 11085 */ 11086 if ((mp->b_wptr - mp->b_rptr) < 11087 (hdr_length + TCP_CHECKSUM_OFFSET + 11088 TCP_CHECKSUM_SIZE)) { 11089 if (!pullupmsg(mp, hdr_length + 11090 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11091 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11092 " failed\n")); 11093 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11094 freemsg(first_mp); 11095 return; 11096 } 11097 ip6h = (ip6_t *)mp->b_rptr; 11098 } 11099 11100 up = (uint16_t *)&ip6h->ip6_src; 11101 /* 11102 * Note: The TCP module has stored the length value 11103 * into the tcp checksum field, so we don't 11104 * need to explicitly sum it in here. 
11105 */ 11106 sum = up[0] + up[1] + up[2] + up[3] + 11107 up[4] + up[5] + up[6] + up[7] + 11108 up[8] + up[9] + up[10] + up[11] + 11109 up[12] + up[13] + up[14] + up[15]; 11110 11111 /* Fold the initial sum */ 11112 sum = (sum & 0xffff) + (sum >> 16); 11113 11114 up = (uint16_t *)(((uchar_t *)ip6h) + 11115 hdr_length + TCP_CHECKSUM_OFFSET); 11116 11117 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11118 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11119 ire->ire_max_frag, mctl_present, sum); 11120 11121 /* Software checksum? */ 11122 if (DB_CKSUMFLAGS(mp) == 0) { 11123 IP6_STAT(ipst, ip6_out_sw_cksum); 11124 IP6_STAT_UPDATE(ipst, 11125 ip6_tcp_out_sw_cksum_bytes, 11126 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11127 hdr_length); 11128 } 11129 } else if (nexthdr == IPPROTO_UDP) { 11130 uint16_t *up; 11131 11132 /* 11133 * check for full IPv6 header + enough UDP header 11134 * to get at the UDP checksum field 11135 */ 11136 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11137 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11138 if (!pullupmsg(mp, hdr_length + 11139 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11140 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11141 " failed\n")); 11142 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11143 freemsg(first_mp); 11144 return; 11145 } 11146 ip6h = (ip6_t *)mp->b_rptr; 11147 } 11148 up = (uint16_t *)&ip6h->ip6_src; 11149 /* 11150 * Note: The UDP module has stored the length value 11151 * into the udp checksum field, so we don't 11152 * need to explicitly sum it in here. 
11153 */ 11154 sum = up[0] + up[1] + up[2] + up[3] + 11155 up[4] + up[5] + up[6] + up[7] + 11156 up[8] + up[9] + up[10] + up[11] + 11157 up[12] + up[13] + up[14] + up[15]; 11158 11159 /* Fold the initial sum */ 11160 sum = (sum & 0xffff) + (sum >> 16); 11161 11162 up = (uint16_t *)(((uchar_t *)ip6h) + 11163 hdr_length + UDP_CHECKSUM_OFFSET); 11164 11165 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11166 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11167 ire->ire_max_frag, mctl_present, sum); 11168 11169 /* Software checksum? */ 11170 if (DB_CKSUMFLAGS(mp) == 0) { 11171 IP6_STAT(ipst, ip6_out_sw_cksum); 11172 IP6_STAT_UPDATE(ipst, 11173 ip6_udp_out_sw_cksum_bytes, 11174 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11175 hdr_length); 11176 } 11177 } else if (nexthdr == IPPROTO_ICMPV6) { 11178 uint16_t *up; 11179 icmp6_t *icmp6; 11180 11181 /* check for full IPv6+ICMPv6 header */ 11182 if ((mp->b_wptr - mp->b_rptr) < 11183 (hdr_length + ICMP6_MINLEN)) { 11184 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11185 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11186 " failed\n")); 11187 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11188 freemsg(first_mp); 11189 return; 11190 } 11191 ip6h = (ip6_t *)mp->b_rptr; 11192 } 11193 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11194 up = (uint16_t *)&ip6h->ip6_src; 11195 /* 11196 * icmp has placed length and routing 11197 * header adjustment in icmp6_cksum. 
11198 */ 11199 sum = htons(IPPROTO_ICMPV6) + 11200 up[0] + up[1] + up[2] + up[3] + 11201 up[4] + up[5] + up[6] + up[7] + 11202 up[8] + up[9] + up[10] + up[11] + 11203 up[12] + up[13] + up[14] + up[15]; 11204 sum = (sum & 0xffff) + (sum >> 16); 11205 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11206 11207 /* Update output mib stats */ 11208 icmp_update_out_mib_v6(ill, icmp6); 11209 } else if (nexthdr == IPPROTO_SCTP) { 11210 sctp_hdr_t *sctph; 11211 11212 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11213 if (!pullupmsg(mp, hdr_length + 11214 sizeof (*sctph))) { 11215 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11216 " failed\n")); 11217 BUMP_MIB(ill->ill_ip_mib, 11218 ipIfStatsOutDiscards); 11219 freemsg(mp); 11220 return; 11221 } 11222 ip6h = (ip6_t *)mp->b_rptr; 11223 } 11224 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11225 sctph->sh_chksum = 0; 11226 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11227 } 11228 11229 cksum_done: 11230 /* 11231 * We force the insertion of a fragment header using the 11232 * IPH_FRAG_HDR flag in two cases: 11233 * - after reception of an ICMPv6 "packet too big" message 11234 * with a MTU < 1280 (cf. RFC 2460 section 5) 11235 * - for multirouted IPv6 packets, so that the receiver can 11236 * discard duplicates according to their fragment identifier 11237 * 11238 * Two flags modifed from the API can modify this behavior. 11239 * The first is IPV6_USE_MIN_MTU. With this API the user 11240 * can specify how to manage PMTUD for unicast and multicast. 11241 * 11242 * IPV6_DONTFRAG disallows fragmentation. 
11243 */ 11244 max_frag = ire->ire_max_frag; 11245 switch (IP6I_USE_MIN_MTU_API(flags)) { 11246 case IPV6_USE_MIN_MTU_DEFAULT: 11247 case IPV6_USE_MIN_MTU_UNICAST: 11248 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11249 max_frag = IPV6_MIN_MTU; 11250 } 11251 break; 11252 11253 case IPV6_USE_MIN_MTU_NEVER: 11254 max_frag = IPV6_MIN_MTU; 11255 break; 11256 } 11257 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11258 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11259 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11260 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11261 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11262 return; 11263 } 11264 11265 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11266 (mp->b_cont ? msgdsize(mp) : 11267 mp->b_wptr - (uchar_t *)ip6h)) { 11268 ip0dbg(("Packet length mismatch: %d, %ld\n", 11269 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11270 msgdsize(mp))); 11271 freemsg(first_mp); 11272 return; 11273 } 11274 /* Do IPSEC processing first */ 11275 if (mctl_present) { 11276 if (attach_index != 0) 11277 ipsec_out_attach_if(io, attach_index); 11278 ipsec_out_process(q, first_mp, ire, ill_index); 11279 return; 11280 } 11281 ASSERT(mp->b_prev == NULL); 11282 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11283 ntohs(ip6h->ip6_plen) + 11284 IPV6_HDR_LEN, max_frag)); 11285 ASSERT(mp == first_mp); 11286 /* Initiate IPPF processing */ 11287 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11288 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11289 if (mp == NULL) { 11290 return; 11291 } 11292 } 11293 ip_wput_frag_v6(mp, ire, reachable, connp, 11294 caller, max_frag); 11295 return; 11296 } 11297 /* Do IPSEC processing first */ 11298 if (mctl_present) { 11299 int extra_len = ipsec_out_extra_length(first_mp); 11300 11301 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11302 max_frag) { 11303 /* 11304 * IPsec headers will push the packet over the 11305 * MTU limit. 
Issue an ICMPv6 Packet Too Big 11306 * message for this packet if the upper-layer 11307 * that issued this packet will be able to 11308 * react to the icmp_pkt2big_v6() that we'll 11309 * generate. 11310 */ 11311 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11312 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11313 return; 11314 } 11315 if (attach_index != 0) 11316 ipsec_out_attach_if(io, attach_index); 11317 ipsec_out_process(q, first_mp, ire, ill_index); 11318 return; 11319 } 11320 /* 11321 * XXX multicast: add ip_mforward_v6() here. 11322 * Check conn_dontroute 11323 */ 11324 #ifdef lint 11325 /* 11326 * XXX The only purpose of this statement is to avoid lint 11327 * errors. See the above "XXX multicast". When that gets 11328 * fixed, remove this whole #ifdef lint section. 11329 */ 11330 ip3dbg(("multicast forward is %s.\n", 11331 (multicast_forward ? "TRUE" : "FALSE"))); 11332 #endif 11333 11334 UPDATE_OB_PKT_COUNT(ire); 11335 ire->ire_last_used_time = lbolt; 11336 ASSERT(mp == first_mp); 11337 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11338 } else { 11339 DTRACE_PROBE4(ip6__loopback__out__start, 11340 ill_t *, NULL, ill_t *, ill, 11341 ip6_t *, ip6h, mblk_t *, first_mp); 11342 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11343 ipst->ips_ipv6firewall_loopback_out, 11344 NULL, ill, ip6h, first_mp, mp, ipst); 11345 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11346 if (first_mp != NULL) 11347 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11348 } 11349 } 11350 11351 /* 11352 * Outbound IPv6 fragmentation routine using MDT. 
11353 */ 11354 static void 11355 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11356 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11357 { 11358 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11359 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11360 mblk_t *hdr_mp, *md_mp = NULL; 11361 int i1; 11362 multidata_t *mmd; 11363 unsigned char *hdr_ptr, *pld_ptr; 11364 ip_pdescinfo_t pdi; 11365 uint32_t ident; 11366 size_t len; 11367 uint16_t offset; 11368 queue_t *stq = ire->ire_stq; 11369 ill_t *ill = (ill_t *)stq->q_ptr; 11370 ip_stack_t *ipst = ill->ill_ipst; 11371 11372 ASSERT(DB_TYPE(mp) == M_DATA); 11373 ASSERT(MBLKL(mp) > unfragmentable_len); 11374 11375 /* 11376 * Move read ptr past unfragmentable portion, we don't want this part 11377 * of the data in our fragments. 11378 */ 11379 mp->b_rptr += unfragmentable_len; 11380 11381 /* Calculate how many packets we will send out */ 11382 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11383 pkts = (i1 + max_chunk - 1) / max_chunk; 11384 ASSERT(pkts > 1); 11385 11386 /* Allocate a message block which will hold all the IP Headers. */ 11387 wroff = ipst->ips_ip_wroff_extra; 11388 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11389 11390 i1 = pkts * hdr_chunk_len; 11391 /* 11392 * Create the header buffer, Multidata and destination address 11393 * and SAP attribute that should be associated with it. 
11394 */ 11395 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11396 ((hdr_mp->b_wptr += i1), 11397 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11398 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11399 freemsg(mp); 11400 if (md_mp == NULL) { 11401 freemsg(hdr_mp); 11402 } else { 11403 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11404 freemsg(md_mp); 11405 } 11406 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11407 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11408 return; 11409 } 11410 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11411 11412 /* 11413 * Add a payload buffer to the Multidata; this operation must not 11414 * fail, or otherwise our logic in this routine is broken. There 11415 * is no memory allocation done by the routine, so any returned 11416 * failure simply tells us that we've done something wrong. 11417 * 11418 * A failure tells us that either we're adding the same payload 11419 * buffer more than once, or we're trying to add more buffers than 11420 * allowed. None of the above cases should happen, and we panic 11421 * because either there's horrible heap corruption, and/or 11422 * programming mistake. 11423 */ 11424 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11425 goto pbuf_panic; 11426 } 11427 11428 hdr_ptr = hdr_mp->b_rptr; 11429 pld_ptr = mp->b_rptr; 11430 11431 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11432 11433 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11434 11435 /* 11436 * len is the total length of the fragmentable data in this 11437 * datagram. For each fragment sent, we will decrement len 11438 * by the amount of fragmentable data sent in that fragment 11439 * until len reaches zero. 
11440 */ 11441 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11442 11443 offset = 0; 11444 prev_nexthdr_offset += wroff; 11445 11446 while (len != 0) { 11447 size_t mlen; 11448 ip6_t *fip6h; 11449 ip6_frag_t *fraghdr; 11450 int error; 11451 11452 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11453 mlen = MIN(len, max_chunk); 11454 len -= mlen; 11455 11456 fip6h = (ip6_t *)(hdr_ptr + wroff); 11457 ASSERT(OK_32PTR(fip6h)); 11458 bcopy(ip6h, fip6h, unfragmentable_len); 11459 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11460 11461 fip6h->ip6_plen = htons((uint16_t)(mlen + 11462 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11463 11464 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11465 unfragmentable_len); 11466 fraghdr->ip6f_nxt = nexthdr; 11467 fraghdr->ip6f_reserved = 0; 11468 fraghdr->ip6f_offlg = htons(offset) | 11469 ((len != 0) ? IP6F_MORE_FRAG : 0); 11470 fraghdr->ip6f_ident = ident; 11471 11472 /* 11473 * Record offset and size of header and data of the next packet 11474 * in the multidata message. 11475 */ 11476 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11477 unfragmentable_len + sizeof (ip6_frag_t), 0); 11478 PDESC_PLD_INIT(&pdi); 11479 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11480 ASSERT(i1 > 0); 11481 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11482 if (i1 == mlen) { 11483 pld_ptr += mlen; 11484 } else { 11485 i1 = mlen - i1; 11486 mp = mp->b_cont; 11487 ASSERT(mp != NULL); 11488 ASSERT(MBLKL(mp) >= i1); 11489 /* 11490 * Attach the next payload message block to the 11491 * multidata message. 
11492 */ 11493 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11494 goto pbuf_panic; 11495 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11496 pld_ptr = mp->b_rptr + i1; 11497 } 11498 11499 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11500 KM_NOSLEEP)) == NULL) { 11501 /* 11502 * Any failure other than ENOMEM indicates that we 11503 * have passed in invalid pdesc info or parameters 11504 * to mmd_addpdesc, which must not happen. 11505 * 11506 * EINVAL is a result of failure on boundary checks 11507 * against the pdesc info contents. It should not 11508 * happen, and we panic because either there's 11509 * horrible heap corruption, and/or programming 11510 * mistake. 11511 */ 11512 if (error != ENOMEM) { 11513 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11514 "pdesc logic error detected for " 11515 "mmd %p pinfo %p (%d)\n", 11516 (void *)mmd, (void *)&pdi, error); 11517 /* NOTREACHED */ 11518 } 11519 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11520 /* Free unattached payload message blocks as well */ 11521 md_mp->b_cont = mp->b_cont; 11522 goto free_mmd; 11523 } 11524 11525 /* Advance fragment offset. */ 11526 offset += mlen; 11527 11528 /* Advance to location for next header in the buffer. */ 11529 hdr_ptr += hdr_chunk_len; 11530 11531 /* Did we reach the next payload message block? */ 11532 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11533 mp = mp->b_cont; 11534 /* 11535 * Attach the next message block with payload 11536 * data to the multidata message. 
11537 */ 11538 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11539 goto pbuf_panic; 11540 pld_ptr = mp->b_rptr; 11541 } 11542 } 11543 11544 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11545 ASSERT(mp->b_wptr == pld_ptr); 11546 11547 /* Update IP statistics */ 11548 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11549 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11550 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11551 /* 11552 * The ipv6 header len is accounted for in unfragmentable_len so 11553 * when calculating the fragmentation overhead just add the frag 11554 * header len. 11555 */ 11556 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11557 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11558 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11559 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11560 11561 ire->ire_ob_pkt_count += pkts; 11562 if (ire->ire_ipif != NULL) 11563 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11564 11565 ire->ire_last_used_time = lbolt; 11566 /* Send it down */ 11567 putnext(stq, md_mp); 11568 return; 11569 11570 pbuf_panic: 11571 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11572 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11573 pbuf_idx); 11574 /* NOTREACHED */ 11575 } 11576 11577 /* 11578 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11579 * We have not optimized this in terms of number of mblks 11580 * allocated. For instance, for each fragment sent we always allocate a 11581 * mblk to hold the IPv6 header and fragment header. 11582 * 11583 * Assumes that all the extension headers are contained in the first mblk. 11584 * 11585 * The fragment header is inserted after an hop-by-hop options header 11586 * and after [an optional destinations header followed by] a routing header. 11587 * 11588 * NOTE : This function does not ire_refrele the ire passed in as 11589 * the argument. 
11590 */ 11591 void 11592 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11593 int caller, int max_frag) 11594 { 11595 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11596 ip6_t *fip6h; 11597 mblk_t *hmp; 11598 mblk_t *hmp0; 11599 mblk_t *dmp; 11600 ip6_frag_t *fraghdr; 11601 size_t unfragmentable_len; 11602 size_t len; 11603 size_t mlen; 11604 size_t max_chunk; 11605 uint32_t ident; 11606 uint16_t off_flags; 11607 uint16_t offset = 0; 11608 ill_t *ill; 11609 uint8_t nexthdr; 11610 uint_t prev_nexthdr_offset; 11611 uint8_t *ptr; 11612 ip_stack_t *ipst = ire->ire_ipst; 11613 11614 ASSERT(ire->ire_type == IRE_CACHE); 11615 ill = (ill_t *)ire->ire_stq->q_ptr; 11616 11617 if (max_frag <= 0) { 11618 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11619 freemsg(mp); 11620 return; 11621 } 11622 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11623 11624 /* 11625 * Determine the length of the unfragmentable portion of this 11626 * datagram. This consists of the IPv6 header, a potential 11627 * hop-by-hop options header, a potential pre-routing-header 11628 * destination options header, and a potential routing header. 
11629 */ 11630 nexthdr = ip6h->ip6_nxt; 11631 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11632 ptr = (uint8_t *)&ip6h[1]; 11633 11634 if (nexthdr == IPPROTO_HOPOPTS) { 11635 ip6_hbh_t *hbh_hdr; 11636 uint_t hdr_len; 11637 11638 hbh_hdr = (ip6_hbh_t *)ptr; 11639 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11640 nexthdr = hbh_hdr->ip6h_nxt; 11641 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11642 - (uint8_t *)ip6h; 11643 ptr += hdr_len; 11644 } 11645 if (nexthdr == IPPROTO_DSTOPTS) { 11646 ip6_dest_t *dest_hdr; 11647 uint_t hdr_len; 11648 11649 dest_hdr = (ip6_dest_t *)ptr; 11650 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11651 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11652 nexthdr = dest_hdr->ip6d_nxt; 11653 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11654 - (uint8_t *)ip6h; 11655 ptr += hdr_len; 11656 } 11657 } 11658 if (nexthdr == IPPROTO_ROUTING) { 11659 ip6_rthdr_t *rthdr; 11660 uint_t hdr_len; 11661 11662 rthdr = (ip6_rthdr_t *)ptr; 11663 nexthdr = rthdr->ip6r_nxt; 11664 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11665 - (uint8_t *)ip6h; 11666 hdr_len = 8 * (rthdr->ip6r_len + 1); 11667 ptr += hdr_len; 11668 } 11669 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11670 11671 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11672 sizeof (ip6_frag_t)) & ~7; 11673 11674 /* Check if we can use MDT to send out the frags. */ 11675 ASSERT(!IRE_IS_LOCAL(ire)); 11676 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11677 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11678 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11679 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11680 nexthdr, prev_nexthdr_offset); 11681 return; 11682 } 11683 11684 /* 11685 * Allocate an mblk with enough room for the link-layer 11686 * header, the unfragmentable part of the datagram, and the 11687 * fragment header. 
This (or a copy) will be used as the 11688 * first mblk for each fragment we send. 11689 */ 11690 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + 11691 ipst->ips_ip_wroff_extra, BPRI_HI); 11692 if (hmp == NULL) { 11693 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11694 freemsg(mp); 11695 return; 11696 } 11697 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11698 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11699 11700 fip6h = (ip6_t *)hmp->b_rptr; 11701 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11702 11703 bcopy(ip6h, fip6h, unfragmentable_len); 11704 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11705 11706 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11707 11708 fraghdr->ip6f_nxt = nexthdr; 11709 fraghdr->ip6f_reserved = 0; 11710 fraghdr->ip6f_offlg = 0; 11711 fraghdr->ip6f_ident = htonl(ident); 11712 11713 /* 11714 * len is the total length of the fragmentable data in this 11715 * datagram. For each fragment sent, we will decrement len 11716 * by the amount of fragmentable data sent in that fragment 11717 * until len reaches zero. 11718 */ 11719 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11720 11721 /* 11722 * Move read ptr past unfragmentable portion, we don't want this part 11723 * of the data in our fragments. 
11724 */ 11725 mp->b_rptr += unfragmentable_len; 11726 11727 while (len != 0) { 11728 mlen = MIN(len, max_chunk); 11729 len -= mlen; 11730 if (len != 0) { 11731 /* Not last */ 11732 hmp0 = copyb(hmp); 11733 if (hmp0 == NULL) { 11734 freeb(hmp); 11735 freemsg(mp); 11736 BUMP_MIB(ill->ill_ip_mib, 11737 ipIfStatsOutFragFails); 11738 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11739 return; 11740 } 11741 off_flags = IP6F_MORE_FRAG; 11742 } else { 11743 /* Last fragment */ 11744 hmp0 = hmp; 11745 hmp = NULL; 11746 off_flags = 0; 11747 } 11748 fip6h = (ip6_t *)(hmp0->b_rptr); 11749 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11750 11751 fip6h->ip6_plen = htons((uint16_t)(mlen + 11752 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11753 /* 11754 * Note: Optimization alert. 11755 * In IPv6 (and IPv4) protocol header, Fragment Offset 11756 * ("offset") is 13 bits wide and in 8-octet units. 11757 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11758 * it occupies the most significant 13 bits. 11759 * (least significant 13 bits in IPv4). 11760 * We do not do any shifts here. Not shifting is same effect 11761 * as taking offset value in octet units, dividing by 8 and 11762 * then shifting 3 bits left to line it up in place in proper 11763 * place protocol header. 
11764 */ 11765 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11766 11767 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11768 /* mp has already been freed by ip_carve_mp() */ 11769 if (hmp != NULL) 11770 freeb(hmp); 11771 freeb(hmp0); 11772 ip1dbg(("ip_carve_mp: failed\n")); 11773 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11774 return; 11775 } 11776 hmp0->b_cont = dmp; 11777 /* Get the priority marking, if any */ 11778 hmp0->b_band = dmp->b_band; 11779 UPDATE_OB_PKT_COUNT(ire); 11780 ire->ire_last_used_time = lbolt; 11781 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11782 caller, NULL); 11783 reachable = 0; /* No need to redo state machine in loop */ 11784 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11785 offset += mlen; 11786 } 11787 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11788 } 11789 11790 /* 11791 * Determine if the ill and multicast aspects of that packets 11792 * "matches" the conn. 11793 */ 11794 boolean_t 11795 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11796 zoneid_t zoneid) 11797 { 11798 ill_t *in_ill; 11799 boolean_t wantpacket = B_TRUE; 11800 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11801 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11802 11803 /* 11804 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11805 * unicast and multicast reception to conn_incoming_ill. 11806 * conn_wantpacket_v6 is called both for unicast and 11807 * multicast. 11808 * 11809 * 1) The unicast copy of the packet can come anywhere in 11810 * the ill group if it is part of the group. Thus, we 11811 * need to check to see whether the ill group matches 11812 * if in_ill is part of a group. 11813 * 11814 * 2) ip_rput does not suppress duplicate multicast packets. 
11815 * If there are two interfaces in a ill group and we have 11816 * 2 applications (conns) joined a multicast group G on 11817 * both the interfaces, ilm_lookup_ill filter in ip_rput 11818 * will give us two packets because we join G on both the 11819 * interfaces rather than nominating just one interface 11820 * for receiving multicast like broadcast above. So, 11821 * we have to call ilg_lookup_ill to filter out duplicate 11822 * copies, if ill is part of a group, to supress duplicates. 11823 */ 11824 in_ill = connp->conn_incoming_ill; 11825 if (in_ill != NULL) { 11826 mutex_enter(&connp->conn_lock); 11827 in_ill = connp->conn_incoming_ill; 11828 mutex_enter(&ill->ill_lock); 11829 /* 11830 * No IPMP, and the packet did not arrive on conn_incoming_ill 11831 * OR, IPMP in use and the packet arrived on an IPMP group 11832 * different from the conn_incoming_ill's IPMP group. 11833 * Reject the packet. 11834 */ 11835 if ((in_ill->ill_group == NULL && in_ill != ill) || 11836 (in_ill->ill_group != NULL && 11837 in_ill->ill_group != ill->ill_group)) { 11838 wantpacket = B_FALSE; 11839 } 11840 mutex_exit(&ill->ill_lock); 11841 mutex_exit(&connp->conn_lock); 11842 if (!wantpacket) 11843 return (B_FALSE); 11844 } 11845 11846 if (connp->conn_multi_router) 11847 return (B_TRUE); 11848 11849 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11850 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11851 /* 11852 * Unicast case: we match the conn only if it's in the specified 11853 * zone. 11854 */ 11855 return (IPCL_ZONE_MATCH(connp, zoneid)); 11856 } 11857 11858 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11859 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11860 /* 11861 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11862 * disabled, therefore we don't dispatch the multicast packet to 11863 * the sending zone. 
11864 */ 11865 return (B_FALSE); 11866 } 11867 11868 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11869 zoneid != ALL_ZONES) { 11870 /* 11871 * Multicast packet on the loopback interface: we only match 11872 * conns who joined the group in the specified zone. 11873 */ 11874 return (B_FALSE); 11875 } 11876 11877 mutex_enter(&connp->conn_lock); 11878 wantpacket = 11879 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11880 mutex_exit(&connp->conn_lock); 11881 11882 return (wantpacket); 11883 } 11884 11885 11886 /* 11887 * Transmit a packet and update any NUD state based on the flags 11888 * XXX need to "recover" any ip6i_t when doing putq! 11889 * 11890 * NOTE : This function does not ire_refrele the ire passed in as the 11891 * argument. 11892 */ 11893 void 11894 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11895 int caller, ipsec_out_t *io) 11896 { 11897 mblk_t *mp1; 11898 nce_t *nce = ire->ire_nce; 11899 ill_t *ill; 11900 ill_t *out_ill; 11901 uint64_t delta; 11902 ip6_t *ip6h; 11903 queue_t *stq = ire->ire_stq; 11904 ire_t *ire1 = NULL; 11905 ire_t *save_ire = ire; 11906 boolean_t multirt_send = B_FALSE; 11907 mblk_t *next_mp = NULL; 11908 ip_stack_t *ipst = ire->ire_ipst; 11909 11910 ip6h = (ip6_t *)mp->b_rptr; 11911 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11912 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11913 ASSERT(nce != NULL); 11914 ASSERT(mp->b_datap->db_type == M_DATA); 11915 ASSERT(stq != NULL); 11916 11917 ill = ire_to_ill(ire); 11918 if (!ill) { 11919 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11920 freemsg(mp); 11921 return; 11922 } 11923 11924 /* 11925 * If a packet is to be sent out an interface that is a 6to4 11926 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11927 * destination, must be checked to have a 6to4 prefix 11928 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11929 * address configured on the sending interface. 
Otherwise, 11930 * the packet was delivered to this interface in error and the 11931 * packet must be dropped. 11932 */ 11933 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11934 ipif_t *ipif = ill->ill_ipif; 11935 11936 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11937 &ip6h->ip6_dst)) { 11938 if (ip_debug > 2) { 11939 /* ip1dbg */ 11940 pr_addr_dbg("ip_xmit_v6: attempting to " 11941 "send 6to4 addressed IPv6 " 11942 "destination (%s) out the wrong " 11943 "interface.\n", AF_INET6, 11944 &ip6h->ip6_dst); 11945 } 11946 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 11947 freemsg(mp); 11948 return; 11949 } 11950 } 11951 11952 /* Flow-control check has been done in ip_wput_ire_v6 */ 11953 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11954 caller == IP_WSRV || canput(stq->q_next)) { 11955 uint32_t ill_index; 11956 11957 /* 11958 * In most cases, the emission loop below is entered only 11959 * once. Only in the case where the ire holds the 11960 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11961 * flagged ires in the bucket, and send the packet 11962 * through all crossed RTF_MULTIRT routes. 11963 */ 11964 if (ire->ire_flags & RTF_MULTIRT) { 11965 /* 11966 * Multirouting case. The bucket where ire is stored 11967 * probably holds other RTF_MULTIRT flagged ires 11968 * to the destination. In this call to ip_xmit_v6, 11969 * we attempt to send the packet through all 11970 * those ires. Thus, we first ensure that ire is the 11971 * first RTF_MULTIRT ire in the bucket, 11972 * before walking the ire list. 11973 */ 11974 ire_t *first_ire; 11975 irb_t *irb = ire->ire_bucket; 11976 ASSERT(irb != NULL); 11977 multirt_send = B_TRUE; 11978 11979 /* Make sure we do not omit any multiroute ire. 
*/ 11980 IRB_REFHOLD(irb); 11981 for (first_ire = irb->irb_ire; 11982 first_ire != NULL; 11983 first_ire = first_ire->ire_next) { 11984 if ((first_ire->ire_flags & RTF_MULTIRT) && 11985 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11986 &ire->ire_addr_v6)) && 11987 !(first_ire->ire_marks & 11988 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 11989 break; 11990 } 11991 11992 if ((first_ire != NULL) && (first_ire != ire)) { 11993 IRE_REFHOLD(first_ire); 11994 /* ire will be released by the caller */ 11995 ire = first_ire; 11996 nce = ire->ire_nce; 11997 stq = ire->ire_stq; 11998 ill = ire_to_ill(ire); 11999 } 12000 IRB_REFRELE(irb); 12001 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12002 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12003 ILL_MDT_USABLE(ill)) { 12004 /* 12005 * This tcp connection was marked as MDT-capable, but 12006 * it has been turned off due changes in the interface. 12007 * Now that the interface support is back, turn it on 12008 * by notifying tcp. We don't directly modify tcp_mdt, 12009 * since we leave all the details to the tcp code that 12010 * knows better. 12011 */ 12012 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12013 12014 if (mdimp == NULL) { 12015 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12016 "connp %p (ENOMEM)\n", (void *)connp)); 12017 } else { 12018 CONN_INC_REF(connp); 12019 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12020 connp, SQTAG_TCP_INPUT_MCTL); 12021 } 12022 } 12023 12024 do { 12025 mblk_t *mp_ip6h; 12026 12027 if (multirt_send) { 12028 irb_t *irb; 12029 /* 12030 * We are in a multiple send case, need to get 12031 * the next ire and make a duplicate of the 12032 * packet. ire1 holds here the next ire to 12033 * process in the bucket. If multirouting is 12034 * expected, any non-RTF_MULTIRT ire that has 12035 * the right destination address is ignored. 
12036 */ 12037 irb = ire->ire_bucket; 12038 ASSERT(irb != NULL); 12039 12040 IRB_REFHOLD(irb); 12041 for (ire1 = ire->ire_next; 12042 ire1 != NULL; 12043 ire1 = ire1->ire_next) { 12044 if (!(ire1->ire_flags & RTF_MULTIRT)) 12045 continue; 12046 if (!IN6_ARE_ADDR_EQUAL( 12047 &ire1->ire_addr_v6, 12048 &ire->ire_addr_v6)) 12049 continue; 12050 if (ire1->ire_marks & 12051 (IRE_MARK_CONDEMNED| 12052 IRE_MARK_HIDDEN)) 12053 continue; 12054 12055 /* Got one */ 12056 if (ire1 != save_ire) { 12057 IRE_REFHOLD(ire1); 12058 } 12059 break; 12060 } 12061 IRB_REFRELE(irb); 12062 12063 if (ire1 != NULL) { 12064 next_mp = copyb(mp); 12065 if ((next_mp == NULL) || 12066 ((mp->b_cont != NULL) && 12067 ((next_mp->b_cont = 12068 dupmsg(mp->b_cont)) == NULL))) { 12069 freemsg(next_mp); 12070 next_mp = NULL; 12071 ire_refrele(ire1); 12072 ire1 = NULL; 12073 } 12074 } 12075 12076 /* Last multiroute ire; don't loop anymore. */ 12077 if (ire1 == NULL) { 12078 multirt_send = B_FALSE; 12079 } 12080 } 12081 12082 ill_index = 12083 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12084 12085 /* Initiate IPPF processing */ 12086 if (IP6_OUT_IPP(flags, ipst)) { 12087 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12088 if (mp == NULL) { 12089 BUMP_MIB(ill->ill_ip_mib, 12090 ipIfStatsOutDiscards); 12091 if (next_mp != NULL) 12092 freemsg(next_mp); 12093 if (ire != save_ire) { 12094 ire_refrele(ire); 12095 } 12096 return; 12097 } 12098 ip6h = (ip6_t *)mp->b_rptr; 12099 } 12100 mp_ip6h = mp; 12101 12102 /* 12103 * Check for fastpath, we need to hold nce_lock to 12104 * prevent fastpath update from chaining nce_fp_mp. 
12105 */ 12106 12107 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12108 mutex_enter(&nce->nce_lock); 12109 if ((mp1 = nce->nce_fp_mp) != NULL) { 12110 uint32_t hlen; 12111 uchar_t *rptr; 12112 12113 hlen = MBLKL(mp1); 12114 rptr = mp->b_rptr - hlen; 12115 /* 12116 * make sure there is room for the fastpath 12117 * datalink header 12118 */ 12119 if (rptr < mp->b_datap->db_base) { 12120 mp1 = copyb(mp1); 12121 mutex_exit(&nce->nce_lock); 12122 if (mp1 == NULL) { 12123 BUMP_MIB(ill->ill_ip_mib, 12124 ipIfStatsOutDiscards); 12125 freemsg(mp); 12126 if (next_mp != NULL) 12127 freemsg(next_mp); 12128 if (ire != save_ire) { 12129 ire_refrele(ire); 12130 } 12131 return; 12132 } 12133 mp1->b_cont = mp; 12134 12135 /* Get the priority marking, if any */ 12136 mp1->b_band = mp->b_band; 12137 mp = mp1; 12138 } else { 12139 mp->b_rptr = rptr; 12140 /* 12141 * fastpath - pre-pend datalink 12142 * header 12143 */ 12144 bcopy(mp1->b_rptr, rptr, hlen); 12145 mutex_exit(&nce->nce_lock); 12146 } 12147 } else { 12148 /* 12149 * Get the DL_UNITDATA_REQ. 12150 */ 12151 mp1 = nce->nce_res_mp; 12152 if (mp1 == NULL) { 12153 mutex_exit(&nce->nce_lock); 12154 ip1dbg(("ip_xmit_v6: No resolution " 12155 "block ire = %p\n", (void *)ire)); 12156 freemsg(mp); 12157 if (next_mp != NULL) 12158 freemsg(next_mp); 12159 if (ire != save_ire) { 12160 ire_refrele(ire); 12161 } 12162 return; 12163 } 12164 /* 12165 * Prepend the DL_UNITDATA_REQ. 
12166 */ 12167 mp1 = copyb(mp1); 12168 mutex_exit(&nce->nce_lock); 12169 if (mp1 == NULL) { 12170 BUMP_MIB(ill->ill_ip_mib, 12171 ipIfStatsOutDiscards); 12172 freemsg(mp); 12173 if (next_mp != NULL) 12174 freemsg(next_mp); 12175 if (ire != save_ire) { 12176 ire_refrele(ire); 12177 } 12178 return; 12179 } 12180 mp1->b_cont = mp; 12181 12182 /* Get the priority marking, if any */ 12183 mp1->b_band = mp->b_band; 12184 mp = mp1; 12185 } 12186 12187 out_ill = (ill_t *)stq->q_ptr; 12188 12189 DTRACE_PROBE4(ip6__physical__out__start, 12190 ill_t *, NULL, ill_t *, out_ill, 12191 ip6_t *, ip6h, mblk_t *, mp); 12192 12193 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12194 ipst->ips_ipv6firewall_physical_out, 12195 NULL, out_ill, ip6h, mp, mp_ip6h, ipst); 12196 12197 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12198 12199 if (mp == NULL) { 12200 if (multirt_send) { 12201 ASSERT(ire1 != NULL); 12202 if (ire != save_ire) { 12203 ire_refrele(ire); 12204 } 12205 /* 12206 * Proceed with the next RTF_MULTIRT 12207 * ire, also set up the send-to queue 12208 * accordingly. 12209 */ 12210 ire = ire1; 12211 ire1 = NULL; 12212 stq = ire->ire_stq; 12213 nce = ire->ire_nce; 12214 ill = ire_to_ill(ire); 12215 mp = next_mp; 12216 next_mp = NULL; 12217 continue; 12218 } else { 12219 ASSERT(next_mp == NULL); 12220 ASSERT(ire1 == NULL); 12221 break; 12222 } 12223 } 12224 12225 /* 12226 * Update ire and MIB counters; for save_ire, this has 12227 * been done by the caller. 12228 */ 12229 if (ire != save_ire) { 12230 UPDATE_OB_PKT_COUNT(ire); 12231 ire->ire_last_used_time = lbolt; 12232 12233 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12234 BUMP_MIB(ill->ill_ip_mib, 12235 ipIfStatsHCOutMcastPkts); 12236 UPDATE_MIB(ill->ill_ip_mib, 12237 ipIfStatsHCOutMcastOctets, 12238 ntohs(ip6h->ip6_plen) + 12239 IPV6_HDR_LEN); 12240 } 12241 } 12242 12243 /* 12244 * Send it down. XXX Do we want to flow control AH/ESP 12245 * packets that carry TCP payloads? 
We don't flow 12246 * control TCP packets, but we should also not 12247 * flow-control TCP packets that have been protected. 12248 * We don't have an easy way to find out if an AH/ESP 12249 * packet was originally TCP or not currently. 12250 */ 12251 if (io == NULL) { 12252 BUMP_MIB(ill->ill_ip_mib, 12253 ipIfStatsHCOutTransmits); 12254 UPDATE_MIB(ill->ill_ip_mib, 12255 ipIfStatsHCOutOctets, 12256 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12257 putnext(stq, mp); 12258 } else { 12259 /* 12260 * Safety Pup says: make sure this is 12261 * going to the right interface! 12262 */ 12263 if (io->ipsec_out_capab_ill_index != 12264 ill_index) { 12265 /* IPsec kstats: bump lose counter */ 12266 freemsg(mp1); 12267 } else { 12268 BUMP_MIB(ill->ill_ip_mib, 12269 ipIfStatsHCOutTransmits); 12270 UPDATE_MIB(ill->ill_ip_mib, 12271 ipIfStatsHCOutOctets, 12272 ntohs(ip6h->ip6_plen) + 12273 IPV6_HDR_LEN); 12274 ipsec_hw_putnext(stq, mp); 12275 } 12276 } 12277 12278 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12279 if (ire != save_ire) { 12280 ire_refrele(ire); 12281 } 12282 if (multirt_send) { 12283 ASSERT(ire1 != NULL); 12284 /* 12285 * Proceed with the next RTF_MULTIRT 12286 * ire, also set up the send-to queue 12287 * accordingly. 12288 */ 12289 ire = ire1; 12290 ire1 = NULL; 12291 stq = ire->ire_stq; 12292 nce = ire->ire_nce; 12293 ill = ire_to_ill(ire); 12294 mp = next_mp; 12295 next_mp = NULL; 12296 continue; 12297 } 12298 ASSERT(next_mp == NULL); 12299 ASSERT(ire1 == NULL); 12300 return; 12301 } 12302 12303 ASSERT(nce->nce_state != ND_INCOMPLETE); 12304 12305 /* 12306 * Check for upper layer advice 12307 */ 12308 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12309 /* 12310 * It should be o.k. to check the state without 12311 * a lock here, at most we lose an advice. 
12312 */ 12313 nce->nce_last = TICK_TO_MSEC(lbolt64); 12314 if (nce->nce_state != ND_REACHABLE) { 12315 12316 mutex_enter(&nce->nce_lock); 12317 nce->nce_state = ND_REACHABLE; 12318 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12319 mutex_exit(&nce->nce_lock); 12320 (void) untimeout(nce->nce_timeout_id); 12321 if (ip_debug > 2) { 12322 /* ip1dbg */ 12323 pr_addr_dbg("ip_xmit_v6: state" 12324 " for %s changed to" 12325 " REACHABLE\n", AF_INET6, 12326 &ire->ire_addr_v6); 12327 } 12328 } 12329 if (ire != save_ire) { 12330 ire_refrele(ire); 12331 } 12332 if (multirt_send) { 12333 ASSERT(ire1 != NULL); 12334 /* 12335 * Proceed with the next RTF_MULTIRT 12336 * ire, also set up the send-to queue 12337 * accordingly. 12338 */ 12339 ire = ire1; 12340 ire1 = NULL; 12341 stq = ire->ire_stq; 12342 nce = ire->ire_nce; 12343 ill = ire_to_ill(ire); 12344 mp = next_mp; 12345 next_mp = NULL; 12346 continue; 12347 } 12348 ASSERT(next_mp == NULL); 12349 ASSERT(ire1 == NULL); 12350 return; 12351 } 12352 12353 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12354 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12355 " ill_reachable_time = %d \n", delta, 12356 ill->ill_reachable_time)); 12357 if (delta > (uint64_t)ill->ill_reachable_time) { 12358 nce = ire->ire_nce; 12359 mutex_enter(&nce->nce_lock); 12360 switch (nce->nce_state) { 12361 case ND_REACHABLE: 12362 case ND_STALE: 12363 /* 12364 * ND_REACHABLE is identical to 12365 * ND_STALE in this specific case. If 12366 * reachable time has expired for this 12367 * neighbor (delta is greater than 12368 * reachable time), conceptually, the 12369 * neighbor cache is no longer in 12370 * REACHABLE state, but already in 12371 * STALE state. So the correct 12372 * transition here is to ND_DELAY. 
12373 */ 12374 nce->nce_state = ND_DELAY; 12375 mutex_exit(&nce->nce_lock); 12376 NDP_RESTART_TIMER(nce, 12377 ipst->ips_delay_first_probe_time); 12378 if (ip_debug > 3) { 12379 /* ip2dbg */ 12380 pr_addr_dbg("ip_xmit_v6: state" 12381 " for %s changed to" 12382 " DELAY\n", AF_INET6, 12383 &ire->ire_addr_v6); 12384 } 12385 break; 12386 case ND_DELAY: 12387 case ND_PROBE: 12388 mutex_exit(&nce->nce_lock); 12389 /* Timers have already started */ 12390 break; 12391 case ND_UNREACHABLE: 12392 /* 12393 * ndp timer has detected that this nce 12394 * is unreachable and initiated deleting 12395 * this nce and all its associated IREs. 12396 * This is a race where we found the 12397 * ire before it was deleted and have 12398 * just sent out a packet using this 12399 * unreachable nce. 12400 */ 12401 mutex_exit(&nce->nce_lock); 12402 break; 12403 default: 12404 ASSERT(0); 12405 } 12406 } 12407 12408 if (multirt_send) { 12409 ASSERT(ire1 != NULL); 12410 /* 12411 * Proceed with the next RTF_MULTIRT ire, 12412 * Also set up the send-to queue accordingly. 12413 */ 12414 if (ire != save_ire) { 12415 ire_refrele(ire); 12416 } 12417 ire = ire1; 12418 ire1 = NULL; 12419 stq = ire->ire_stq; 12420 nce = ire->ire_nce; 12421 ill = ire_to_ill(ire); 12422 mp = next_mp; 12423 next_mp = NULL; 12424 } 12425 } while (multirt_send); 12426 /* 12427 * In the multirouting case, release the last ire used for 12428 * emission. save_ire will be released by the caller. 12429 */ 12430 if (ire != save_ire) { 12431 ire_refrele(ire); 12432 } 12433 } else { 12434 /* 12435 * Queue packet if we have an conn to give back pressure. 12436 * We can't queue packets intended for hardware acceleration 12437 * since we've tossed that state already. If the packet is 12438 * being fed back from ire_send_v6, we don't know the 12439 * position in the queue to enqueue the packet and we discard 12440 * the packet. 
12441 */ 12442 if (ipst->ips_ip_output_queue && (connp != NULL) && 12443 (io == NULL) && (caller != IRE_SEND)) { 12444 if (caller == IP_WSRV) { 12445 connp->conn_did_putbq = 1; 12446 (void) putbq(connp->conn_wq, mp); 12447 conn_drain_insert(connp); 12448 /* 12449 * caller == IP_WSRV implies we are 12450 * the service thread, and the 12451 * queue is already noenabled. 12452 * The check for canput and 12453 * the putbq is not atomic. 12454 * So we need to check again. 12455 */ 12456 if (canput(stq->q_next)) 12457 connp->conn_did_putbq = 0; 12458 } else { 12459 (void) putq(connp->conn_wq, mp); 12460 } 12461 return; 12462 } 12463 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12464 freemsg(mp); 12465 return; 12466 } 12467 } 12468 12469 /* 12470 * pr_addr_dbg function provides the needed buffer space to call 12471 * inet_ntop() function's 3rd argument. This function should be 12472 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12473 * stack buffer space in it's own stack frame. This function uses 12474 * a buffer from it's own stack and prints the information. 12475 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12476 * 12477 * Note: This function can call inet_ntop() once. 12478 */ 12479 void 12480 pr_addr_dbg(char *fmt1, int af, const void *addr) 12481 { 12482 char buf[INET6_ADDRSTRLEN]; 12483 12484 if (fmt1 == NULL) { 12485 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12486 return; 12487 } 12488 12489 /* 12490 * This does not compare debug level and just prints 12491 * out. Thus it is the responsibility of the caller 12492 * to check the appropriate debug-level before calling 12493 * this function. 
12494 */ 12495 if (ip_debug > 0) { 12496 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12497 } 12498 12499 12500 } 12501 12502 12503 /* 12504 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12505 * if needed and extension headers) that will be needed based on the 12506 * ip6_pkt_t structure passed by the caller. 12507 * 12508 * The returned length does not include the length of the upper level 12509 * protocol (ULP) header. 12510 */ 12511 int 12512 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12513 { 12514 int len; 12515 12516 len = IPV6_HDR_LEN; 12517 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12518 len += sizeof (ip6i_t); 12519 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12520 ASSERT(ipp->ipp_hopoptslen != 0); 12521 len += ipp->ipp_hopoptslen; 12522 } 12523 if (ipp->ipp_fields & IPPF_RTHDR) { 12524 ASSERT(ipp->ipp_rthdrlen != 0); 12525 len += ipp->ipp_rthdrlen; 12526 } 12527 /* 12528 * En-route destination options 12529 * Only do them if there's a routing header as well 12530 */ 12531 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12532 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12533 ASSERT(ipp->ipp_rtdstoptslen != 0); 12534 len += ipp->ipp_rtdstoptslen; 12535 } 12536 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12537 ASSERT(ipp->ipp_dstoptslen != 0); 12538 len += ipp->ipp_dstoptslen; 12539 } 12540 return (len); 12541 } 12542 12543 /* 12544 * All-purpose routine to build a header chain of an IPv6 header 12545 * followed by any required extension headers and a proto header, 12546 * preceeded (where necessary) by an ip6i_t private header. 12547 * 12548 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12549 * will be filled in appropriately. 12550 * Thus the caller must fill in the rest of the IPv6 header, such as 12551 * traffic class/flowid, source address (if not set here), hoplimit (if not 12552 * set here) and destination address. 12553 * 12554 * The extension headers and ip6i_t header will all be fully filled in. 
12555 */ 12556 void 12557 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12558 ip6_pkt_t *ipp, uint8_t protocol) 12559 { 12560 uint8_t *nxthdr_ptr; 12561 uint8_t *cp; 12562 ip6i_t *ip6i; 12563 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12564 12565 /* 12566 * If sending private ip6i_t header down (checksum info, nexthop, 12567 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12568 * then fill it in. (The checksum info will be filled in by icmp). 12569 */ 12570 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12571 ip6i = (ip6i_t *)ip6h; 12572 ip6h = (ip6_t *)&ip6i[1]; 12573 12574 ip6i->ip6i_flags = 0; 12575 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12576 if (ipp->ipp_fields & IPPF_IFINDEX || 12577 ipp->ipp_fields & IPPF_SCOPE_ID) { 12578 ASSERT(ipp->ipp_ifindex != 0); 12579 ip6i->ip6i_flags |= IP6I_IFINDEX; 12580 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12581 } 12582 if (ipp->ipp_fields & IPPF_ADDR) { 12583 /* 12584 * Enable per-packet source address verification if 12585 * IPV6_PKTINFO specified the source address. 12586 * ip6_src is set in the transport's _wput function. 12587 */ 12588 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12589 &ipp->ipp_addr)); 12590 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12591 } 12592 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12593 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12594 /* 12595 * We need to set this flag so that IP doesn't 12596 * rewrite the IPv6 header's hoplimit with the 12597 * current default value. 
12598 */ 12599 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12600 } 12601 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12602 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12603 &ipp->ipp_nexthop)); 12604 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12605 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12606 } 12607 /* 12608 * tell IP this is an ip6i_t private header 12609 */ 12610 ip6i->ip6i_nxt = IPPROTO_RAW; 12611 } 12612 /* Initialize IPv6 header */ 12613 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12614 if (ipp->ipp_fields & IPPF_TCLASS) { 12615 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12616 (ipp->ipp_tclass << 20); 12617 } 12618 if (ipp->ipp_fields & IPPF_ADDR) 12619 ip6h->ip6_src = ipp->ipp_addr; 12620 12621 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12622 cp = (uint8_t *)&ip6h[1]; 12623 /* 12624 * Here's where we have to start stringing together 12625 * any extension headers in the right order: 12626 * Hop-by-hop, destination, routing, and final destination opts. 12627 */ 12628 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12629 /* Hop-by-hop options */ 12630 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12631 12632 *nxthdr_ptr = IPPROTO_HOPOPTS; 12633 nxthdr_ptr = &hbh->ip6h_nxt; 12634 12635 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12636 cp += ipp->ipp_hopoptslen; 12637 } 12638 /* 12639 * En-route destination options 12640 * Only do them if there's a routing header as well 12641 */ 12642 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12643 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12644 ip6_dest_t *dst = (ip6_dest_t *)cp; 12645 12646 *nxthdr_ptr = IPPROTO_DSTOPTS; 12647 nxthdr_ptr = &dst->ip6d_nxt; 12648 12649 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12650 cp += ipp->ipp_rtdstoptslen; 12651 } 12652 /* 12653 * Routing header next 12654 */ 12655 if (ipp->ipp_fields & IPPF_RTHDR) { 12656 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12657 12658 *nxthdr_ptr = IPPROTO_ROUTING; 12659 nxthdr_ptr = &rt->ip6r_nxt; 12660 12661 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12662 cp += ipp->ipp_rthdrlen; 12663 } 
12664 /* 12665 * Do ultimate destination options 12666 */ 12667 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12668 ip6_dest_t *dest = (ip6_dest_t *)cp; 12669 12670 *nxthdr_ptr = IPPROTO_DSTOPTS; 12671 nxthdr_ptr = &dest->ip6d_nxt; 12672 12673 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12674 cp += ipp->ipp_dstoptslen; 12675 } 12676 /* 12677 * Now set the last header pointer to the proto passed in 12678 */ 12679 *nxthdr_ptr = protocol; 12680 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12681 } 12682 12683 /* 12684 * Return a pointer to the routing header extension header 12685 * in the IPv6 header(s) chain passed in. 12686 * If none found, return NULL 12687 * Assumes that all extension headers are in same mblk as the v6 header 12688 */ 12689 ip6_rthdr_t * 12690 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12691 { 12692 ip6_dest_t *desthdr; 12693 ip6_frag_t *fraghdr; 12694 uint_t hdrlen; 12695 uint8_t nexthdr; 12696 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12697 12698 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12699 return ((ip6_rthdr_t *)ptr); 12700 12701 /* 12702 * The routing header will precede all extension headers 12703 * other than the hop-by-hop and destination options 12704 * extension headers, so if we see anything other than those, 12705 * we're done and didn't find it. 12706 * We could see a destination options header alone but no 12707 * routing header, in which case we'll return NULL as soon as 12708 * we see anything after that. 12709 * Hop-by-hop and destination option headers are identical, 12710 * so we can use either one we want as a template. 12711 */ 12712 nexthdr = ip6h->ip6_nxt; 12713 while (ptr < endptr) { 12714 /* Is there enough left for len + nexthdr? 
*/ 12715 if (ptr + MIN_EHDR_LEN > endptr) 12716 return (NULL); 12717 12718 switch (nexthdr) { 12719 case IPPROTO_HOPOPTS: 12720 case IPPROTO_DSTOPTS: 12721 /* Assumes the headers are identical for hbh and dst */ 12722 desthdr = (ip6_dest_t *)ptr; 12723 hdrlen = 8 * (desthdr->ip6d_len + 1); 12724 nexthdr = desthdr->ip6d_nxt; 12725 break; 12726 12727 case IPPROTO_ROUTING: 12728 return ((ip6_rthdr_t *)ptr); 12729 12730 case IPPROTO_FRAGMENT: 12731 fraghdr = (ip6_frag_t *)ptr; 12732 hdrlen = sizeof (ip6_frag_t); 12733 nexthdr = fraghdr->ip6f_nxt; 12734 break; 12735 12736 default: 12737 return (NULL); 12738 } 12739 ptr += hdrlen; 12740 } 12741 return (NULL); 12742 } 12743 12744 /* 12745 * Called for source-routed packets originating on this node. 12746 * Manipulates the original routing header by moving every entry up 12747 * one slot, placing the first entry in the v6 header's v6_dst field, 12748 * and placing the ultimate destination in the routing header's last 12749 * slot. 12750 * 12751 * Returns the checksum diference between the ultimate destination 12752 * (last hop in the routing header when the packet is sent) and 12753 * the first hop (ip6_dst when the packet is sent) 12754 */ 12755 /* ARGSUSED2 */ 12756 uint32_t 12757 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12758 { 12759 uint_t numaddr; 12760 uint_t i; 12761 in6_addr_t *addrptr; 12762 in6_addr_t tmp; 12763 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12764 uint32_t cksm; 12765 uint32_t addrsum = 0; 12766 uint16_t *ptr; 12767 12768 /* 12769 * Perform any processing needed for source routing. 12770 * We know that all extension headers will be in the same mblk 12771 * as the IPv6 header. 12772 */ 12773 12774 /* 12775 * If no segments left in header, or the header length field is zero, 12776 * don't move hop addresses around; 12777 * Checksum difference is zero. 
12778 */ 12779 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12780 return (0); 12781 12782 ptr = (uint16_t *)&ip6h->ip6_dst; 12783 cksm = 0; 12784 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12785 cksm += ptr[i]; 12786 } 12787 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12788 12789 /* 12790 * Here's where the fun begins - we have to 12791 * move all addresses up one spot, take the 12792 * first hop and make it our first ip6_dst, 12793 * and place the ultimate destination in the 12794 * newly-opened last slot. 12795 */ 12796 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12797 numaddr = rthdr->ip6r0_len / 2; 12798 tmp = *addrptr; 12799 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12800 *addrptr = addrptr[1]; 12801 } 12802 *addrptr = ip6h->ip6_dst; 12803 ip6h->ip6_dst = tmp; 12804 12805 /* 12806 * From the checksummed ultimate destination subtract the checksummed 12807 * current ip6_dst (the first hop address). Return that number. 12808 * (In the v4 case, the second part of this is done in each routine 12809 * that calls ip_massage_options(). We do it all in this one place 12810 * for v6). 12811 */ 12812 ptr = (uint16_t *)&ip6h->ip6_dst; 12813 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12814 addrsum += ptr[i]; 12815 } 12816 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12817 if ((int)cksm < 0) 12818 cksm--; 12819 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12820 12821 return (cksm); 12822 } 12823 12824 /* 12825 * Propagate a multicast group membership operation (join/leave) (*fn) on 12826 * all interfaces crossed by the related multirt routes. 12827 * The call is considered successful if the operation succeeds 12828 * on at least one interface. 12829 * The function is called if the destination address in the packet to send 12830 * is multirouted. 
12831 */ 12832 int 12833 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12834 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12835 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12836 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12837 { 12838 ire_t *ire_gw; 12839 irb_t *irb; 12840 int index, error = 0; 12841 opt_restart_t *or; 12842 ip_stack_t *ipst = ire->ire_ipst; 12843 12844 irb = ire->ire_bucket; 12845 ASSERT(irb != NULL); 12846 12847 ASSERT(DB_TYPE(first_mp) == M_CTL); 12848 or = (opt_restart_t *)first_mp->b_rptr; 12849 12850 IRB_REFHOLD(irb); 12851 for (; ire != NULL; ire = ire->ire_next) { 12852 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12853 continue; 12854 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12855 continue; 12856 12857 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12858 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12859 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12860 /* No resolver exists for the gateway; skip this ire. */ 12861 if (ire_gw == NULL) 12862 continue; 12863 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12864 /* 12865 * A resolver exists: we can get the interface on which we have 12866 * to apply the operation. 12867 */ 12868 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12869 first_mp); 12870 if (error == 0) 12871 or->or_private = CGTP_MCAST_SUCCESS; 12872 12873 if (ip_debug > 0) { 12874 ulong_t off; 12875 char *ksym; 12876 12877 ksym = kobj_getsymname((uintptr_t)fn, &off); 12878 ip2dbg(("ip_multirt_apply_membership_v6: " 12879 "called %s, multirt group 0x%08x via itf 0x%08x, " 12880 "error %d [success %u]\n", 12881 ksym ? 
ksym : "?", 12882 ntohl(V4_PART_OF_V6((*v6grp))), 12883 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12884 error, or->or_private)); 12885 } 12886 12887 ire_refrele(ire_gw); 12888 if (error == EINPROGRESS) { 12889 IRB_REFRELE(irb); 12890 return (error); 12891 } 12892 } 12893 IRB_REFRELE(irb); 12894 /* 12895 * Consider the call as successful if we succeeded on at least 12896 * one interface. Otherwise, return the last encountered error. 12897 */ 12898 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12899 } 12900 12901 void 12902 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12903 { 12904 kstat_t *ksp; 12905 12906 ip6_stat_t template = { 12907 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12908 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12909 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12910 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12911 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12912 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12913 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12914 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12915 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12916 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12917 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12918 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12919 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12920 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12921 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 12922 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 12923 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 12924 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 12925 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 12926 }; 12927 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 12928 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 12929 KSTAT_FLAG_VIRTUAL, stackid); 12930 12931 if (ksp == NULL) 12932 return (NULL); 12933 12934 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 12935 ksp->ks_data = (void *)ip6_statisticsp; 12936 ksp->ks_private = (void *)(uintptr_t)stackid; 12937 12938 kstat_install(ksp); 12939 return (ksp); 12940 } 12941 12942 void 12943 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 12944 { 12945 if (ksp != NULL) { 12946 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 12947 kstat_delete_netstack(ksp, stackid); 12948 } 12949 } 12950 12951 /* 12952 * The following two functions set and get the value for the 12953 * IPV6_SRC_PREFERENCES socket option. 12954 */ 12955 int 12956 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12957 { 12958 /* 12959 * We only support preferences that are covered by 12960 * IPV6_PREFER_SRC_MASK. 12961 */ 12962 if (prefs & ~IPV6_PREFER_SRC_MASK) 12963 return (EINVAL); 12964 12965 /* 12966 * Look for conflicting preferences or default preferences. If 12967 * both bits of a related pair are clear, the application wants the 12968 * system's default value for that pair. Both bits in a pair can't 12969 * be set. 
12970 */ 12971 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12972 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12973 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12974 IPV6_PREFER_SRC_MIPMASK) { 12975 return (EINVAL); 12976 } 12977 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12978 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12979 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12980 IPV6_PREFER_SRC_TMPMASK) { 12981 return (EINVAL); 12982 } 12983 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12984 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12985 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12986 IPV6_PREFER_SRC_CGAMASK) { 12987 return (EINVAL); 12988 } 12989 12990 connp->conn_src_preferences = prefs; 12991 return (0); 12992 } 12993 12994 size_t 12995 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12996 { 12997 *val = connp->conn_src_preferences; 12998 return (sizeof (connp->conn_src_preferences)); 12999 } 13000 13001 int 13002 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13003 { 13004 ill_t *ill; 13005 ire_t *ire; 13006 int error; 13007 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13008 13009 /* 13010 * Verify the source address and ifindex. Privileged users can use 13011 * any source address. For ancillary data the source address is 13012 * checked in ip_wput_v6. 13013 */ 13014 if (pkti->ipi6_ifindex != 0) { 13015 ASSERT(connp != NULL); 13016 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13017 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error, ipst); 13018 if (ill == NULL) { 13019 /* 13020 * We just want to know if the interface exists, we 13021 * don't really care about the ill pointer itself. 
13022 */ 13023 if (error != EINPROGRESS) 13024 return (error); 13025 error = 0; /* Ensure we don't use it below */ 13026 } else { 13027 ill_refrele(ill); 13028 } 13029 } 13030 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13031 secpolicy_net_rawaccess(cr) != 0) { 13032 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13033 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13034 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 13035 if (ire != NULL) 13036 ire_refrele(ire); 13037 else 13038 return (ENXIO); 13039 } 13040 return (0); 13041 } 13042 13043 /* 13044 * Get the size of the IP options (including the IP headers size) 13045 * without including the AH header's size. If till_ah is B_FALSE, 13046 * and if AH header is present, dest options beyond AH header will 13047 * also be included in the returned size. 13048 */ 13049 int 13050 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13051 { 13052 ip6_t *ip6h; 13053 uint8_t nexthdr; 13054 uint8_t *whereptr; 13055 ip6_hbh_t *hbhhdr; 13056 ip6_dest_t *dsthdr; 13057 ip6_rthdr_t *rthdr; 13058 int ehdrlen; 13059 int size; 13060 ah_t *ah; 13061 13062 ip6h = (ip6_t *)mp->b_rptr; 13063 size = IPV6_HDR_LEN; 13064 nexthdr = ip6h->ip6_nxt; 13065 whereptr = (uint8_t *)&ip6h[1]; 13066 for (;;) { 13067 /* Assume IP has already stripped it */ 13068 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13069 switch (nexthdr) { 13070 case IPPROTO_HOPOPTS: 13071 hbhhdr = (ip6_hbh_t *)whereptr; 13072 nexthdr = hbhhdr->ip6h_nxt; 13073 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13074 break; 13075 case IPPROTO_DSTOPTS: 13076 dsthdr = (ip6_dest_t *)whereptr; 13077 nexthdr = dsthdr->ip6d_nxt; 13078 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13079 break; 13080 case IPPROTO_ROUTING: 13081 rthdr = (ip6_rthdr_t *)whereptr; 13082 nexthdr = rthdr->ip6r_nxt; 13083 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13084 break; 13085 default : 13086 if (till_ah) { 13087 ASSERT(nexthdr == IPPROTO_AH); 13088 return (size); 13089 } 13090 /* 13091 * If we don't have a 
AH header to traverse, 13092 * return now. This happens normally for 13093 * outbound datagrams where we have not inserted 13094 * the AH header. 13095 */ 13096 if (nexthdr != IPPROTO_AH) { 13097 return (size); 13098 } 13099 13100 /* 13101 * We don't include the AH header's size 13102 * to be symmetrical with other cases where 13103 * we either don't have a AH header (outbound) 13104 * or peek into the AH header yet (inbound and 13105 * not pulled up yet). 13106 */ 13107 ah = (ah_t *)whereptr; 13108 nexthdr = ah->ah_nexthdr; 13109 ehdrlen = (ah->ah_length << 2) + 8; 13110 13111 if (nexthdr == IPPROTO_DSTOPTS) { 13112 if (whereptr + ehdrlen >= mp->b_wptr) { 13113 /* 13114 * The destination options header 13115 * is not part of the first mblk. 13116 */ 13117 whereptr = mp->b_cont->b_rptr; 13118 } else { 13119 whereptr += ehdrlen; 13120 } 13121 13122 dsthdr = (ip6_dest_t *)whereptr; 13123 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13124 size += ehdrlen; 13125 } 13126 return (size); 13127 } 13128 whereptr += ehdrlen; 13129 size += ehdrlen; 13130 } 13131 } 13132