1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/stropts.h> 35 #include <sys/sysmacros.h> 36 #include <sys/strsun.h> 37 #include <sys/strlog.h> 38 #include <sys/strsubr.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/cmn_err.h> 44 #include <sys/debug.h> 45 #include <sys/sdt.h> 46 #include <sys/kobj.h> 47 #include <sys/zone.h> 48 #include <sys/neti.h> 49 #include <sys/hook.h> 50 51 #include <sys/kmem.h> 52 #include <sys/systm.h> 53 #include <sys/param.h> 54 #include <sys/socket.h> 55 #include <sys/vtrace.h> 56 #include <sys/isa_defs.h> 57 #include <sys/atomic.h> 58 #include <sys/iphada.h> 59 #include <sys/policy.h> 60 #include <net/if.h> 61 #include <net/if_types.h> 62 #include <net/route.h> 63 #include <net/if_dl.h> 64 #include <sys/sockio.h> 65 #include <netinet/in.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 #include <netinet/sctp.h> 69 70 #include <inet/common.h> 71 #include <inet/mi.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/arp.h> 75 76 #include <inet/ip.h> 77 #include <inet/ip_impl.h> 78 #include <inet/ip6.h> 79 #include <inet/ip6_asp.h> 80 #include <inet/tcp.h> 81 #include <inet/tcp_impl.h> 82 #include <inet/udp_impl.h> 83 #include <inet/ipp_common.h> 84 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_rts.h> 89 #include <inet/optcom.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/tun.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/udp_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include <rpc/pmap_prot.h> 107 
108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern squeue_func_t ip_input_proc; 112 113 /* 114 * Naming conventions: 115 * These rules should be judiciously applied 116 * if there is a need to identify something as IPv6 versus IPv4 117 * IPv6 funcions will end with _v6 in the ip module. 118 * IPv6 funcions will end with _ipv6 in the transport modules. 119 * IPv6 macros: 120 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 121 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 122 * And then there are ..V4_PART_OF_V6. 123 * The intent is that macros in the ip module end with _V6. 124 * IPv6 global variables will start with ipv6_ 125 * IPv6 structures will start with ipv6 126 * IPv6 defined constants should start with IPV6_ 127 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 128 */ 129 130 /* 131 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 132 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 133 * from IANA. This mechanism will remain in effect until an official 134 * number is obtained. 
135 */ 136 uchar_t ip6opt_ls; 137 138 const in6_addr_t ipv6_all_ones = 139 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 140 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 141 142 #ifdef _BIG_ENDIAN 143 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 144 #else /* _BIG_ENDIAN */ 145 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 146 #endif /* _BIG_ENDIAN */ 147 148 #ifdef _BIG_ENDIAN 149 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 150 #else /* _BIG_ENDIAN */ 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 152 #endif /* _BIG_ENDIAN */ 153 154 #ifdef _BIG_ENDIAN 155 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 156 #else /* _BIG_ENDIAN */ 157 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 158 #endif /* _BIG_ENDIAN */ 159 160 #ifdef _BIG_ENDIAN 161 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 162 #else /* _BIG_ENDIAN */ 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 164 #endif /* _BIG_ENDIAN */ 165 166 #ifdef _BIG_ENDIAN 167 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 168 #else /* _BIG_ENDIAN */ 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 170 #endif /* _BIG_ENDIAN */ 171 172 #ifdef _BIG_ENDIAN 173 const in6_addr_t ipv6_solicited_node_mcast = 174 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 175 #else /* _BIG_ENDIAN */ 176 const in6_addr_t ipv6_solicited_node_mcast = 177 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 178 #endif /* _BIG_ENDIAN */ 179 180 /* Leave room for ip_newroute to tack on the src and target addresses */ 181 #define OK_RESOLVER_MP_V6(mp) \ 182 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 183 184 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *ill, 185 boolean_t, zoneid_t); 186 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 187 const 
in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 188 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 189 static int ip_bind_connected_v6(conn_t *, mblk_t *, in6_addr_t *, 190 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 191 boolean_t, boolean_t, boolean_t, boolean_t); 192 static boolean_t ip_bind_insert_ire_v6(mblk_t *, ire_t *, const in6_addr_t *, 193 iulp_t *, ip_stack_t *); 194 static int ip_bind_laddr_v6(conn_t *, mblk_t *, const in6_addr_t *, 195 uint16_t, boolean_t, boolean_t, boolean_t); 196 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 197 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 198 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 199 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 200 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 201 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 202 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 203 uint8_t *, uint_t, uint8_t, ip_stack_t *); 204 static mblk_t *ip_rput_frag_v6(queue_t *, mblk_t *, ip6_t *, 205 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 206 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 207 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 208 conn_t *, int, int, int, zoneid_t); 209 210 void ip_rput_v6(queue_t *, mblk_t *); 211 static void ip_wput_v6(queue_t *, mblk_t *); 212 213 /* 214 * A template for an IPv6 AR_ENTRY_QUERY 215 */ 216 static areq_t ipv6_areq_template = { 217 AR_ENTRY_QUERY, /* cmd */ 218 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 219 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 220 IP6_DL_SAP, /* protocol, from arps perspective */ 221 sizeof (areq_t), /* target addr offset */ 222 IPV6_ADDR_LEN, /* target addr_length */ 223 0, /* flags */ 224 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 225 IPV6_ADDR_LEN, /* sender addr length */ 226 6, /* xmit_count */ 227 1000, /* 
(re)xmit_interval in milliseconds */ 228 4 /* max # of requests to buffer */ 229 /* anything else filled in by the code */ 230 }; 231 232 struct qinit rinit_ipv6 = { 233 (pfi_t)ip_rput_v6, 234 NULL, 235 ip_open, 236 ip_close, 237 NULL, 238 &ip_mod_info 239 }; 240 241 struct qinit winit_ipv6 = { 242 (pfi_t)ip_wput_v6, 243 (pfi_t)ip_wsrv, 244 ip_open, 245 ip_close, 246 NULL, 247 &ip_mod_info 248 }; 249 250 /* 251 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 252 * The message has already been checksummed and if needed, 253 * a copy has been made to be sent any interested ICMP client (conn) 254 * Note that this is different than icmp_inbound() which does the fanout 255 * to conn's as well as local processing of the ICMP packets. 256 * 257 * All error messages are passed to the matching transport stream. 258 * 259 * Zones notes: 260 * The packet is only processed in the context of the specified zone: typically 261 * only this zone will reply to an echo request. This means that the caller must 262 * call icmp_inbound_v6() for each relevant zone. 
263 */ 264 static void 265 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, uint_t hdr_length, 266 boolean_t mctl_present, uint_t flags, zoneid_t zoneid, mblk_t *dl_mp) 267 { 268 icmp6_t *icmp6; 269 ip6_t *ip6h; 270 boolean_t interested; 271 ip6i_t *ip6i; 272 in6_addr_t origsrc; 273 ire_t *ire; 274 mblk_t *first_mp; 275 ipsec_in_t *ii; 276 ip_stack_t *ipst = ill->ill_ipst; 277 278 ASSERT(ill != NULL); 279 first_mp = mp; 280 if (mctl_present) { 281 mp = first_mp->b_cont; 282 ASSERT(mp != NULL); 283 284 ii = (ipsec_in_t *)first_mp->b_rptr; 285 ASSERT(ii->ipsec_in_type == IPSEC_IN); 286 } 287 288 ip6h = (ip6_t *)mp->b_rptr; 289 290 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 291 292 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 293 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 294 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 295 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 296 freemsg(first_mp); 297 return; 298 } 299 ip6h = (ip6_t *)mp->b_rptr; 300 } 301 if (ipst->ips_icmp_accept_clear_messages == 0) { 302 first_mp = ipsec_check_global_policy(first_mp, NULL, 303 NULL, ip6h, mctl_present, ipst->ips_netstack); 304 if (first_mp == NULL) 305 return; 306 } 307 308 /* 309 * On a labeled system, we have to check whether the zone itself is 310 * permitted to receive raw traffic. 
311 */ 312 if (is_system_labeled()) { 313 if (zoneid == ALL_ZONES) 314 zoneid = tsol_packet_to_zoneid(mp); 315 if (!tsol_can_accept_raw(mp, B_FALSE)) { 316 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 317 zoneid)); 318 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 319 freemsg(first_mp); 320 return; 321 } 322 } 323 324 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 325 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 326 icmp6->icmp6_code)); 327 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 328 329 /* Initiate IPPF processing here */ 330 if (IP6_IN_IPP(flags, ipst)) { 331 332 /* 333 * If the ifindex changes due to SIOCSLIFINDEX 334 * packet may return to IP on the wrong ill. 335 */ 336 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 337 if (mp == NULL) { 338 if (mctl_present) { 339 freeb(first_mp); 340 } 341 return; 342 } 343 } 344 345 switch (icmp6->icmp6_type) { 346 case ICMP6_DST_UNREACH: 347 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 348 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 349 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 350 break; 351 352 case ICMP6_TIME_EXCEEDED: 353 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 354 break; 355 356 case ICMP6_PARAM_PROB: 357 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 358 break; 359 360 case ICMP6_PACKET_TOO_BIG: 361 icmp_inbound_too_big_v6(q, first_mp, ill, mctl_present, 362 zoneid); 363 return; 364 case ICMP6_ECHO_REQUEST: 365 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 366 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 367 !ipst->ips_ipv6_resp_echo_mcast) 368 break; 369 370 /* 371 * We must have exclusive use of the mblk to convert it to 372 * a response. 373 * If not, we copy it. 
374 */ 375 if (mp->b_datap->db_ref > 1) { 376 mblk_t *mp1; 377 378 mp1 = copymsg(mp); 379 freemsg(mp); 380 if (mp1 == NULL) { 381 BUMP_MIB(ill->ill_icmp6_mib, 382 ipv6IfIcmpInErrors); 383 if (mctl_present) 384 freeb(first_mp); 385 return; 386 } 387 mp = mp1; 388 ip6h = (ip6_t *)mp->b_rptr; 389 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 390 if (mctl_present) 391 first_mp->b_cont = mp; 392 else 393 first_mp = mp; 394 } 395 396 /* 397 * Turn the echo into an echo reply. 398 * Remove any extension headers (do not reverse a source route) 399 * and clear the flow id (keep traffic class for now). 400 */ 401 if (hdr_length != IPV6_HDR_LEN) { 402 int i; 403 404 for (i = 0; i < IPV6_HDR_LEN; i++) 405 mp->b_rptr[hdr_length - i - 1] = 406 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 407 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 408 ip6h = (ip6_t *)mp->b_rptr; 409 ip6h->ip6_nxt = IPPROTO_ICMPV6; 410 hdr_length = IPV6_HDR_LEN; 411 } 412 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 413 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 414 415 ip6h->ip6_plen = 416 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 417 origsrc = ip6h->ip6_src; 418 /* 419 * Reverse the source and destination addresses. 420 * If the return address is a multicast, zero out the source 421 * (ip_wput_v6 will set an address). 422 */ 423 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 424 ip6h->ip6_src = ipv6_all_zeros; 425 ip6h->ip6_dst = origsrc; 426 } else { 427 ip6h->ip6_src = ip6h->ip6_dst; 428 ip6h->ip6_dst = origsrc; 429 } 430 431 /* set the hop limit */ 432 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 433 434 /* 435 * Prepare for checksum by putting icmp length in the icmp 436 * checksum field. The checksum is calculated in ip_wput_v6. 437 */ 438 icmp6->icmp6_cksum = ip6h->ip6_plen; 439 /* 440 * ICMP echo replies should go out on the same interface 441 * the request came on as probes used by in.mpathd for 442 * detecting NIC failures are ECHO packets. 
We turn-off load 443 * spreading by allocating a ip6i and setting ip6i_attach_if 444 * to B_TRUE which is handled both by ip_wput_v6 and 445 * ip_newroute_v6. If we don't turnoff load spreading, 446 * the packets might get dropped if there are no 447 * non-FAILED/INACTIVE interfaces for it to go out on and 448 * in.mpathd would wrongly detect a failure or mis-detect 449 * a NIC failure as a link failure. As load spreading can 450 * happen only if ill_group is not NULL, we do only for 451 * that case and this does not affect the normal case. 452 * 453 * We force this only on echo packets that came from on-link 454 * hosts. We restrict this to link-local addresses which 455 * is used by in.mpathd for probing. In the IPv6 case, 456 * default routes typically have an ire_ipif pointer and 457 * hence a MATCH_IRE_ILL later in ip_newroute_v6/ip_wput_v6 458 * might work. As a default route out of this interface 459 * may not be present, enforcing this packet to go out in 460 * this case may not work. 461 */ 462 if (ill->ill_group != NULL && 463 IN6_IS_ADDR_LINKLOCAL(&origsrc)) { 464 /* 465 * If we are sending replies to ourselves, don't 466 * set ATTACH_IF as we may not be able to find 467 * the IRE_LOCAL on this ill i.e setting ATTACH_IF 468 * causes ip_wput_v6 to look for an IRE_LOCAL on 469 * "ill" which it may not find and will try to 470 * create an IRE_CACHE for our local address. Once 471 * we do this, we will try to forward all packets 472 * meant to our LOCAL address. 
473 */ 474 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 475 NULL, ipst); 476 if (ire == NULL || ire->ire_type != IRE_LOCAL) { 477 mp = ip_add_info_v6(mp, NULL, &ip6h->ip6_dst); 478 if (mp == NULL) { 479 BUMP_MIB(ill->ill_icmp6_mib, 480 ipv6IfIcmpInErrors); 481 if (ire != NULL) 482 ire_refrele(ire); 483 if (mctl_present) 484 freeb(first_mp); 485 return; 486 } else if (mctl_present) { 487 first_mp->b_cont = mp; 488 } else { 489 first_mp = mp; 490 } 491 ip6i = (ip6i_t *)mp->b_rptr; 492 ip6i->ip6i_flags = IP6I_ATTACH_IF; 493 ip6i->ip6i_ifindex = 494 ill->ill_phyint->phyint_ifindex; 495 } 496 if (ire != NULL) 497 ire_refrele(ire); 498 } 499 500 if (!mctl_present) { 501 /* 502 * This packet should go out the same way as it 503 * came in i.e in clear. To make sure that global 504 * policy will not be applied to this in ip_wput, 505 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 506 */ 507 ASSERT(first_mp == mp); 508 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 509 if (first_mp == NULL) { 510 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 511 freemsg(mp); 512 return; 513 } 514 ii = (ipsec_in_t *)first_mp->b_rptr; 515 516 /* This is not a secure packet */ 517 ii->ipsec_in_secure = B_FALSE; 518 first_mp->b_cont = mp; 519 } 520 ii->ipsec_in_zoneid = zoneid; 521 ASSERT(zoneid != ALL_ZONES); 522 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 523 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 524 return; 525 } 526 put(WR(q), first_mp); 527 return; 528 529 case ICMP6_ECHO_REPLY: 530 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 531 break; 532 533 case ND_ROUTER_SOLICIT: 534 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 535 break; 536 537 case ND_ROUTER_ADVERT: 538 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 539 break; 540 541 case ND_NEIGHBOR_SOLICIT: 542 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 543 if (mctl_present) 544 freeb(first_mp); 545 /* XXX may wish to pass first_mp up to ndp_input 
someday. */ 546 ndp_input(ill, mp, dl_mp); 547 return; 548 549 case ND_NEIGHBOR_ADVERT: 550 BUMP_MIB(ill->ill_icmp6_mib, 551 ipv6IfIcmpInNeighborAdvertisements); 552 if (mctl_present) 553 freeb(first_mp); 554 /* XXX may wish to pass first_mp up to ndp_input someday. */ 555 ndp_input(ill, mp, dl_mp); 556 return; 557 558 case ND_REDIRECT: { 559 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 560 561 if (ipst->ips_ipv6_ignore_redirect) 562 break; 563 564 /* 565 * As there is no upper client to deliver, we don't 566 * need the first_mp any more. 567 */ 568 if (mctl_present) 569 freeb(first_mp); 570 if (!pullupmsg(mp, -1)) { 571 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 572 break; 573 } 574 icmp_redirect_v6(q, mp, ill); 575 return; 576 } 577 578 /* 579 * The next three icmp messages will be handled by MLD. 580 * Pass all valid MLD packets up to any process(es) 581 * listening on a raw ICMP socket. MLD messages are 582 * freed by mld_input function. 583 */ 584 case MLD_LISTENER_QUERY: 585 case MLD_LISTENER_REPORT: 586 case MLD_LISTENER_REDUCTION: 587 if (mctl_present) 588 freeb(first_mp); 589 mld_input(q, mp, ill); 590 return; 591 default: 592 break; 593 } 594 if (interested) { 595 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 596 mctl_present, zoneid); 597 } else { 598 freemsg(first_mp); 599 } 600 } 601 602 /* 603 * Process received IPv6 ICMP Packet too big. 604 * After updating any IRE it does the fanout to any matching transport streams. 605 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
606 */ 607 /* ARGSUSED */ 608 static void 609 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, 610 boolean_t mctl_present, zoneid_t zoneid) 611 { 612 ip6_t *ip6h; 613 ip6_t *inner_ip6h; 614 icmp6_t *icmp6; 615 uint16_t hdr_length; 616 uint32_t mtu; 617 ire_t *ire, *first_ire; 618 mblk_t *first_mp; 619 ip_stack_t *ipst = ill->ill_ipst; 620 621 first_mp = mp; 622 if (mctl_present) 623 mp = first_mp->b_cont; 624 /* 625 * We must have exclusive use of the mblk to update the MTU 626 * in the packet. 627 * If not, we copy it. 628 * 629 * If there's an M_CTL present, we know that allocated first_mp 630 * earlier in this function, so we know first_mp has refcnt of one. 631 */ 632 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 633 if (mp->b_datap->db_ref > 1) { 634 mblk_t *mp1; 635 636 mp1 = copymsg(mp); 637 freemsg(mp); 638 if (mp1 == NULL) { 639 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 640 if (mctl_present) 641 freeb(first_mp); 642 return; 643 } 644 mp = mp1; 645 if (mctl_present) 646 first_mp->b_cont = mp; 647 else 648 first_mp = mp; 649 } 650 ip6h = (ip6_t *)mp->b_rptr; 651 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 652 hdr_length = ip_hdr_length_v6(mp, ip6h); 653 else 654 hdr_length = IPV6_HDR_LEN; 655 656 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 657 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 658 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 659 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 660 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 661 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 662 freemsg(first_mp); 663 return; 664 } 665 ip6h = (ip6_t *)mp->b_rptr; 666 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 667 inner_ip6h = (ip6_t *)&icmp6[1]; 668 } 669 670 /* 671 * For link local destinations matching simply on IRE type is not 672 * sufficient. Same link local addresses for different ILL's is 673 * possible. 
674 */ 675 676 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 677 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 678 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 679 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 680 681 if (first_ire == NULL) { 682 if (ip_debug > 2) { 683 /* ip1dbg */ 684 pr_addr_dbg("icmp_inbound_too_big_v6:" 685 "no ire for dst %s\n", AF_INET6, 686 &inner_ip6h->ip6_dst); 687 } 688 freemsg(first_mp); 689 return; 690 } 691 692 mtu = ntohl(icmp6->icmp6_mtu); 693 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 694 for (ire = first_ire; ire != NULL && 695 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 696 ire = ire->ire_next) { 697 mutex_enter(&ire->ire_lock); 698 if (mtu < IPV6_MIN_MTU) { 699 ip1dbg(("Received mtu less than IPv6 " 700 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 701 mtu = IPV6_MIN_MTU; 702 /* 703 * If an mtu less than IPv6 min mtu is received, 704 * we must include a fragment header in 705 * subsequent packets. 706 */ 707 ire->ire_frag_flag |= IPH_FRAG_HDR; 708 } 709 ip1dbg(("Received mtu from router: %d\n", mtu)); 710 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 711 /* Record the new max frag size for the ULP. */ 712 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 713 /* 714 * If we need a fragment header in every packet 715 * (above case or multirouting), make sure the 716 * ULP takes it into account when computing the 717 * payload size. 
718 */ 719 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 720 sizeof (ip6_frag_t)); 721 } else { 722 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 723 } 724 mutex_exit(&ire->ire_lock); 725 } 726 rw_exit(&first_ire->ire_bucket->irb_lock); 727 ire_refrele(first_ire); 728 } else { 729 irb_t *irb = NULL; 730 /* 731 * for non-link local destinations we match only on the IRE type 732 */ 733 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 734 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 735 ipst); 736 if (ire == NULL) { 737 if (ip_debug > 2) { 738 /* ip1dbg */ 739 pr_addr_dbg("icmp_inbound_too_big_v6:" 740 "no ire for dst %s\n", 741 AF_INET6, &inner_ip6h->ip6_dst); 742 } 743 freemsg(first_mp); 744 return; 745 } 746 irb = ire->ire_bucket; 747 ire_refrele(ire); 748 rw_enter(&irb->irb_lock, RW_READER); 749 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 750 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 751 &inner_ip6h->ip6_dst)) { 752 mtu = ntohl(icmp6->icmp6_mtu); 753 mutex_enter(&ire->ire_lock); 754 if (mtu < IPV6_MIN_MTU) { 755 ip1dbg(("Received mtu less than IPv6" 756 "min mtu %d: %d\n", 757 IPV6_MIN_MTU, mtu)); 758 mtu = IPV6_MIN_MTU; 759 /* 760 * If an mtu less than IPv6 min mtu is 761 * received, we must include a fragment 762 * header in subsequent packets. 763 */ 764 ire->ire_frag_flag |= IPH_FRAG_HDR; 765 } 766 767 ip1dbg(("Received mtu from router: %d\n", mtu)); 768 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 769 /* Record the new max frag size for the ULP. */ 770 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 771 /* 772 * If we need a fragment header in 773 * every packet (above case or 774 * multirouting), make sure the ULP 775 * takes it into account when computing 776 * the payload size. 
777 */ 778 icmp6->icmp6_mtu = 779 htonl(ire->ire_max_frag - 780 sizeof (ip6_frag_t)); 781 } else { 782 icmp6->icmp6_mtu = 783 htonl(ire->ire_max_frag); 784 } 785 mutex_exit(&ire->ire_lock); 786 } 787 } 788 rw_exit(&irb->irb_lock); 789 } 790 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 791 mctl_present, zoneid); 792 } 793 794 static void 795 pkt_too_big(conn_t *connp, void *arg) 796 { 797 mblk_t *mp; 798 799 if (!connp->conn_ipv6_recvpathmtu) 800 return; 801 802 /* create message and drop it on this connections read queue */ 803 if ((mp = dupb((mblk_t *)arg)) == NULL) { 804 return; 805 } 806 mp->b_datap->db_type = M_CTL; 807 808 putnext(connp->conn_rq, mp); 809 } 810 811 /* 812 * Fanout received ICMPv6 error packets to the transports. 813 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 814 */ 815 void 816 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 817 icmp6_t *icmp6, ill_t *ill, boolean_t mctl_present, zoneid_t zoneid) 818 { 819 uint16_t *up; /* Pointer to ports in ULP header */ 820 uint32_t ports; /* reversed ports for fanout */ 821 ip6_t rip6h; /* With reversed addresses */ 822 uint16_t hdr_length; 823 uint8_t *nexthdrp; 824 uint8_t nexthdr; 825 mblk_t *first_mp; 826 ipsec_in_t *ii; 827 tcpha_t *tcpha; 828 conn_t *connp; 829 ip_stack_t *ipst = ill->ill_ipst; 830 831 first_mp = mp; 832 if (mctl_present) { 833 mp = first_mp->b_cont; 834 ASSERT(mp != NULL); 835 836 ii = (ipsec_in_t *)first_mp->b_rptr; 837 ASSERT(ii->ipsec_in_type == IPSEC_IN); 838 } else { 839 ii = NULL; 840 } 841 842 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 843 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 844 845 /* 846 * Need to pullup everything in order to use 847 * ip_hdr_length_nexthdr_v6() 848 */ 849 if (mp->b_cont != NULL) { 850 if (!pullupmsg(mp, -1)) { 851 ip1dbg(("icmp_inbound_error_fanout_v6: " 852 "pullupmsg failed\n")); 853 goto drop_pkt; 854 } 855 ip6h = (ip6_t *)mp->b_rptr; 856 
icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 857 } 858 859 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 860 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 861 goto drop_pkt; 862 863 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 864 goto drop_pkt; 865 nexthdr = *nexthdrp; 866 867 /* Set message type, must be done after pullups */ 868 mp->b_datap->db_type = M_CTL; 869 870 if (icmp6->icmp6_type == ICMP6_PACKET_TOO_BIG) { 871 /* 872 * Deliver indication of ICMP6_PACKET_TOO_BIG to interested 873 * sockets. 874 * 875 * Note I don't like walking every connection to deliver 876 * this information to a set of listeners. A separate 877 * list could be kept to keep the cost of this down. 878 */ 879 ipcl_walk(pkt_too_big, (void *)mp, ipst); 880 } 881 882 /* Try to pass the ICMP message to clients who need it */ 883 switch (nexthdr) { 884 case IPPROTO_UDP: { 885 /* 886 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 887 * UDP header to get the port information. 888 */ 889 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 890 mp->b_wptr) { 891 break; 892 } 893 /* 894 * Attempt to find a client stream based on port. 895 * Note that we do a reverse lookup since the header is 896 * in the form we sent it out. 897 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 898 * and we only set the src and dst addresses and nexthdr. 899 */ 900 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 901 rip6h.ip6_src = ip6h->ip6_dst; 902 rip6h.ip6_dst = ip6h->ip6_src; 903 rip6h.ip6_nxt = nexthdr; 904 ((uint16_t *)&ports)[0] = up[1]; 905 ((uint16_t *)&ports)[1] = up[0]; 906 907 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, ill, 908 IP6_NO_IPPOLICY, mctl_present, zoneid); 909 return; 910 } 911 case IPPROTO_TCP: { 912 /* 913 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 914 * the TCP header to get the port information. 
915 */ 916 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 917 mp->b_wptr) { 918 break; 919 } 920 921 /* 922 * Attempt to find a client stream based on port. 923 * Note that we do a reverse lookup since the header is 924 * in the form we sent it out. 925 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 926 * we only set the src and dst addresses and nexthdr. 927 */ 928 929 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 930 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 931 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 932 if (connp == NULL) { 933 goto drop_pkt; 934 } 935 936 squeue_fill(connp->conn_sqp, first_mp, tcp_input, 937 connp, SQTAG_TCP6_INPUT_ICMP_ERR); 938 return; 939 940 } 941 case IPPROTO_SCTP: 942 /* 943 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 944 * the SCTP header to get the port information. 945 */ 946 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 947 mp->b_wptr) { 948 break; 949 } 950 951 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 952 ((uint16_t *)&ports)[0] = up[1]; 953 ((uint16_t *)&ports)[1] = up[0]; 954 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 0, mctl_present, 955 IP6_NO_IPPOLICY, 0, zoneid); 956 return; 957 case IPPROTO_ESP: 958 case IPPROTO_AH: { 959 int ipsec_rc; 960 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 961 962 /* 963 * We need a IPSEC_IN in the front to fanout to AH/ESP. 964 * We will re-use the IPSEC_IN if it is already present as 965 * AH/ESP will not affect any fields in the IPSEC_IN for 966 * ICMP errors. If there is no IPSEC_IN, allocate a new 967 * one and attach it in the front. 968 */ 969 if (ii != NULL) { 970 /* 971 * ip_fanout_proto_again converts the ICMP errors 972 * that come back from AH/ESP to M_DATA so that 973 * if it is non-AH/ESP and we do a pullupmsg in 974 * this function, it would work. Convert it back 975 * to M_CTL before we send up as this is a ICMP 976 * error. This could have been generated locally or 977 * by some router. 
Validate the inner IPSEC 978 * headers. 979 * 980 * NOTE : ill_index is used by ip_fanout_proto_again 981 * to locate the ill. 982 */ 983 ASSERT(ill != NULL); 984 ii->ipsec_in_ill_index = 985 ill->ill_phyint->phyint_ifindex; 986 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 987 first_mp->b_cont->b_datap->db_type = M_CTL; 988 } else { 989 /* 990 * IPSEC_IN is not present. We attach a ipsec_in 991 * message and send up to IPSEC for validating 992 * and removing the IPSEC headers. Clear 993 * ipsec_in_secure so that when we return 994 * from IPSEC, we don't mistakenly think that this 995 * is a secure packet came from the network. 996 * 997 * NOTE : ill_index is used by ip_fanout_proto_again 998 * to locate the ill. 999 */ 1000 ASSERT(first_mp == mp); 1001 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1002 ASSERT(ill != NULL); 1003 if (first_mp == NULL) { 1004 freemsg(mp); 1005 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1006 return; 1007 } 1008 ii = (ipsec_in_t *)first_mp->b_rptr; 1009 1010 /* This is not a secure packet */ 1011 ii->ipsec_in_secure = B_FALSE; 1012 first_mp->b_cont = mp; 1013 mp->b_datap->db_type = M_CTL; 1014 ii->ipsec_in_ill_index = 1015 ill->ill_phyint->phyint_ifindex; 1016 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 1017 } 1018 1019 if (!ipsec_loaded(ipss)) { 1020 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 1021 return; 1022 } 1023 1024 if (nexthdr == IPPROTO_ESP) 1025 ipsec_rc = ipsecesp_icmp_error(first_mp); 1026 else 1027 ipsec_rc = ipsecah_icmp_error(first_mp); 1028 if (ipsec_rc == IPSEC_STATUS_FAILED) 1029 return; 1030 1031 ip_fanout_proto_again(first_mp, ill, ill, NULL); 1032 return; 1033 } 1034 case IPPROTO_ENCAP: 1035 case IPPROTO_IPV6: 1036 if ((uint8_t *)ip6h + hdr_length + 1037 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 1038 sizeof (ip6_t)) > mp->b_wptr) 1039 goto drop_pkt; 1040 1041 if (nexthdr == IPPROTO_ENCAP || 1042 !IN6_ARE_ADDR_EQUAL( 1043 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 1044 &ip6h->ip6_src) || 1045 !IN6_ARE_ADDR_EQUAL( 1046 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 1047 &ip6h->ip6_dst)) { 1048 /* 1049 * For tunnels that have used IPsec protection, 1050 * we need to adjust the MTU to take into account 1051 * the IPsec overhead. 1052 */ 1053 if (ii != NULL) 1054 icmp6->icmp6_mtu = htonl( 1055 ntohl(icmp6->icmp6_mtu) - 1056 ipsec_in_extra_length(first_mp)); 1057 } else { 1058 /* 1059 * Self-encapsulated case. As in the ipv4 case, 1060 * we need to strip the 2nd IP header. Since mp 1061 * is already pulled-up, we can simply bcopy 1062 * the 3rd header + data over the 2nd header. 1063 */ 1064 uint16_t unused_len; 1065 ip6_t *inner_ip6h = (ip6_t *) 1066 ((uchar_t *)ip6h + hdr_length); 1067 1068 /* 1069 * Make sure we don't do recursion more than once. 1070 */ 1071 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1072 &unused_len, &nexthdrp) || 1073 *nexthdrp == IPPROTO_IPV6) { 1074 goto drop_pkt; 1075 } 1076 1077 /* 1078 * We are about to modify the packet. Make a copy if 1079 * someone else has a reference to it. 1080 */ 1081 if (DB_REF(mp) > 1) { 1082 mblk_t *mp1; 1083 uint16_t icmp6_offset; 1084 1085 mp1 = copymsg(mp); 1086 if (mp1 == NULL) { 1087 goto drop_pkt; 1088 } 1089 icmp6_offset = (uint16_t) 1090 ((uchar_t *)icmp6 - mp->b_rptr); 1091 freemsg(mp); 1092 mp = mp1; 1093 1094 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1095 ip6h = (ip6_t *)&icmp6[1]; 1096 inner_ip6h = (ip6_t *) 1097 ((uchar_t *)ip6h + hdr_length); 1098 1099 if (mctl_present) 1100 first_mp->b_cont = mp; 1101 else 1102 first_mp = mp; 1103 } 1104 1105 /* 1106 * Need to set db_type back to M_DATA before 1107 * refeeding mp into this function. 
1108 */ 1109 DB_TYPE(mp) = M_DATA; 1110 1111 /* 1112 * Copy the 3rd header + remaining data on top 1113 * of the 2nd header. 1114 */ 1115 bcopy(inner_ip6h, ip6h, 1116 mp->b_wptr - (uchar_t *)inner_ip6h); 1117 1118 /* 1119 * Subtract length of the 2nd header. 1120 */ 1121 mp->b_wptr -= hdr_length; 1122 1123 /* 1124 * Now recurse, and see what I _really_ should be 1125 * doing here. 1126 */ 1127 icmp_inbound_error_fanout_v6(q, first_mp, 1128 (ip6_t *)mp->b_rptr, icmp6, ill, mctl_present, 1129 zoneid); 1130 return; 1131 } 1132 /* FALLTHRU */ 1133 default: 1134 /* 1135 * The rip6h header is only used for the lookup and we 1136 * only set the src and dst addresses and nexthdr. 1137 */ 1138 rip6h.ip6_src = ip6h->ip6_dst; 1139 rip6h.ip6_dst = ip6h->ip6_src; 1140 rip6h.ip6_nxt = nexthdr; 1141 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, ill, nexthdr, 0, 1142 IP6_NO_IPPOLICY, mctl_present, zoneid); 1143 return; 1144 } 1145 /* NOTREACHED */ 1146 drop_pkt: 1147 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1148 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1149 freemsg(first_mp); 1150 } 1151 1152 /* 1153 * Process received IPv6 ICMP Redirect messages. 
1154 */ 1155 /* ARGSUSED */ 1156 static void 1157 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1158 { 1159 ip6_t *ip6h; 1160 uint16_t hdr_length; 1161 nd_redirect_t *rd; 1162 ire_t *ire; 1163 ire_t *prev_ire; 1164 ire_t *redir_ire; 1165 in6_addr_t *src, *dst, *gateway; 1166 nd_opt_hdr_t *opt; 1167 nce_t *nce; 1168 int nce_flags = 0; 1169 int err = 0; 1170 boolean_t redirect_to_router = B_FALSE; 1171 int len; 1172 int optlen; 1173 iulp_t ulp_info = { 0 }; 1174 ill_t *prev_ire_ill; 1175 ipif_t *ipif; 1176 ip_stack_t *ipst = ill->ill_ipst; 1177 1178 ip6h = (ip6_t *)mp->b_rptr; 1179 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1180 hdr_length = ip_hdr_length_v6(mp, ip6h); 1181 else 1182 hdr_length = IPV6_HDR_LEN; 1183 1184 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1185 len = mp->b_wptr - mp->b_rptr - hdr_length; 1186 src = &ip6h->ip6_src; 1187 dst = &rd->nd_rd_dst; 1188 gateway = &rd->nd_rd_target; 1189 1190 /* Verify if it is a valid redirect */ 1191 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1192 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1193 (rd->nd_rd_code != 0) || 1194 (len < sizeof (nd_redirect_t)) || 1195 (IN6_IS_ADDR_V4MAPPED(dst)) || 1196 (IN6_IS_ADDR_MULTICAST(dst))) { 1197 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1198 freemsg(mp); 1199 return; 1200 } 1201 1202 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1203 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1204 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1205 freemsg(mp); 1206 return; 1207 } 1208 1209 if (len > sizeof (nd_redirect_t)) { 1210 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1211 len - sizeof (nd_redirect_t))) { 1212 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1213 freemsg(mp); 1214 return; 1215 } 1216 } 1217 1218 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1219 redirect_to_router = B_TRUE; 1220 nce_flags |= NCE_F_ISROUTER; 1221 } 1222 1223 /* ipif will be refreleased afterwards */ 1224 ipif = ipif_get_next_ipif(NULL, ill); 1225 if (ipif == NULL) { 1226 freemsg(mp); 1227 return; 1228 
} 1229 1230 /* 1231 * Verify that the IP source address of the redirect is 1232 * the same as the current first-hop router for the specified 1233 * ICMP destination address. 1234 * Also, Make sure we had a route for the dest in question and 1235 * that route was pointing to the old gateway (the source of the 1236 * redirect packet.) 1237 */ 1238 1239 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, 1240 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL_GROUP | 1241 MATCH_IRE_DEFAULT, ipst); 1242 1243 /* 1244 * Check that 1245 * the redirect was not from ourselves 1246 * old gateway is still directly reachable 1247 */ 1248 if (prev_ire == NULL || 1249 prev_ire->ire_type == IRE_LOCAL) { 1250 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1251 ipif_refrele(ipif); 1252 goto fail_redirect; 1253 } 1254 prev_ire_ill = ire_to_ill(prev_ire); 1255 ASSERT(prev_ire_ill != NULL); 1256 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1257 nce_flags |= NCE_F_NONUD; 1258 1259 /* 1260 * Should we use the old ULP info to create the new gateway? From 1261 * a user's perspective, we should inherit the info so that it 1262 * is a "smooth" transition. If we do not do that, then new 1263 * connections going thru the new gateway will have no route metrics, 1264 * which is counter-intuitive to user. From a network point of 1265 * view, this may or may not make sense even though the new gateway 1266 * is still directly connected to us so the route metrics should not 1267 * change much. 1268 * 1269 * But if the old ire_uinfo is not initialized, we do another 1270 * recursive lookup on the dest using the new gateway. There may 1271 * be a route to that. If so, use it to initialize the redirect 1272 * route. 1273 */ 1274 if (prev_ire->ire_uinfo.iulp_set) { 1275 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1276 } else if (redirect_to_router) { 1277 /* 1278 * Only do the following if the redirection is really to 1279 * a router. 
1280 */ 1281 ire_t *tmp_ire; 1282 ire_t *sire; 1283 1284 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1285 ALL_ZONES, 0, NULL, 1286 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1287 ipst); 1288 if (sire != NULL) { 1289 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1290 ASSERT(tmp_ire != NULL); 1291 ire_refrele(tmp_ire); 1292 ire_refrele(sire); 1293 } else if (tmp_ire != NULL) { 1294 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1295 sizeof (iulp_t)); 1296 ire_refrele(tmp_ire); 1297 } 1298 } 1299 1300 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1301 opt = (nd_opt_hdr_t *)&rd[1]; 1302 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1303 if (opt != NULL) { 1304 err = ndp_lookup_then_add(ill, 1305 (uchar_t *)&opt[1], /* Link layer address */ 1306 gateway, 1307 &ipv6_all_ones, /* prefix mask */ 1308 &ipv6_all_zeros, /* Mapping mask */ 1309 0, 1310 nce_flags, 1311 ND_STALE, 1312 &nce, 1313 NULL, 1314 NULL); 1315 switch (err) { 1316 case 0: 1317 NCE_REFRELE(nce); 1318 break; 1319 case EEXIST: 1320 /* 1321 * Check to see if link layer address has changed and 1322 * process the nce_state accordingly. 1323 */ 1324 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1325 NCE_REFRELE(nce); 1326 break; 1327 default: 1328 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1329 err)); 1330 ipif_refrele(ipif); 1331 goto fail_redirect; 1332 } 1333 } 1334 if (redirect_to_router) { 1335 /* icmp_redirect_ok_v6() must have already verified this */ 1336 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1337 1338 /* 1339 * Create a Route Association. This will allow us to remember 1340 * a router told us to use the particular gateway. 
1341 */ 1342 ire = ire_create_v6( 1343 dst, 1344 &ipv6_all_ones, /* mask */ 1345 &prev_ire->ire_src_addr_v6, /* source addr */ 1346 gateway, /* gateway addr */ 1347 &prev_ire->ire_max_frag, /* max frag */ 1348 NULL, /* Fast Path header */ 1349 NULL, /* no rfq */ 1350 NULL, /* no stq */ 1351 IRE_HOST, 1352 NULL, 1353 prev_ire->ire_ipif, 1354 NULL, 1355 0, 1356 0, 1357 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1358 &ulp_info, 1359 NULL, 1360 NULL, 1361 ipst); 1362 } else { 1363 queue_t *stq; 1364 1365 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1366 ? ipif->ipif_rq : ipif->ipif_wq; 1367 1368 /* 1369 * Just create an on link entry, i.e. interface route. 1370 */ 1371 ire = ire_create_v6( 1372 dst, /* gateway == dst */ 1373 &ipv6_all_ones, /* mask */ 1374 &prev_ire->ire_src_addr_v6, /* source addr */ 1375 &ipv6_all_zeros, /* gateway addr */ 1376 &prev_ire->ire_max_frag, /* max frag */ 1377 NULL, /* Fast Path header */ 1378 NULL, /* ire rfq */ 1379 stq, /* ire stq */ 1380 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1381 NULL, 1382 prev_ire->ire_ipif, 1383 &ipv6_all_ones, 1384 0, 1385 0, 1386 (RTF_DYNAMIC | RTF_HOST), 1387 &ulp_info, 1388 NULL, 1389 NULL, 1390 ipst); 1391 } 1392 1393 /* Release reference from earlier ipif_get_next_ipif() */ 1394 ipif_refrele(ipif); 1395 1396 if (ire == NULL) 1397 goto fail_redirect; 1398 1399 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1400 1401 /* tell routing sockets that we received a redirect */ 1402 ip_rts_change_v6(RTM_REDIRECT, 1403 &rd->nd_rd_dst, 1404 &rd->nd_rd_target, 1405 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1406 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1407 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1408 1409 /* 1410 * Delete any existing IRE_HOST type ires for this destination. 1411 * This together with the added IRE has the effect of 1412 * modifying an existing redirect. 
1413 */ 1414 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1415 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1416 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP), 1417 ipst); 1418 1419 ire_refrele(ire); /* Held in ire_add_v6 */ 1420 1421 if (redir_ire != NULL) { 1422 if (redir_ire->ire_flags & RTF_DYNAMIC) 1423 ire_delete(redir_ire); 1424 ire_refrele(redir_ire); 1425 } 1426 } 1427 1428 if (prev_ire->ire_type == IRE_CACHE) 1429 ire_delete(prev_ire); 1430 ire_refrele(prev_ire); 1431 prev_ire = NULL; 1432 1433 fail_redirect: 1434 if (prev_ire != NULL) 1435 ire_refrele(prev_ire); 1436 freemsg(mp); 1437 } 1438 1439 static ill_t * 1440 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1441 { 1442 ill_t *ill; 1443 1444 ASSERT(WR(q) == q); 1445 1446 if (q->q_next != NULL) { 1447 ill = (ill_t *)q->q_ptr; 1448 if (ILL_CAN_LOOKUP(ill)) 1449 ill_refhold(ill); 1450 else 1451 ill = NULL; 1452 } else { 1453 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1454 NULL, NULL, NULL, NULL, NULL, ipst); 1455 } 1456 if (ill == NULL) 1457 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1458 return (ill); 1459 } 1460 1461 /* 1462 * Assigns an appropriate source address to the packet. 1463 * If origdst is one of our IP addresses that use it as the source. 1464 * If the queue is an ill queue then select a source from that ill. 1465 * Otherwise pick a source based on a route lookup back to the origsrc. 1466 * 1467 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1468 */ 1469 static in6_addr_t * 1470 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1471 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1472 { 1473 ill_t *ill; 1474 ire_t *ire; 1475 ipif_t *ipif; 1476 1477 ASSERT(!(wq->q_flag & QREADR)); 1478 if (wq->q_next != NULL) { 1479 ill = (ill_t *)wq->q_ptr; 1480 } else { 1481 ill = NULL; 1482 } 1483 1484 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1485 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1486 ipst); 1487 if (ire != NULL) { 1488 /* Destined to one of our addresses */ 1489 *src = *origdst; 1490 ire_refrele(ire); 1491 return (src); 1492 } 1493 if (ire != NULL) { 1494 ire_refrele(ire); 1495 ire = NULL; 1496 } 1497 if (ill == NULL) { 1498 /* What is the route back to the original source? */ 1499 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1500 NULL, NULL, zoneid, NULL, 1501 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1502 if (ire == NULL) { 1503 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1504 return (NULL); 1505 } 1506 /* 1507 * Does not matter whether we use ire_stq or ire_ipif here. 1508 * Just pick an ill for ICMP replies. 1509 */ 1510 ASSERT(ire->ire_ipif != NULL); 1511 ill = ire->ire_ipif->ipif_ill; 1512 ire_refrele(ire); 1513 } 1514 ipif = ipif_select_source_v6(ill, origsrc, RESTRICT_TO_NONE, 1515 IPV6_PREFER_SRC_DEFAULT, zoneid); 1516 if (ipif != NULL) { 1517 *src = ipif->ipif_v6src_addr; 1518 ipif_refrele(ipif); 1519 return (src); 1520 } 1521 /* 1522 * Unusual case - can't find a usable source address to reach the 1523 * original source. Use what in the route to the source. 
1524 */ 1525 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1526 NULL, NULL, zoneid, NULL, 1527 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1528 if (ire == NULL) { 1529 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1530 return (NULL); 1531 } 1532 ASSERT(ire != NULL); 1533 *src = ire->ire_src_addr_v6; 1534 ire_refrele(ire); 1535 return (src); 1536 } 1537 1538 /* 1539 * Build and ship an IPv6 ICMP message using the packet data in mp, 1540 * and the ICMP header pointed to by "stuff". (May be called as 1541 * writer.) 1542 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1543 * verify that an icmp error packet can be sent. 1544 * 1545 * If q is an ill write side queue (which is the case when packets 1546 * arrive from ip_rput) then ip_wput code will ensure that packets to 1547 * link-local destinations are sent out that ill. 1548 * 1549 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1550 * source address (see above function). 1551 */ 1552 static void 1553 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1554 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1555 ip_stack_t *ipst) 1556 { 1557 ip6_t *ip6h; 1558 in6_addr_t v6dst; 1559 size_t len_needed; 1560 size_t msg_len; 1561 mblk_t *mp1; 1562 icmp6_t *icmp6; 1563 ill_t *ill; 1564 in6_addr_t v6src; 1565 mblk_t *ipsec_mp; 1566 ipsec_out_t *io; 1567 1568 ill = ip_queue_to_ill_v6(q, ipst); 1569 if (ill == NULL) { 1570 freemsg(mp); 1571 return; 1572 } 1573 1574 if (mctl_present) { 1575 /* 1576 * If it is : 1577 * 1578 * 1) a IPSEC_OUT, then this is caused by outbound 1579 * datagram originating on this host. IPSEC processing 1580 * may or may not have been done. Refer to comments above 1581 * icmp_inbound_error_fanout for details. 1582 * 1583 * 2) a IPSEC_IN if we are generating a icmp_message 1584 * for an incoming datagram destined for us i.e called 1585 * from ip_fanout_send_icmp. 
1586 */ 1587 ipsec_info_t *in; 1588 1589 ipsec_mp = mp; 1590 mp = ipsec_mp->b_cont; 1591 1592 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1593 ip6h = (ip6_t *)mp->b_rptr; 1594 1595 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1596 in->ipsec_info_type == IPSEC_IN); 1597 1598 if (in->ipsec_info_type == IPSEC_IN) { 1599 /* 1600 * Convert the IPSEC_IN to IPSEC_OUT. 1601 */ 1602 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1603 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1604 ill_refrele(ill); 1605 return; 1606 } 1607 } else { 1608 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1609 io = (ipsec_out_t *)in; 1610 /* 1611 * Clear out ipsec_out_proc_begin, so we do a fresh 1612 * ire lookup. 1613 */ 1614 io->ipsec_out_proc_begin = B_FALSE; 1615 } 1616 } else { 1617 /* 1618 * This is in clear. The icmp message we are building 1619 * here should go out in clear. 1620 */ 1621 ipsec_in_t *ii; 1622 ASSERT(mp->b_datap->db_type == M_DATA); 1623 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1624 if (ipsec_mp == NULL) { 1625 freemsg(mp); 1626 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1627 ill_refrele(ill); 1628 return; 1629 } 1630 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1631 1632 /* This is not a secure packet */ 1633 ii->ipsec_in_secure = B_FALSE; 1634 /* 1635 * For trusted extensions using a shared IP address we can 1636 * send using any zoneid. 1637 */ 1638 if (zoneid == ALL_ZONES) 1639 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1640 else 1641 ii->ipsec_in_zoneid = zoneid; 1642 ipsec_mp->b_cont = mp; 1643 ip6h = (ip6_t *)mp->b_rptr; 1644 /* 1645 * Convert the IPSEC_IN to IPSEC_OUT. 
1646 */ 1647 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1648 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1649 ill_refrele(ill); 1650 return; 1651 } 1652 } 1653 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1654 1655 if (v6src_ptr != NULL) { 1656 v6src = *v6src_ptr; 1657 } else { 1658 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1659 &v6src, zoneid, ipst) == NULL) { 1660 freemsg(ipsec_mp); 1661 ill_refrele(ill); 1662 return; 1663 } 1664 } 1665 v6dst = ip6h->ip6_src; 1666 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1667 msg_len = msgdsize(mp); 1668 if (msg_len > len_needed) { 1669 if (!adjmsg(mp, len_needed - msg_len)) { 1670 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1671 freemsg(ipsec_mp); 1672 ill_refrele(ill); 1673 return; 1674 } 1675 msg_len = len_needed; 1676 } 1677 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_HI); 1678 if (mp1 == NULL) { 1679 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1680 freemsg(ipsec_mp); 1681 ill_refrele(ill); 1682 return; 1683 } 1684 ill_refrele(ill); 1685 mp1->b_cont = mp; 1686 mp = mp1; 1687 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1688 io->ipsec_out_type == IPSEC_OUT); 1689 ipsec_mp->b_cont = mp; 1690 1691 /* 1692 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1693 * node generates be accepted in peace by all on-host destinations. 1694 * If we do NOT assume that all on-host destinations trust 1695 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1696 * (Look for ipsec_out_icmp_loopback). 
1697 */ 1698 io->ipsec_out_icmp_loopback = B_TRUE; 1699 1700 ip6h = (ip6_t *)mp->b_rptr; 1701 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1702 1703 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1704 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1705 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1706 ip6h->ip6_dst = v6dst; 1707 ip6h->ip6_src = v6src; 1708 msg_len += IPV6_HDR_LEN + len; 1709 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1710 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1711 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1712 } 1713 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1714 icmp6 = (icmp6_t *)&ip6h[1]; 1715 bcopy(stuff, (char *)icmp6, len); 1716 /* 1717 * Prepare for checksum by putting icmp length in the icmp 1718 * checksum field. The checksum is calculated in ip_wput_v6. 1719 */ 1720 icmp6->icmp6_cksum = ip6h->ip6_plen; 1721 if (icmp6->icmp6_type == ND_REDIRECT) { 1722 ip6h->ip6_hops = IPV6_MAX_HOPS; 1723 } 1724 /* Send to V6 writeside put routine */ 1725 put(q, ipsec_mp); 1726 } 1727 1728 /* 1729 * Update the output mib when ICMPv6 packets are sent. 
1730 */ 1731 static void 1732 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1733 { 1734 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1735 1736 switch (icmp6->icmp6_type) { 1737 case ICMP6_DST_UNREACH: 1738 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1739 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1740 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1741 break; 1742 1743 case ICMP6_TIME_EXCEEDED: 1744 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1745 break; 1746 1747 case ICMP6_PARAM_PROB: 1748 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1749 break; 1750 1751 case ICMP6_PACKET_TOO_BIG: 1752 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1753 break; 1754 1755 case ICMP6_ECHO_REQUEST: 1756 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1757 break; 1758 1759 case ICMP6_ECHO_REPLY: 1760 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1761 break; 1762 1763 case ND_ROUTER_SOLICIT: 1764 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1765 break; 1766 1767 case ND_ROUTER_ADVERT: 1768 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1769 break; 1770 1771 case ND_NEIGHBOR_SOLICIT: 1772 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1773 break; 1774 1775 case ND_NEIGHBOR_ADVERT: 1776 BUMP_MIB(ill->ill_icmp6_mib, 1777 ipv6IfIcmpOutNeighborAdvertisements); 1778 break; 1779 1780 case ND_REDIRECT: 1781 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1782 break; 1783 1784 case MLD_LISTENER_QUERY: 1785 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1786 break; 1787 1788 case MLD_LISTENER_REPORT: 1789 case MLD_V2_LISTENER_REPORT: 1790 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1791 break; 1792 1793 case MLD_LISTENER_REDUCTION: 1794 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1795 break; 1796 } 1797 } 1798 1799 /* 1800 * Check if it is ok to send an ICMPv6 error packet in 1801 * response to the IP packet in 
mp. 1802 * Free the message and return null if no 1803 * ICMP error packet should be sent. 1804 */ 1805 static mblk_t * 1806 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1807 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1808 { 1809 ip6_t *ip6h; 1810 1811 if (!mp) 1812 return (NULL); 1813 1814 ip6h = (ip6_t *)mp->b_rptr; 1815 1816 /* Check if source address uniquely identifies the host */ 1817 1818 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1819 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1820 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1821 freemsg(mp); 1822 return (NULL); 1823 } 1824 1825 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1826 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1827 icmp6_t *icmp6; 1828 1829 if (mp->b_wptr - mp->b_rptr < len_needed) { 1830 if (!pullupmsg(mp, len_needed)) { 1831 ill_t *ill; 1832 1833 ill = ip_queue_to_ill_v6(q, ipst); 1834 if (ill == NULL) { 1835 BUMP_MIB(&ipst->ips_icmp6_mib, 1836 ipv6IfIcmpInErrors); 1837 } else { 1838 BUMP_MIB(ill->ill_icmp6_mib, 1839 ipv6IfIcmpInErrors); 1840 ill_refrele(ill); 1841 } 1842 freemsg(mp); 1843 return (NULL); 1844 } 1845 ip6h = (ip6_t *)mp->b_rptr; 1846 } 1847 icmp6 = (icmp6_t *)&ip6h[1]; 1848 /* Explicitly do not generate errors in response to redirects */ 1849 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1850 icmp6->icmp6_type == ND_REDIRECT) { 1851 freemsg(mp); 1852 return (NULL); 1853 } 1854 } 1855 /* 1856 * Check that the destination is not multicast and that the packet 1857 * was not sent on link layer broadcast or multicast. (Exception 1858 * is Packet too big message as per the draft - when mcast_ok is set.) 1859 */ 1860 if (!mcast_ok && 1861 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1862 freemsg(mp); 1863 return (NULL); 1864 } 1865 if (icmp_err_rate_limit(ipst)) { 1866 /* 1867 * Only send ICMP error packets every so often. 1868 * This should be done on a per port/source basis, 1869 * but for now this will suffice. 
1870 */ 1871 freemsg(mp); 1872 return (NULL); 1873 } 1874 return (mp); 1875 } 1876 1877 /* 1878 * Generate an ICMPv6 redirect message. 1879 * Include target link layer address option if it exits. 1880 * Always include redirect header. 1881 */ 1882 static void 1883 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1884 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1885 { 1886 nd_redirect_t *rd; 1887 nd_opt_rd_hdr_t *rdh; 1888 uchar_t *buf; 1889 nce_t *nce = NULL; 1890 nd_opt_hdr_t *opt; 1891 int len; 1892 int ll_opt_len = 0; 1893 int max_redir_hdr_data_len; 1894 int pkt_len; 1895 in6_addr_t *srcp; 1896 ip_stack_t *ipst = ill->ill_ipst; 1897 1898 /* 1899 * We are called from ip_rput where we could 1900 * not have attached an IPSEC_IN. 1901 */ 1902 ASSERT(mp->b_datap->db_type == M_DATA); 1903 1904 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1905 if (mp == NULL) 1906 return; 1907 nce = ndp_lookup_v6(ill, targetp, B_FALSE); 1908 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1909 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1910 ill->ill_phys_addr_length + 7)/8 * 8; 1911 } 1912 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1913 ASSERT(len % 4 == 0); 1914 buf = kmem_alloc(len, KM_NOSLEEP); 1915 if (buf == NULL) { 1916 if (nce != NULL) 1917 NCE_REFRELE(nce); 1918 freemsg(mp); 1919 return; 1920 } 1921 1922 rd = (nd_redirect_t *)buf; 1923 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1924 rd->nd_rd_code = 0; 1925 rd->nd_rd_reserved = 0; 1926 rd->nd_rd_target = *targetp; 1927 rd->nd_rd_dst = *dest; 1928 1929 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1930 if (nce != NULL && ll_opt_len != 0) { 1931 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1932 opt->nd_opt_len = ll_opt_len/8; 1933 bcopy((char *)nce->nce_res_mp->b_rptr + 1934 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1935 ill->ill_phys_addr_length); 1936 } 1937 if (nce != NULL) 1938 NCE_REFRELE(nce); 1939 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 
1940 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1941 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1942 max_redir_hdr_data_len = 1943 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1944 pkt_len = msgdsize(mp); 1945 /* Make sure mp is 8 byte aligned */ 1946 if (pkt_len > max_redir_hdr_data_len) { 1947 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1948 sizeof (nd_opt_rd_hdr_t))/8; 1949 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1950 } else { 1951 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1952 (void) adjmsg(mp, -(pkt_len % 8)); 1953 } 1954 rdh->nd_opt_rh_reserved1 = 0; 1955 rdh->nd_opt_rh_reserved2 = 0; 1956 /* ipif_v6src_addr contains the link-local source address */ 1957 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1958 if (ill->ill_group != NULL) { 1959 /* 1960 * The receiver of the redirect will verify whether it 1961 * had a route through us (srcp that we will use in 1962 * the redirect) or not. As we load spread even link-locals, 1963 * we don't know which source address the receiver of 1964 * redirect has in its route for communicating with us. 1965 * Thus we randomly choose a source here and finally we 1966 * should get to the right one and it will eventually 1967 * accept the redirect from us. We can't call 1968 * ip_lookup_scope_v6 because we don't have the right 1969 * link-local address here. Thus we randomly choose one. 
1970 */ 1971 int cnt = ill->ill_group->illgrp_ill_count; 1972 1973 ill = ill->ill_group->illgrp_ill; 1974 cnt = ++ipst->ips_icmp_redirect_v6_src_index % cnt; 1975 while (cnt--) 1976 ill = ill->ill_group_next; 1977 srcp = &ill->ill_ipif->ipif_v6src_addr; 1978 } else { 1979 srcp = &ill->ill_ipif->ipif_v6src_addr; 1980 } 1981 rw_exit(&ipst->ips_ill_g_lock); 1982 /* Redirects sent by router, and router is global zone */ 1983 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1984 kmem_free(buf, len); 1985 } 1986 1987 1988 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1989 void 1990 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1991 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1992 ip_stack_t *ipst) 1993 { 1994 icmp6_t icmp6; 1995 boolean_t mctl_present; 1996 mblk_t *first_mp; 1997 1998 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1999 2000 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2001 if (mp == NULL) { 2002 if (mctl_present) 2003 freeb(first_mp); 2004 return; 2005 } 2006 bzero(&icmp6, sizeof (icmp6_t)); 2007 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 2008 icmp6.icmp6_code = code; 2009 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2010 zoneid, ipst); 2011 } 2012 2013 /* 2014 * Generate an ICMP unreachable message. 
2015 */ 2016 void 2017 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 2018 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2019 ip_stack_t *ipst) 2020 { 2021 icmp6_t icmp6; 2022 boolean_t mctl_present; 2023 mblk_t *first_mp; 2024 2025 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2026 2027 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2028 if (mp == NULL) { 2029 if (mctl_present) 2030 freeb(first_mp); 2031 return; 2032 } 2033 bzero(&icmp6, sizeof (icmp6_t)); 2034 icmp6.icmp6_type = ICMP6_DST_UNREACH; 2035 icmp6.icmp6_code = code; 2036 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2037 zoneid, ipst); 2038 } 2039 2040 /* 2041 * Generate an ICMP pkt too big message. 2042 */ 2043 static void 2044 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 2045 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 2046 { 2047 icmp6_t icmp6; 2048 mblk_t *first_mp; 2049 boolean_t mctl_present; 2050 2051 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2052 2053 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2054 if (mp == NULL) { 2055 if (mctl_present) 2056 freeb(first_mp); 2057 return; 2058 } 2059 bzero(&icmp6, sizeof (icmp6_t)); 2060 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2061 icmp6.icmp6_code = 0; 2062 icmp6.icmp6_mtu = htonl(mtu); 2063 2064 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2065 zoneid, ipst); 2066 } 2067 2068 /* 2069 * Generate an ICMP parameter problem message. (May be called as writer.) 2070 * 'offset' is the offset from the beginning of the packet in error. 
2071 */ 2072 static void 2073 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 2074 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 2075 ip_stack_t *ipst) 2076 { 2077 icmp6_t icmp6; 2078 boolean_t mctl_present; 2079 mblk_t *first_mp; 2080 2081 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 2082 2083 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 2084 if (mp == NULL) { 2085 if (mctl_present) 2086 freeb(first_mp); 2087 return; 2088 } 2089 bzero((char *)&icmp6, sizeof (icmp6_t)); 2090 icmp6.icmp6_type = ICMP6_PARAM_PROB; 2091 icmp6.icmp6_code = code; 2092 icmp6.icmp6_pptr = htonl(offset); 2093 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 2094 zoneid, ipst); 2095 } 2096 2097 /* 2098 * This code will need to take into account the possibility of binding 2099 * to a link local address on a multi-homed host, in which case the 2100 * outgoing interface (from the conn) will need to be used when getting 2101 * an ire for the dst. Going through proper outgoing interface and 2102 * choosing the source address corresponding to the outgoing interface 2103 * is necessary when the destination address is a link-local address and 2104 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2105 * This can happen when active connection is setup; thus ipp pointer 2106 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2107 * pointer is passed as ipp pointer. 
2108 */ 2109 mblk_t * 2110 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2111 { 2112 ssize_t len; 2113 int protocol; 2114 struct T_bind_req *tbr; 2115 sin6_t *sin6; 2116 ipa6_conn_t *ac6; 2117 in6_addr_t *v6srcp; 2118 in6_addr_t *v6dstp; 2119 uint16_t lport; 2120 uint16_t fport; 2121 uchar_t *ucp; 2122 mblk_t *mp1; 2123 boolean_t ire_requested; 2124 boolean_t ipsec_policy_set; 2125 int error = 0; 2126 boolean_t local_bind; 2127 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2128 ipa6_conn_x_t *acx6; 2129 boolean_t verify_dst; 2130 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2131 2132 ASSERT(connp->conn_af_isv6); 2133 len = mp->b_wptr - mp->b_rptr; 2134 if (len < (sizeof (*tbr) + 1)) { 2135 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2136 "ip_bind_v6: bogus msg, len %ld", len); 2137 goto bad_addr; 2138 } 2139 /* Back up and extract the protocol identifier. */ 2140 mp->b_wptr--; 2141 tbr = (struct T_bind_req *)mp->b_rptr; 2142 /* Reset the message type in preparation for shipping it back. */ 2143 mp->b_datap->db_type = M_PCPROTO; 2144 2145 protocol = *mp->b_wptr & 0xFF; 2146 connp->conn_ulp = (uint8_t)protocol; 2147 2148 /* 2149 * Check for a zero length address. This is from a protocol that 2150 * wants to register to receive all packets of its type. 2151 */ 2152 if (tbr->ADDR_length == 0) { 2153 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2154 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2155 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2156 NULL) { 2157 /* 2158 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2159 * Do not allow others to bind to these. 2160 */ 2161 goto bad_addr; 2162 } 2163 2164 /* 2165 * 2166 * The udp module never sends down a zero-length address, 2167 * and allowing this on a labeled system will break MLP 2168 * functionality. 
2169 */ 2170 if (is_system_labeled() && protocol == IPPROTO_UDP) 2171 goto bad_addr; 2172 2173 /* Allow ipsec plumbing */ 2174 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2175 protocol != IPPROTO_ESP) 2176 goto bad_addr; 2177 2178 connp->conn_srcv6 = ipv6_all_zeros; 2179 ipcl_proto_insert_v6(connp, protocol); 2180 2181 tbr->PRIM_type = T_BIND_ACK; 2182 return (mp); 2183 } 2184 2185 /* Extract the address pointer from the message. */ 2186 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2187 tbr->ADDR_length); 2188 if (ucp == NULL) { 2189 ip1dbg(("ip_bind_v6: no address\n")); 2190 goto bad_addr; 2191 } 2192 if (!OK_32PTR(ucp)) { 2193 ip1dbg(("ip_bind_v6: unaligned address\n")); 2194 goto bad_addr; 2195 } 2196 mp1 = mp->b_cont; /* trailing mp if any */ 2197 ire_requested = (mp1 && mp1->b_datap->db_type == IRE_DB_REQ_TYPE); 2198 ipsec_policy_set = (mp1 && mp1->b_datap->db_type == IPSEC_POLICY_SET); 2199 2200 switch (tbr->ADDR_length) { 2201 default: 2202 ip1dbg(("ip_bind_v6: bad address length %d\n", 2203 (int)tbr->ADDR_length)); 2204 goto bad_addr; 2205 2206 case IPV6_ADDR_LEN: 2207 /* Verification of local address only */ 2208 v6srcp = (in6_addr_t *)ucp; 2209 lport = 0; 2210 local_bind = B_TRUE; 2211 break; 2212 2213 case sizeof (sin6_t): 2214 sin6 = (sin6_t *)ucp; 2215 v6srcp = &sin6->sin6_addr; 2216 lport = sin6->sin6_port; 2217 local_bind = B_TRUE; 2218 break; 2219 2220 case sizeof (ipa6_conn_t): 2221 /* 2222 * Verify that both the source and destination addresses 2223 * are valid. 2224 * Note that we allow connect to broadcast and multicast 2225 * addresses when ire_requested is set. Thus the ULP 2226 * has to check for IRE_BROADCAST and multicast. 2227 */ 2228 ac6 = (ipa6_conn_t *)ucp; 2229 v6srcp = &ac6->ac6_laddr; 2230 v6dstp = &ac6->ac6_faddr; 2231 fport = ac6->ac6_fport; 2232 /* For raw socket, the local port is not set. */ 2233 lport = ac6->ac6_lport != 0 ? 
ac6->ac6_lport : 2234 connp->conn_lport; 2235 local_bind = B_FALSE; 2236 /* Always verify destination reachability. */ 2237 verify_dst = B_TRUE; 2238 break; 2239 2240 case sizeof (ipa6_conn_x_t): 2241 /* 2242 * Verify that the source address is valid. 2243 * Note that we allow connect to broadcast and multicast 2244 * addresses when ire_requested is set. Thus the ULP 2245 * has to check for IRE_BROADCAST and multicast. 2246 */ 2247 acx6 = (ipa6_conn_x_t *)ucp; 2248 ac6 = &acx6->ac6x_conn; 2249 v6srcp = &ac6->ac6_laddr; 2250 v6dstp = &ac6->ac6_faddr; 2251 fport = ac6->ac6_fport; 2252 lport = ac6->ac6_lport; 2253 local_bind = B_FALSE; 2254 /* 2255 * Client that passed ipa6_conn_x_t to us specifies whether to 2256 * verify destination reachability. 2257 */ 2258 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2259 break; 2260 } 2261 if (local_bind) { 2262 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2263 /* Bind to IPv4 address */ 2264 ipaddr_t v4src; 2265 2266 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2267 2268 error = ip_bind_laddr(connp, mp, v4src, lport, 2269 ire_requested, ipsec_policy_set, 2270 tbr->ADDR_length != IPV6_ADDR_LEN); 2271 if (error != 0) 2272 goto bad_addr; 2273 connp->conn_pkt_isv6 = B_FALSE; 2274 } else { 2275 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2276 error = 0; 2277 goto bad_addr; 2278 } 2279 error = ip_bind_laddr_v6(connp, mp, v6srcp, lport, 2280 ire_requested, ipsec_policy_set, 2281 (tbr->ADDR_length != IPV6_ADDR_LEN)); 2282 if (error != 0) 2283 goto bad_addr; 2284 connp->conn_pkt_isv6 = B_TRUE; 2285 } 2286 } else { 2287 /* 2288 * Bind to local and remote address. Local might be 2289 * unspecified in which case it will be extracted from 2290 * ire_src_addr_v6 2291 */ 2292 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2293 /* Connect to IPv4 address */ 2294 ipaddr_t v4src; 2295 ipaddr_t v4dst; 2296 2297 /* Is the source unspecified or mapped? 
*/ 2298 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2299 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2300 ip1dbg(("ip_bind_v6: " 2301 "dst is mapped, but not the src\n")); 2302 goto bad_addr; 2303 } 2304 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2305 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2306 2307 /* 2308 * XXX Fix needed. Need to pass ipsec_policy_set 2309 * instead of B_FALSE. 2310 */ 2311 2312 /* Always verify destination reachability. */ 2313 error = ip_bind_connected(connp, mp, &v4src, lport, 2314 v4dst, fport, ire_requested, ipsec_policy_set, 2315 B_TRUE, B_TRUE); 2316 if (error != 0) 2317 goto bad_addr; 2318 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2319 connp->conn_pkt_isv6 = B_FALSE; 2320 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2321 ip1dbg(("ip_bind_v6: " 2322 "src is mapped, but not the dst\n")); 2323 goto bad_addr; 2324 } else { 2325 error = ip_bind_connected_v6(connp, mp, v6srcp, 2326 lport, v6dstp, ipp, fport, ire_requested, 2327 ipsec_policy_set, B_TRUE, verify_dst); 2328 if (error != 0) 2329 goto bad_addr; 2330 connp->conn_pkt_isv6 = B_TRUE; 2331 } 2332 } 2333 /* Update qinfo if v4/v6 changed */ 2334 if ((orig_pkt_isv6 != connp->conn_pkt_isv6) && 2335 !(IPCL_IS_TCP(connp) || IPCL_IS_UDP(connp))) { 2336 if (connp->conn_pkt_isv6) 2337 ip_setqinfo(RD(q), IPV6_MINOR, B_TRUE, ipst); 2338 else 2339 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE, ipst); 2340 } 2341 2342 /* 2343 * Pass the IPSEC headers size in ire_ipsec_overhead. 2344 * We can't do this in ip_bind_insert_ire because the policy 2345 * may not have been inherited at that point in time and hence 2346 * conn_out_enforce_policy may not be set. 2347 */ 2348 mp1 = mp->b_cont; 2349 if (ire_requested && connp->conn_out_enforce_policy && 2350 mp1 != NULL && DB_TYPE(mp1) == IRE_DB_REQ_TYPE) { 2351 ire_t *ire = (ire_t *)mp1->b_rptr; 2352 ASSERT(MBLKL(mp1) >= sizeof (ire_t)); 2353 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2354 } 2355 2356 /* Send it home. 
*/ 2357 mp->b_datap->db_type = M_PCPROTO; 2358 tbr->PRIM_type = T_BIND_ACK; 2359 return (mp); 2360 2361 bad_addr: 2362 if (error == EINPROGRESS) 2363 return (NULL); 2364 if (error > 0) 2365 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2366 else 2367 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2368 return (mp); 2369 } 2370 2371 /* 2372 * Here address is verified to be a valid local address. 2373 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2374 * address is also considered a valid local address. 2375 * In the case of a multicast address, however, the 2376 * upper protocol is expected to reset the src address 2377 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2378 * no packets are emitted with multicast address as 2379 * source address. 2380 * The addresses valid for bind are: 2381 * (1) - in6addr_any 2382 * (2) - IP address of an UP interface 2383 * (3) - IP address of a DOWN interface 2384 * (4) - a multicast address. In this case 2385 * the conn will only receive packets destined to 2386 * the specified multicast address. Note: the 2387 * application still has to issue an 2388 * IPV6_JOIN_GROUP socket option. 2389 * 2390 * In all the above cases, the bound address must be valid in the current zone. 2391 * When the address is loopback or multicast, there might be many matching IREs 2392 * so bind has to look up based on the zone. 2393 */ 2394 static int 2395 ip_bind_laddr_v6(conn_t *connp, mblk_t *mp, const in6_addr_t *v6src, 2396 uint16_t lport, boolean_t ire_requested, boolean_t ipsec_policy_set, 2397 boolean_t fanout_insert) 2398 { 2399 int error = 0; 2400 ire_t *src_ire = NULL; 2401 ipif_t *ipif = NULL; 2402 mblk_t *policy_mp; 2403 zoneid_t zoneid; 2404 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2405 2406 if (ipsec_policy_set) 2407 policy_mp = mp->b_cont; 2408 2409 /* 2410 * If it was previously connected, conn_fully_bound would have 2411 * been set. 
2412 */ 2413 connp->conn_fully_bound = B_FALSE; 2414 2415 zoneid = connp->conn_zoneid; 2416 2417 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2418 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2419 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2420 /* 2421 * If an address other than in6addr_any is requested, 2422 * we verify that it is a valid address for bind 2423 * Note: Following code is in if-else-if form for 2424 * readability compared to a condition check. 2425 */ 2426 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2427 if (IRE_IS_LOCAL(src_ire)) { 2428 /* 2429 * (2) Bind to address of local UP interface 2430 */ 2431 ipif = src_ire->ire_ipif; 2432 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2433 ipif_t *multi_ipif = NULL; 2434 ire_t *save_ire; 2435 /* 2436 * (4) bind to multicast address. 2437 * Fake out the IRE returned to upper 2438 * layer to be a broadcast IRE in 2439 * ip_bind_insert_ire_v6(). 2440 * Pass other information that matches 2441 * the ipif (e.g. the source address). 
2442 * conn_multicast_ill is only used for 2443 * IPv6 packets 2444 */ 2445 mutex_enter(&connp->conn_lock); 2446 if (connp->conn_multicast_ill != NULL) { 2447 (void) ipif_lookup_zoneid( 2448 connp->conn_multicast_ill, zoneid, 0, 2449 &multi_ipif); 2450 } else { 2451 /* 2452 * Look for default like 2453 * ip_wput_v6 2454 */ 2455 multi_ipif = ipif_lookup_group_v6( 2456 &ipv6_unspecified_group, zoneid, ipst); 2457 } 2458 mutex_exit(&connp->conn_lock); 2459 save_ire = src_ire; 2460 src_ire = NULL; 2461 if (multi_ipif == NULL || !ire_requested || 2462 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2463 src_ire = save_ire; 2464 error = EADDRNOTAVAIL; 2465 } else { 2466 ASSERT(src_ire != NULL); 2467 if (save_ire != NULL) 2468 ire_refrele(save_ire); 2469 } 2470 if (multi_ipif != NULL) 2471 ipif_refrele(multi_ipif); 2472 } else { 2473 *mp->b_wptr++ = (char)connp->conn_ulp; 2474 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, 2475 CONNP_TO_WQ(connp), mp, ip_wput_nondata, &error, 2476 ipst); 2477 if (ipif == NULL) { 2478 if (error == EINPROGRESS) { 2479 if (src_ire != NULL) 2480 ire_refrele(src_ire); 2481 return (error); 2482 } 2483 /* 2484 * Not a valid address for bind 2485 */ 2486 error = EADDRNOTAVAIL; 2487 } else { 2488 ipif_refrele(ipif); 2489 } 2490 /* 2491 * Just to keep it consistent with the processing in 2492 * ip_bind_v6(). 2493 */ 2494 mp->b_wptr--; 2495 } 2496 2497 if (error != 0) { 2498 /* Red Alert! Attempting to be a bogon! */ 2499 if (ip_debug > 2) { 2500 /* ip1dbg */ 2501 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2502 " address %s\n", AF_INET6, v6src); 2503 } 2504 goto bad_addr; 2505 } 2506 } 2507 2508 /* 2509 * Allow setting new policies. For example, disconnects come 2510 * down as ipa_t bind. As we would have set conn_policy_cached 2511 * to B_TRUE before, we should set it to B_FALSE, so that policy 2512 * can change after the disconnect. 
2513 */ 2514 connp->conn_policy_cached = B_FALSE; 2515 2516 /* If not fanout_insert this was just an address verification */ 2517 if (fanout_insert) { 2518 /* 2519 * The addresses have been verified. Time to insert in 2520 * the correct fanout list. 2521 */ 2522 connp->conn_srcv6 = *v6src; 2523 connp->conn_remv6 = ipv6_all_zeros; 2524 connp->conn_lport = lport; 2525 connp->conn_fport = 0; 2526 2527 /* 2528 * We need to make sure that the conn_recv is set to a non-null 2529 * value before we insert the conn_t into the classifier table. 2530 * This is to avoid a race with an incoming packet which does 2531 * an ipcl_classify(). 2532 */ 2533 if (*mp->b_wptr == IPPROTO_TCP) 2534 connp->conn_recv = tcp_conn_request; 2535 error = ipcl_bind_insert_v6(connp, *mp->b_wptr, v6src, lport); 2536 } 2537 if (error == 0) { 2538 if (ire_requested) { 2539 if (!ip_bind_insert_ire_v6(mp, src_ire, v6src, NULL, 2540 ipst)) { 2541 error = -1; 2542 goto bad_addr; 2543 } 2544 } else if (ipsec_policy_set) { 2545 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2546 error = -1; 2547 goto bad_addr; 2548 } 2549 } 2550 } else if (connp->conn_ulp == IPPROTO_TCP) { 2551 connp->conn_recv = tcp_input; 2552 } 2553 bad_addr: 2554 if (error != 0) { 2555 if (connp->conn_anon_port) { 2556 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2557 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2558 B_FALSE); 2559 } 2560 connp->conn_mlp_type = mlptSingle; 2561 } 2562 2563 if (src_ire != NULL) 2564 ire_refrele(src_ire); 2565 2566 if (ipsec_policy_set) { 2567 ASSERT(policy_mp != NULL); 2568 freeb(policy_mp); 2569 /* 2570 * As of now assume that nothing else accompanies 2571 * IPSEC_POLICY_SET. 
2572 */ 2573 mp->b_cont = NULL; 2574 } 2575 return (error); 2576 } 2577 2578 /* ARGSUSED */ 2579 static void 2580 ip_bind_connected_resume_v6(ipsq_t *ipsq, queue_t *q, mblk_t *mp, 2581 void *dummy_arg) 2582 { 2583 conn_t *connp = NULL; 2584 t_scalar_t prim; 2585 2586 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2587 2588 if (CONN_Q(q)) 2589 connp = Q_TO_CONN(q); 2590 ASSERT(connp != NULL); 2591 2592 prim = ((union T_primitives *)mp->b_rptr)->type; 2593 ASSERT(prim == O_T_BIND_REQ || prim == T_BIND_REQ); 2594 2595 if (IPCL_IS_TCP(connp)) { 2596 /* Pass sticky_ipp for scope_id and pktinfo */ 2597 mp = ip_bind_v6(q, mp, connp, &connp->conn_tcp->tcp_sticky_ipp); 2598 } else { 2599 /* For UDP and ICMP */ 2600 mp = ip_bind_v6(q, mp, connp, NULL); 2601 } 2602 if (mp != NULL) { 2603 if (IPCL_IS_TCP(connp)) { 2604 CONN_INC_REF(connp); 2605 squeue_fill(connp->conn_sqp, mp, ip_resume_tcp_bind, 2606 connp, SQTAG_TCP_RPUTOTHER); 2607 } else if (IPCL_IS_UDP(connp)) { 2608 udp_resume_bind(connp, mp); 2609 } else { 2610 qreply(q, mp); 2611 CONN_OPER_PENDING_DONE(connp); 2612 } 2613 } 2614 } 2615 2616 /* 2617 * Verify that both the source and destination addresses 2618 * are valid. If verify_dst, then destination address must also be reachable, 2619 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2620 * It takes ip6_pkt_t * as one of the arguments to determine correct 2621 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2622 * destination address. Note that parameter ipp is only useful for TCP connect 2623 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2624 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2625 * 2626 */ 2627 static int 2628 ip_bind_connected_v6(conn_t *connp, mblk_t *mp, in6_addr_t *v6src, 2629 uint16_t lport, const in6_addr_t *v6dst, ip6_pkt_t *ipp, uint16_t fport, 2630 boolean_t ire_requested, boolean_t ipsec_policy_set, 2631 boolean_t fanout_insert, boolean_t verify_dst) 2632 { 2633 ire_t *src_ire; 2634 ire_t *dst_ire; 2635 int error = 0; 2636 int protocol; 2637 mblk_t *policy_mp; 2638 ire_t *sire = NULL; 2639 ire_t *md_dst_ire = NULL; 2640 ill_t *md_ill = NULL; 2641 ill_t *dst_ill = NULL; 2642 ipif_t *src_ipif = NULL; 2643 zoneid_t zoneid; 2644 boolean_t ill_held = B_FALSE; 2645 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2646 2647 src_ire = dst_ire = NULL; 2648 /* 2649 * NOTE: The protocol is beyond the wptr because that's how 2650 * the undocumented transport<-->IP T_BIND_REQ behavior works. 2651 */ 2652 protocol = *mp->b_wptr & 0xFF; 2653 2654 /* 2655 * If we never got a disconnect before, clear it now. 2656 */ 2657 connp->conn_fully_bound = B_FALSE; 2658 2659 if (ipsec_policy_set) { 2660 policy_mp = mp->b_cont; 2661 } 2662 2663 zoneid = connp->conn_zoneid; 2664 2665 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2666 ipif_t *ipif; 2667 2668 /* 2669 * Use an "emulated" IRE_BROADCAST to tell the transport it 2670 * is a multicast. 2671 * Pass other information that matches 2672 * the ipif (e.g. the source address). 
2673 * 2674 * conn_multicast_ill is only used for IPv6 packets 2675 */ 2676 mutex_enter(&connp->conn_lock); 2677 if (connp->conn_multicast_ill != NULL) { 2678 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2679 zoneid, 0, &ipif); 2680 } else { 2681 /* Look for default like ip_wput_v6 */ 2682 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2683 } 2684 mutex_exit(&connp->conn_lock); 2685 if (ipif == NULL || !ire_requested || 2686 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2687 if (ipif != NULL) 2688 ipif_refrele(ipif); 2689 if (ip_debug > 2) { 2690 /* ip1dbg */ 2691 pr_addr_dbg("ip_bind_connected_v6: bad " 2692 "connected multicast %s\n", AF_INET6, 2693 v6dst); 2694 } 2695 error = ENETUNREACH; 2696 goto bad_addr; 2697 } 2698 if (ipif != NULL) 2699 ipif_refrele(ipif); 2700 } else { 2701 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2702 NULL, &sire, zoneid, MBLK_GETLABEL(mp), 2703 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2704 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2705 ipst); 2706 /* 2707 * We also prevent ire's with src address INADDR_ANY to 2708 * be used, which are created temporarily for 2709 * sending out packets from endpoints that have 2710 * conn_unspec_src set. 2711 */ 2712 if (dst_ire == NULL || 2713 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2714 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2715 /* 2716 * When verifying destination reachability, we always 2717 * complain. 2718 * 2719 * When not verifying destination reachability but we 2720 * found an IRE, i.e. the destination is reachable, 2721 * then the other tests still apply and we complain. 
2722 */ 2723 if (verify_dst || (dst_ire != NULL)) { 2724 if (ip_debug > 2) { 2725 /* ip1dbg */ 2726 pr_addr_dbg("ip_bind_connected_v6: bad" 2727 " connected dst %s\n", AF_INET6, 2728 v6dst); 2729 } 2730 if (dst_ire == NULL || 2731 !(dst_ire->ire_type & IRE_HOST)) { 2732 error = ENETUNREACH; 2733 } else { 2734 error = EHOSTUNREACH; 2735 } 2736 goto bad_addr; 2737 } 2738 } 2739 } 2740 2741 /* 2742 * We now know that routing will allow us to reach the destination. 2743 * Check whether Trusted Solaris policy allows communication with this 2744 * host, and pretend that the destination is unreachable if not. 2745 * 2746 * This is never a problem for TCP, since that transport is known to 2747 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2748 * handling. If the remote is unreachable, it will be detected at that 2749 * point, so there's no reason to check it here. 2750 * 2751 * Note that for sendto (and other datagram-oriented friends), this 2752 * check is done as part of the data path label computation instead. 2753 * The check here is just to make non-TCP connect() report the right 2754 * error. 2755 */ 2756 if (dst_ire != NULL && is_system_labeled() && 2757 !IPCL_IS_TCP(connp) && 2758 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), v6dst, NULL, 2759 connp->conn_mac_exempt, ipst) != 0) { 2760 error = EHOSTUNREACH; 2761 if (ip_debug > 2) { 2762 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2763 AF_INET6, v6dst); 2764 } 2765 goto bad_addr; 2766 } 2767 2768 /* 2769 * If the app does a connect(), it means that it will most likely 2770 * send more than 1 packet to the destination. It makes sense 2771 * to clear the temporary flag. 
2772 */ 2773 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2774 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2775 irb_t *irb = dst_ire->ire_bucket; 2776 2777 rw_enter(&irb->irb_lock, RW_WRITER); 2778 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2779 irb->irb_tmp_ire_cnt--; 2780 rw_exit(&irb->irb_lock); 2781 } 2782 2783 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2784 2785 /* 2786 * See if we should notify ULP about MDT; we do this whether or not 2787 * ire_requested is TRUE, in order to handle active connects; MDT 2788 * eligibility tests for passive connects are handled separately 2789 * through tcp_adapt_ire(). We do this before the source address 2790 * selection, because dst_ire may change after a call to 2791 * ipif_select_source_v6(). This is a best-effort check, as the 2792 * packet for this connection may not actually go through 2793 * dst_ire->ire_stq, and the exact IRE can only be known after 2794 * calling ip_newroute_v6(). This is why we further check on the 2795 * IRE during Multidata packet transmission in tcp_multisend(). 
2796 */ 2797 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2798 dst_ire != NULL && 2799 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2800 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2801 ILL_MDT_CAPABLE(md_ill)) { 2802 md_dst_ire = dst_ire; 2803 IRE_REFHOLD(md_dst_ire); 2804 } 2805 2806 if (dst_ire != NULL && 2807 dst_ire->ire_type == IRE_LOCAL && 2808 dst_ire->ire_zoneid != zoneid && 2809 dst_ire->ire_zoneid != ALL_ZONES) { 2810 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2811 zoneid, 0, NULL, 2812 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2813 MATCH_IRE_RJ_BHOLE, ipst); 2814 if (src_ire == NULL) { 2815 error = EHOSTUNREACH; 2816 goto bad_addr; 2817 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2818 if (!(src_ire->ire_type & IRE_HOST)) 2819 error = ENETUNREACH; 2820 else 2821 error = EHOSTUNREACH; 2822 goto bad_addr; 2823 } 2824 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2825 src_ipif = src_ire->ire_ipif; 2826 ipif_refhold(src_ipif); 2827 *v6src = src_ipif->ipif_v6lcl_addr; 2828 } 2829 ire_refrele(src_ire); 2830 src_ire = NULL; 2831 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2832 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2833 *v6src = sire->ire_src_addr_v6; 2834 ire_refrele(dst_ire); 2835 dst_ire = sire; 2836 sire = NULL; 2837 } else if (dst_ire->ire_type == IRE_CACHE && 2838 (dst_ire->ire_flags & RTF_SETSRC)) { 2839 ASSERT(dst_ire->ire_zoneid == zoneid || 2840 dst_ire->ire_zoneid == ALL_ZONES); 2841 *v6src = dst_ire->ire_src_addr_v6; 2842 } else { 2843 /* 2844 * Pick a source address so that a proper inbound load 2845 * spreading would happen. Use dst_ill specified by the 2846 * app. when socket option or scopeid is set. 
2847 */ 2848 int err; 2849 2850 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2851 uint_t if_index; 2852 2853 /* 2854 * Scope id or IPV6_PKTINFO 2855 */ 2856 2857 if_index = ipp->ipp_ifindex; 2858 dst_ill = ill_lookup_on_ifindex( 2859 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2860 ipst); 2861 if (dst_ill == NULL) { 2862 ip1dbg(("ip_bind_connected_v6:" 2863 " bad ifindex %d\n", if_index)); 2864 error = EADDRNOTAVAIL; 2865 goto bad_addr; 2866 } 2867 ill_held = B_TRUE; 2868 } else if (connp->conn_outgoing_ill != NULL) { 2869 /* 2870 * For IPV6_BOUND_IF socket option, 2871 * conn_outgoing_ill should be set 2872 * already in TCP or UDP/ICMP. 2873 */ 2874 dst_ill = conn_get_held_ill(connp, 2875 &connp->conn_outgoing_ill, &err); 2876 if (err == ILL_LOOKUP_FAILED) { 2877 ip1dbg(("ip_bind_connected_v6:" 2878 "no ill for bound_if\n")); 2879 error = EADDRNOTAVAIL; 2880 goto bad_addr; 2881 } 2882 ill_held = B_TRUE; 2883 } else if (dst_ire->ire_stq != NULL) { 2884 /* No need to hold ill here */ 2885 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2886 } else { 2887 /* No need to hold ill here */ 2888 dst_ill = dst_ire->ire_ipif->ipif_ill; 2889 } 2890 if (!ip6_asp_can_lookup(ipst)) { 2891 *mp->b_wptr++ = (char)protocol; 2892 ip6_asp_pending_op(CONNP_TO_WQ(connp), mp, 2893 ip_bind_connected_resume_v6); 2894 error = EINPROGRESS; 2895 goto refrele_and_quit; 2896 } 2897 src_ipif = ipif_select_source_v6(dst_ill, v6dst, 2898 RESTRICT_TO_NONE, connp->conn_src_preferences, 2899 zoneid); 2900 ip6_asp_table_refrele(ipst); 2901 if (src_ipif == NULL) { 2902 pr_addr_dbg("ip_bind_connected_v6: " 2903 "no usable source address for " 2904 "connection to %s\n", AF_INET6, v6dst); 2905 error = EADDRNOTAVAIL; 2906 goto bad_addr; 2907 } 2908 *v6src = src_ipif->ipif_v6lcl_addr; 2909 } 2910 } 2911 2912 /* 2913 * We do ire_route_lookup_v6() here (and not an interface lookup) 2914 * as we assert that v6src should only come from an 2915 * UP interface for hard binding. 
2916 */ 2917 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2918 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2919 2920 /* src_ire must be a local|loopback */ 2921 if (!IRE_IS_LOCAL(src_ire)) { 2922 if (ip_debug > 2) { 2923 /* ip1dbg */ 2924 pr_addr_dbg("ip_bind_connected_v6: bad " 2925 "connected src %s\n", AF_INET6, v6src); 2926 } 2927 error = EADDRNOTAVAIL; 2928 goto bad_addr; 2929 } 2930 2931 /* 2932 * If the source address is a loopback address, the 2933 * destination had best be local or multicast. 2934 * The transports that can't handle multicast will reject 2935 * those addresses. 2936 */ 2937 if (src_ire->ire_type == IRE_LOOPBACK && 2938 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2939 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2940 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2941 error = -1; 2942 goto bad_addr; 2943 } 2944 /* 2945 * Allow setting new policies. For example, disconnects come 2946 * down as ipa_t bind. As we would have set conn_policy_cached 2947 * to B_TRUE before, we should set it to B_FALSE, so that policy 2948 * can change after the disconnect. 2949 */ 2950 connp->conn_policy_cached = B_FALSE; 2951 2952 /* 2953 * The addresses have been verified. Initialize the conn 2954 * before calling the policy as they expect the conns 2955 * initialized. 2956 */ 2957 connp->conn_srcv6 = *v6src; 2958 connp->conn_remv6 = *v6dst; 2959 connp->conn_lport = lport; 2960 connp->conn_fport = fport; 2961 2962 ASSERT(!(ipsec_policy_set && ire_requested)); 2963 if (ire_requested) { 2964 iulp_t *ulp_info = NULL; 2965 2966 /* 2967 * Note that sire will not be NULL if this is an off-link 2968 * connection and there is not cache for that dest yet. 2969 * 2970 * XXX Because of an existing bug, if there are multiple 2971 * default routes, the IRE returned now may not be the actual 2972 * default route used (default routes are chosen in a 2973 * round robin fashion). 
So if the metrics for different 2974 * default routes are different, we may return the wrong 2975 * metrics. This will not be a problem if the existing 2976 * bug is fixed. 2977 */ 2978 if (sire != NULL) 2979 ulp_info = &(sire->ire_uinfo); 2980 2981 if (!ip_bind_insert_ire_v6(mp, dst_ire, v6dst, ulp_info, 2982 ipst)) { 2983 error = -1; 2984 goto bad_addr; 2985 } 2986 } else if (ipsec_policy_set) { 2987 if (!ip_bind_ipsec_policy_set(connp, policy_mp)) { 2988 error = -1; 2989 goto bad_addr; 2990 } 2991 } 2992 2993 /* 2994 * Cache IPsec policy in this conn. If we have per-socket policy, 2995 * we'll cache that. If we don't, we'll inherit global policy. 2996 * 2997 * We can't insert until the conn reflects the policy. Note that 2998 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2999 * connections where we don't have a policy. This is to prevent 3000 * global policy lookups in the inbound path. 3001 * 3002 * If we insert before we set conn_policy_cached, 3003 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 3004 * because global policy cound be non-empty. We normally call 3005 * ipsec_check_policy() for conn_policy_cached connections only if 3006 * conn_in_enforce_policy is set. But in this case, 3007 * conn_policy_cached can get set anytime since we made the 3008 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 3009 * is called, which will make the above assumption false. Thus, we 3010 * need to insert after we set conn_policy_cached. 3011 */ 3012 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 3013 goto bad_addr; 3014 3015 /* If not fanout_insert this was just an address verification */ 3016 if (fanout_insert) { 3017 /* 3018 * The addresses have been verified. Time to insert in 3019 * the correct fanout list. 3020 * We need to make sure that the conn_recv is set to a non-null 3021 * value before we insert the conn_t into the classifier table. 
3022 * This is to avoid a race with an incoming packet which does 3023 * an ipcl_classify(). 3024 */ 3025 if (protocol == IPPROTO_TCP) 3026 connp->conn_recv = tcp_input; 3027 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 3028 connp->conn_ports, 3029 IPCL_IS_TCP(connp) ? connp->conn_tcp->tcp_bound_if : 0); 3030 } 3031 if (error == 0) { 3032 connp->conn_fully_bound = B_TRUE; 3033 /* 3034 * Our initial checks for MDT have passed; the IRE is not 3035 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 3036 * be supporting MDT. Pass the IRE, IPC and ILL into 3037 * ip_mdinfo_return(), which performs further checks 3038 * against them and upon success, returns the MDT info 3039 * mblk which we will attach to the bind acknowledgment. 3040 */ 3041 if (md_dst_ire != NULL) { 3042 mblk_t *mdinfo_mp; 3043 3044 ASSERT(md_ill != NULL); 3045 ASSERT(md_ill->ill_mdt_capab != NULL); 3046 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 3047 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) 3048 linkb(mp, mdinfo_mp); 3049 } 3050 } 3051 bad_addr: 3052 if (ipsec_policy_set) { 3053 ASSERT(policy_mp != NULL); 3054 freeb(policy_mp); 3055 /* 3056 * As of now assume that nothing else accompanies 3057 * IPSEC_POLICY_SET. 3058 */ 3059 mp->b_cont = NULL; 3060 } 3061 refrele_and_quit: 3062 if (src_ire != NULL) 3063 IRE_REFRELE(src_ire); 3064 if (dst_ire != NULL) 3065 IRE_REFRELE(dst_ire); 3066 if (sire != NULL) 3067 IRE_REFRELE(sire); 3068 if (src_ipif != NULL) 3069 ipif_refrele(src_ipif); 3070 if (md_dst_ire != NULL) 3071 IRE_REFRELE(md_dst_ire); 3072 if (ill_held && dst_ill != NULL) 3073 ill_refrele(dst_ill); 3074 return (error); 3075 } 3076 3077 /* 3078 * Insert the ire in b_cont. Returns false if it fails (due to lack of space). 3079 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 
3080 */ 3081 /* ARGSUSED4 */ 3082 static boolean_t 3083 ip_bind_insert_ire_v6(mblk_t *mp, ire_t *ire, const in6_addr_t *dst, 3084 iulp_t *ulp_info, ip_stack_t *ipst) 3085 { 3086 mblk_t *mp1; 3087 ire_t *ret_ire; 3088 3089 mp1 = mp->b_cont; 3090 ASSERT(mp1 != NULL); 3091 3092 if (ire != NULL) { 3093 /* 3094 * mp1 initialized above to IRE_DB_REQ_TYPE 3095 * appended mblk. Its <upper protocol>'s 3096 * job to make sure there is room. 3097 */ 3098 if ((mp1->b_datap->db_lim - mp1->b_rptr) < sizeof (ire_t)) 3099 return (B_FALSE); 3100 3101 mp1->b_datap->db_type = IRE_DB_TYPE; 3102 mp1->b_wptr = mp1->b_rptr + sizeof (ire_t); 3103 bcopy(ire, mp1->b_rptr, sizeof (ire_t)); 3104 ret_ire = (ire_t *)mp1->b_rptr; 3105 if (IN6_IS_ADDR_MULTICAST(dst) || 3106 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3107 ret_ire->ire_type = IRE_BROADCAST; 3108 ret_ire->ire_addr_v6 = *dst; 3109 } 3110 if (ulp_info != NULL) { 3111 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3112 sizeof (iulp_t)); 3113 } 3114 ret_ire->ire_mp = mp1; 3115 } else { 3116 /* 3117 * No IRE was found. Remove IRE mblk. 3118 */ 3119 mp->b_cont = mp1->b_cont; 3120 freeb(mp1); 3121 } 3122 return (B_TRUE); 3123 } 3124 3125 /* 3126 * Add an ip6i_t header to the front of the mblk. 3127 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3128 * Returns NULL if allocation fails (and frees original message). 3129 * Used in outgoing path when going through ip_newroute_*v6(). 3130 * Used in incoming path to pass ifindex to transports. 
3131 */ 3132 mblk_t * 3133 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3134 { 3135 mblk_t *mp1; 3136 ip6i_t *ip6i; 3137 ip6_t *ip6h; 3138 3139 ip6h = (ip6_t *)mp->b_rptr; 3140 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3141 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3142 mp->b_datap->db_ref > 1) { 3143 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3144 if (mp1 == NULL) { 3145 freemsg(mp); 3146 return (NULL); 3147 } 3148 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3149 mp1->b_cont = mp; 3150 mp = mp1; 3151 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3152 } 3153 mp->b_rptr = (uchar_t *)ip6i; 3154 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3155 ip6i->ip6i_nxt = IPPROTO_RAW; 3156 if (ill != NULL) { 3157 ip6i->ip6i_flags = IP6I_IFINDEX; 3158 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3159 } else { 3160 ip6i->ip6i_flags = 0; 3161 } 3162 ip6i->ip6i_nexthop = *dst; 3163 return (mp); 3164 } 3165 3166 /* 3167 * Handle protocols with which IP is less intimate. There 3168 * can be more than one stream bound to a particular 3169 * protocol. When this is the case, normally each one gets a copy 3170 * of any incoming packets. 3171 * However, if the packet was tunneled and not multicast we only send to it 3172 * the first match. 3173 * 3174 * Zones notes: 3175 * Packets will be distributed to streams in all zones. This is really only 3176 * useful for ICMPv6 as only applications in the global zone can create raw 3177 * sockets for other protocols. 
3178 */ 3179 static void 3180 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3181 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3182 boolean_t mctl_present, zoneid_t zoneid) 3183 { 3184 queue_t *rq; 3185 mblk_t *mp1, *first_mp1; 3186 in6_addr_t dst = ip6h->ip6_dst; 3187 in6_addr_t src = ip6h->ip6_src; 3188 boolean_t one_only; 3189 mblk_t *first_mp = mp; 3190 boolean_t secure, shared_addr; 3191 conn_t *connp, *first_connp, *next_connp; 3192 connf_t *connfp; 3193 ip_stack_t *ipst = inill->ill_ipst; 3194 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3195 3196 if (mctl_present) { 3197 mp = first_mp->b_cont; 3198 secure = ipsec_in_is_secure(first_mp); 3199 ASSERT(mp != NULL); 3200 } else { 3201 secure = B_FALSE; 3202 } 3203 3204 /* 3205 * If the packet was tunneled and not multicast we only send to it 3206 * the first match. 3207 */ 3208 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3209 !IN6_IS_ADDR_MULTICAST(&dst)); 3210 3211 shared_addr = (zoneid == ALL_ZONES); 3212 if (shared_addr) { 3213 /* 3214 * We don't allow multilevel ports for raw IP, so no need to 3215 * check for that here. 3216 */ 3217 zoneid = tsol_packet_to_zoneid(mp); 3218 } 3219 3220 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3221 mutex_enter(&connfp->connf_lock); 3222 connp = connfp->connf_head; 3223 for (connp = connfp->connf_head; connp != NULL; 3224 connp = connp->conn_next) { 3225 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3226 zoneid) && 3227 (!is_system_labeled() || 3228 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3229 connp))) 3230 break; 3231 } 3232 3233 if (connp == NULL || connp->conn_upq == NULL) { 3234 /* 3235 * No one bound to this port. Is 3236 * there a client that wants all 3237 * unclaimed datagrams? 
3238 */ 3239 mutex_exit(&connfp->connf_lock); 3240 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3241 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3242 nexthdr_offset, mctl_present, zoneid, ipst)) { 3243 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3244 } 3245 3246 return; 3247 } 3248 3249 CONN_INC_REF(connp); 3250 first_connp = connp; 3251 3252 /* 3253 * XXX: Fix the multiple protocol listeners case. We should not 3254 * be walking the conn->next list here. 3255 */ 3256 if (one_only) { 3257 /* 3258 * Only send message to one tunnel driver by immediately 3259 * terminating the loop. 3260 */ 3261 connp = NULL; 3262 } else { 3263 connp = connp->conn_next; 3264 3265 } 3266 for (;;) { 3267 while (connp != NULL) { 3268 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3269 flags, zoneid) && 3270 (!is_system_labeled() || 3271 tsol_receive_local(mp, &dst, IPV6_VERSION, 3272 shared_addr, connp))) 3273 break; 3274 connp = connp->conn_next; 3275 } 3276 3277 /* 3278 * Just copy the data part alone. The mctl part is 3279 * needed just for verifying policy and it is never 3280 * sent up. 3281 */ 3282 if (connp == NULL || connp->conn_upq == NULL || 3283 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3284 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3285 /* 3286 * No more intested clients or memory 3287 * allocation failed 3288 */ 3289 connp = first_connp; 3290 break; 3291 } 3292 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3293 CONN_INC_REF(connp); 3294 mutex_exit(&connfp->connf_lock); 3295 rq = connp->conn_rq; 3296 /* 3297 * For link-local always add ifindex so that transport can set 3298 * sin6_scope_id. Avoid it for ICMP error fanout. 
3299 */ 3300 if ((connp->conn_ip_recvpktinfo || 3301 IN6_IS_ADDR_LINKLOCAL(&src)) && 3302 (flags & IP_FF_IPINFO)) { 3303 /* Add header */ 3304 mp1 = ip_add_info_v6(mp1, inill, &dst); 3305 } 3306 if (mp1 == NULL) { 3307 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3308 } else if (!canputnext(rq)) { 3309 if (flags & IP_FF_RAWIP) { 3310 BUMP_MIB(ill->ill_ip_mib, 3311 rawipIfStatsInOverflows); 3312 } else { 3313 BUMP_MIB(ill->ill_icmp6_mib, 3314 ipv6IfIcmpInOverflows); 3315 } 3316 3317 freemsg(mp1); 3318 } else { 3319 /* 3320 * Don't enforce here if we're a tunnel - let "tun" do 3321 * it instead. 3322 */ 3323 if (!IPCL_IS_IPTUN(connp) && 3324 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3325 secure)) { 3326 first_mp1 = ipsec_check_inbound_policy 3327 (first_mp1, connp, NULL, ip6h, 3328 mctl_present); 3329 } 3330 if (first_mp1 != NULL) { 3331 if (mctl_present) 3332 freeb(first_mp1); 3333 BUMP_MIB(ill->ill_ip_mib, 3334 ipIfStatsHCInDelivers); 3335 putnext(rq, mp1); 3336 } 3337 } 3338 mutex_enter(&connfp->connf_lock); 3339 /* Follow the next pointer before releasing the conn. */ 3340 next_connp = connp->conn_next; 3341 CONN_DEC_REF(connp); 3342 connp = next_connp; 3343 } 3344 3345 /* Last one. Send it upstream. */ 3346 mutex_exit(&connfp->connf_lock); 3347 3348 /* Initiate IPPF processing */ 3349 if (IP6_IN_IPP(flags, ipst)) { 3350 uint_t ifindex; 3351 3352 mutex_enter(&ill->ill_lock); 3353 ifindex = ill->ill_phyint->phyint_ifindex; 3354 mutex_exit(&ill->ill_lock); 3355 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3356 if (mp == NULL) { 3357 CONN_DEC_REF(connp); 3358 if (mctl_present) 3359 freeb(first_mp); 3360 return; 3361 } 3362 } 3363 3364 /* 3365 * For link-local always add ifindex so that transport can set 3366 * sin6_scope_id. Avoid it for ICMP error fanout. 
3367 */ 3368 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3369 (flags & IP_FF_IPINFO)) { 3370 /* Add header */ 3371 mp = ip_add_info_v6(mp, inill, &dst); 3372 if (mp == NULL) { 3373 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3374 CONN_DEC_REF(connp); 3375 if (mctl_present) 3376 freeb(first_mp); 3377 return; 3378 } else if (mctl_present) { 3379 first_mp->b_cont = mp; 3380 } else { 3381 first_mp = mp; 3382 } 3383 } 3384 3385 rq = connp->conn_rq; 3386 if (!canputnext(rq)) { 3387 if (flags & IP_FF_RAWIP) { 3388 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3389 } else { 3390 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3391 } 3392 3393 freemsg(first_mp); 3394 } else { 3395 if (IPCL_IS_IPTUN(connp)) { 3396 /* 3397 * Tunneled packet. We enforce policy in the tunnel 3398 * module itself. 3399 * 3400 * Send the WHOLE packet up (incl. IPSEC_IN) without 3401 * a policy check. 3402 */ 3403 putnext(rq, first_mp); 3404 CONN_DEC_REF(connp); 3405 return; 3406 } 3407 /* 3408 * Don't enforce here if we're a tunnel - let "tun" do 3409 * it instead. 3410 */ 3411 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3412 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3413 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3414 NULL, ip6h, mctl_present); 3415 if (first_mp == NULL) { 3416 CONN_DEC_REF(connp); 3417 return; 3418 } 3419 } 3420 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3421 putnext(rq, mp); 3422 if (mctl_present) 3423 freeb(first_mp); 3424 } 3425 CONN_DEC_REF(connp); 3426 } 3427 3428 /* 3429 * Send an ICMP error after patching up the packet appropriately. Returns 3430 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3431 */ 3432 int 3433 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3434 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3435 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3436 { 3437 ip6_t *ip6h; 3438 mblk_t *first_mp; 3439 boolean_t secure; 3440 unsigned char db_type; 3441 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3442 3443 first_mp = mp; 3444 if (mctl_present) { 3445 mp = mp->b_cont; 3446 secure = ipsec_in_is_secure(first_mp); 3447 ASSERT(mp != NULL); 3448 } else { 3449 /* 3450 * If this is an ICMP error being reported - which goes 3451 * up as M_CTLs, we need to convert them to M_DATA till 3452 * we finish checking with global policy because 3453 * ipsec_check_global_policy() assumes M_DATA as clear 3454 * and M_CTL as secure. 3455 */ 3456 db_type = mp->b_datap->db_type; 3457 mp->b_datap->db_type = M_DATA; 3458 secure = B_FALSE; 3459 } 3460 /* 3461 * We are generating an icmp error for some inbound packet. 3462 * Called from all ip_fanout_(udp, tcp, proto) functions. 3463 * Before we generate an error, check with global policy 3464 * to see whether this is allowed to enter the system. As 3465 * there is no "conn", we are checking with global policy. 
3466 */ 3467 ip6h = (ip6_t *)mp->b_rptr; 3468 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3469 first_mp = ipsec_check_global_policy(first_mp, NULL, 3470 NULL, ip6h, mctl_present, ipst->ips_netstack); 3471 if (first_mp == NULL) 3472 return (0); 3473 } 3474 3475 if (!mctl_present) 3476 mp->b_datap->db_type = db_type; 3477 3478 if (flags & IP_FF_SEND_ICMP) { 3479 if (flags & IP_FF_HDR_COMPLETE) { 3480 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3481 freemsg(first_mp); 3482 return (1); 3483 } 3484 } 3485 switch (icmp_type) { 3486 case ICMP6_DST_UNREACH: 3487 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3488 B_FALSE, B_FALSE, zoneid, ipst); 3489 break; 3490 case ICMP6_PARAM_PROB: 3491 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3492 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3493 break; 3494 default: 3495 #ifdef DEBUG 3496 panic("ip_fanout_send_icmp_v6: wrong type"); 3497 /*NOTREACHED*/ 3498 #else 3499 freemsg(first_mp); 3500 break; 3501 #endif 3502 } 3503 } else { 3504 freemsg(first_mp); 3505 return (0); 3506 } 3507 3508 return (1); 3509 } 3510 3511 3512 /* 3513 * Fanout for TCP packets 3514 * The caller puts <fport, lport> in the ports parameter. 
3515 */ 3516 static void 3517 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3518 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3519 { 3520 mblk_t *first_mp; 3521 boolean_t secure; 3522 conn_t *connp; 3523 tcph_t *tcph; 3524 boolean_t syn_present = B_FALSE; 3525 ip_stack_t *ipst = inill->ill_ipst; 3526 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3527 3528 first_mp = mp; 3529 if (mctl_present) { 3530 mp = first_mp->b_cont; 3531 secure = ipsec_in_is_secure(first_mp); 3532 ASSERT(mp != NULL); 3533 } else { 3534 secure = B_FALSE; 3535 } 3536 3537 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3538 3539 if (connp == NULL || 3540 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3541 /* 3542 * No hard-bound match. Send Reset. 3543 */ 3544 dblk_t *dp = mp->b_datap; 3545 uint32_t ill_index; 3546 3547 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3548 3549 /* Initiate IPPf processing, if needed. */ 3550 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3551 (flags & IP6_NO_IPPOLICY)) { 3552 ill_index = ill->ill_phyint->phyint_ifindex; 3553 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3554 if (first_mp == NULL) { 3555 if (connp != NULL) 3556 CONN_DEC_REF(connp); 3557 return; 3558 } 3559 } 3560 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3561 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3562 ipst->ips_netstack->netstack_tcp); 3563 if (connp != NULL) 3564 CONN_DEC_REF(connp); 3565 return; 3566 } 3567 3568 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3569 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3570 if (connp->conn_flags & IPCL_TCP) { 3571 squeue_t *sqp; 3572 3573 /* 3574 * For fused tcp loopback, assign the eager's 3575 * squeue to be that of the active connect's. 
3576 */ 3577 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3578 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3579 !secure && 3580 !IP6_IN_IPP(flags, ipst)) { 3581 ASSERT(Q_TO_CONN(q) != NULL); 3582 sqp = Q_TO_CONN(q)->conn_sqp; 3583 } else { 3584 sqp = IP_SQUEUE_GET(lbolt); 3585 } 3586 3587 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3588 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3589 3590 /* 3591 * db_cksumstuff is unused in the incoming 3592 * path; Thus store the ifindex here. It will 3593 * be cleared in tcp_conn_create_v6(). 3594 */ 3595 DB_CKSUMSTUFF(mp) = 3596 (intptr_t)ill->ill_phyint->phyint_ifindex; 3597 syn_present = B_TRUE; 3598 } 3599 } 3600 3601 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3602 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3603 if ((flags & TH_RST) || (flags & TH_URG)) { 3604 CONN_DEC_REF(connp); 3605 freemsg(first_mp); 3606 return; 3607 } 3608 if (flags & TH_ACK) { 3609 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3610 ipst->ips_netstack->netstack_tcp); 3611 CONN_DEC_REF(connp); 3612 return; 3613 } 3614 3615 CONN_DEC_REF(connp); 3616 freemsg(first_mp); 3617 return; 3618 } 3619 3620 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3621 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3622 NULL, ip6h, mctl_present); 3623 if (first_mp == NULL) { 3624 CONN_DEC_REF(connp); 3625 return; 3626 } 3627 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3628 ASSERT(syn_present); 3629 if (mctl_present) { 3630 ASSERT(first_mp != mp); 3631 first_mp->b_datap->db_struioflag |= 3632 STRUIO_POLICY; 3633 } else { 3634 ASSERT(first_mp == mp); 3635 mp->b_datap->db_struioflag &= 3636 ~STRUIO_EAGER; 3637 mp->b_datap->db_struioflag |= 3638 STRUIO_POLICY; 3639 } 3640 } else { 3641 /* 3642 * Discard first_mp early since we're dealing with a 3643 * fully-connected conn_t and tcp doesn't do policy in 3644 * this case. 
Also, if someone is bound to IPPROTO_TCP 3645 * over raw IP, they don't expect to see a M_CTL. 3646 */ 3647 if (mctl_present) { 3648 freeb(first_mp); 3649 mctl_present = B_FALSE; 3650 } 3651 first_mp = mp; 3652 } 3653 } 3654 3655 /* Initiate IPPF processing */ 3656 if (IP6_IN_IPP(flags, ipst)) { 3657 uint_t ifindex; 3658 3659 mutex_enter(&ill->ill_lock); 3660 ifindex = ill->ill_phyint->phyint_ifindex; 3661 mutex_exit(&ill->ill_lock); 3662 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3663 if (mp == NULL) { 3664 CONN_DEC_REF(connp); 3665 if (mctl_present) { 3666 freeb(first_mp); 3667 } 3668 return; 3669 } else if (mctl_present) { 3670 /* 3671 * ip_add_info_v6 might return a new mp. 3672 */ 3673 ASSERT(first_mp != mp); 3674 first_mp->b_cont = mp; 3675 } else { 3676 first_mp = mp; 3677 } 3678 } 3679 3680 /* 3681 * For link-local always add ifindex so that TCP can bind to that 3682 * interface. Avoid it for ICMP error fanout. 3683 */ 3684 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3685 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3686 (flags & IP_FF_IPINFO))) { 3687 /* Add header */ 3688 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3689 if (mp == NULL) { 3690 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3691 CONN_DEC_REF(connp); 3692 if (mctl_present) 3693 freeb(first_mp); 3694 return; 3695 } else if (mctl_present) { 3696 ASSERT(first_mp != mp); 3697 first_mp->b_cont = mp; 3698 } else { 3699 first_mp = mp; 3700 } 3701 } 3702 3703 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3704 if (IPCL_IS_TCP(connp)) { 3705 (*ip_input_proc)(connp->conn_sqp, first_mp, 3706 connp->conn_recv, connp, SQTAG_IP6_TCP_INPUT); 3707 } else { 3708 putnext(connp->conn_rq, first_mp); 3709 CONN_DEC_REF(connp); 3710 } 3711 } 3712 3713 /* 3714 * Fanout for UDP packets. 3715 * The caller puts <fport, lport> in the ports parameter. 3716 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3717 * 3718 * If SO_REUSEADDR is set all multicast and broadcast packets 3719 * will be delivered to all streams bound to the same port. 3720 * 3721 * Zones notes: 3722 * Multicast packets will be distributed to streams in all zones. 3723 */ 3724 static void 3725 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3726 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3727 zoneid_t zoneid) 3728 { 3729 uint32_t dstport, srcport; 3730 in6_addr_t dst; 3731 mblk_t *first_mp; 3732 boolean_t secure; 3733 conn_t *connp; 3734 connf_t *connfp; 3735 conn_t *first_conn; 3736 conn_t *next_conn; 3737 mblk_t *mp1, *first_mp1; 3738 in6_addr_t src; 3739 boolean_t shared_addr; 3740 ip_stack_t *ipst = inill->ill_ipst; 3741 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3742 3743 first_mp = mp; 3744 if (mctl_present) { 3745 mp = first_mp->b_cont; 3746 secure = ipsec_in_is_secure(first_mp); 3747 ASSERT(mp != NULL); 3748 } else { 3749 secure = B_FALSE; 3750 } 3751 3752 /* Extract ports in net byte order */ 3753 dstport = htons(ntohl(ports) & 0xFFFF); 3754 srcport = htons(ntohl(ports) >> 16); 3755 dst = ip6h->ip6_dst; 3756 src = ip6h->ip6_src; 3757 3758 shared_addr = (zoneid == ALL_ZONES); 3759 if (shared_addr) { 3760 /* 3761 * No need to handle exclusive-stack zones since ALL_ZONES 3762 * only applies to the shared stack. 3763 */ 3764 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3765 /* 3766 * If no shared MLP is found, tsol_mlp_findzone returns 3767 * ALL_ZONES. In that case, we assume it's SLP, and 3768 * search for the zone based on the packet label. 3769 * That will also return ALL_ZONES on failure, but 3770 * we never allow conn_zoneid to be set to ALL_ZONES. 3771 */ 3772 if (zoneid == ALL_ZONES) 3773 zoneid = tsol_packet_to_zoneid(mp); 3774 } 3775 3776 /* Attempt to find a client stream based on destination port. 
*/ 3777 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3778 mutex_enter(&connfp->connf_lock); 3779 connp = connfp->connf_head; 3780 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3781 /* 3782 * Not multicast. Send to the one (first) client we find. 3783 */ 3784 while (connp != NULL) { 3785 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3786 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3787 conn_wantpacket_v6(connp, ill, ip6h, 3788 flags, zoneid)) { 3789 break; 3790 } 3791 connp = connp->conn_next; 3792 } 3793 if (connp == NULL || connp->conn_upq == NULL) 3794 goto notfound; 3795 3796 if (is_system_labeled() && 3797 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3798 connp)) 3799 goto notfound; 3800 3801 /* Found a client */ 3802 CONN_INC_REF(connp); 3803 mutex_exit(&connfp->connf_lock); 3804 3805 if (CONN_UDP_FLOWCTLD(connp)) { 3806 freemsg(first_mp); 3807 CONN_DEC_REF(connp); 3808 return; 3809 } 3810 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3811 first_mp = ipsec_check_inbound_policy(first_mp, 3812 connp, NULL, ip6h, mctl_present); 3813 if (first_mp == NULL) { 3814 CONN_DEC_REF(connp); 3815 return; 3816 } 3817 } 3818 /* Initiate IPPF processing */ 3819 if (IP6_IN_IPP(flags, ipst)) { 3820 uint_t ifindex; 3821 3822 mutex_enter(&ill->ill_lock); 3823 ifindex = ill->ill_phyint->phyint_ifindex; 3824 mutex_exit(&ill->ill_lock); 3825 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3826 if (mp == NULL) { 3827 CONN_DEC_REF(connp); 3828 if (mctl_present) 3829 freeb(first_mp); 3830 return; 3831 } 3832 } 3833 /* 3834 * For link-local always add ifindex so that 3835 * transport can set sin6_scope_id. Avoid it for 3836 * ICMP error fanout. 
3837 */ 3838 if ((connp->conn_ip_recvpktinfo || 3839 IN6_IS_ADDR_LINKLOCAL(&src)) && 3840 (flags & IP_FF_IPINFO)) { 3841 /* Add header */ 3842 mp = ip_add_info_v6(mp, inill, &dst); 3843 if (mp == NULL) { 3844 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3845 CONN_DEC_REF(connp); 3846 if (mctl_present) 3847 freeb(first_mp); 3848 return; 3849 } else if (mctl_present) { 3850 first_mp->b_cont = mp; 3851 } else { 3852 first_mp = mp; 3853 } 3854 } 3855 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3856 3857 /* Send it upstream */ 3858 CONN_UDP_RECV(connp, mp); 3859 3860 IP6_STAT(ipst, ip6_udp_fannorm); 3861 CONN_DEC_REF(connp); 3862 if (mctl_present) 3863 freeb(first_mp); 3864 return; 3865 } 3866 3867 while (connp != NULL) { 3868 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3869 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3870 (!is_system_labeled() || 3871 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3872 connp))) 3873 break; 3874 connp = connp->conn_next; 3875 } 3876 3877 if (connp == NULL || connp->conn_upq == NULL) 3878 goto notfound; 3879 3880 first_conn = connp; 3881 3882 CONN_INC_REF(connp); 3883 connp = connp->conn_next; 3884 for (;;) { 3885 while (connp != NULL) { 3886 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3887 src) && conn_wantpacket_v6(connp, ill, ip6h, 3888 flags, zoneid) && 3889 (!is_system_labeled() || 3890 tsol_receive_local(mp, &dst, IPV6_VERSION, 3891 shared_addr, connp))) 3892 break; 3893 connp = connp->conn_next; 3894 } 3895 /* 3896 * Just copy the data part alone. The mctl part is 3897 * needed just for verifying policy and it is never 3898 * sent up. 3899 */ 3900 if (connp == NULL || 3901 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3902 ((first_mp1 = ip_copymsg(first_mp)) 3903 == NULL))) { 3904 /* 3905 * No more interested clients or memory 3906 * allocation failed 3907 */ 3908 connp = first_conn; 3909 break; 3910 } 3911 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3912 CONN_INC_REF(connp); 3913 mutex_exit(&connfp->connf_lock); 3914 /* 3915 * For link-local always add ifindex so that transport 3916 * can set sin6_scope_id. Avoid it for ICMP error 3917 * fanout. 3918 */ 3919 if ((connp->conn_ip_recvpktinfo || 3920 IN6_IS_ADDR_LINKLOCAL(&src)) && 3921 (flags & IP_FF_IPINFO)) { 3922 /* Add header */ 3923 mp1 = ip_add_info_v6(mp1, inill, &dst); 3924 } 3925 /* mp1 could have changed */ 3926 if (mctl_present) 3927 first_mp1->b_cont = mp1; 3928 else 3929 first_mp1 = mp1; 3930 if (mp1 == NULL) { 3931 if (mctl_present) 3932 freeb(first_mp1); 3933 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3934 goto next_one; 3935 } 3936 if (CONN_UDP_FLOWCTLD(connp)) { 3937 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3938 freemsg(first_mp1); 3939 goto next_one; 3940 } 3941 3942 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3943 first_mp1 = ipsec_check_inbound_policy 3944 (first_mp1, connp, NULL, ip6h, 3945 mctl_present); 3946 } 3947 if (first_mp1 != NULL) { 3948 if (mctl_present) 3949 freeb(first_mp1); 3950 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3951 3952 /* Send it upstream */ 3953 CONN_UDP_RECV(connp, mp1); 3954 } 3955 next_one: 3956 mutex_enter(&connfp->connf_lock); 3957 /* Follow the next pointer before releasing the conn. */ 3958 next_conn = connp->conn_next; 3959 IP6_STAT(ipst, ip6_udp_fanmb); 3960 CONN_DEC_REF(connp); 3961 connp = next_conn; 3962 } 3963 3964 /* Last one. Send it upstream. 
*/ 3965 mutex_exit(&connfp->connf_lock); 3966 3967 /* Initiate IPPF processing */ 3968 if (IP6_IN_IPP(flags, ipst)) { 3969 uint_t ifindex; 3970 3971 mutex_enter(&ill->ill_lock); 3972 ifindex = ill->ill_phyint->phyint_ifindex; 3973 mutex_exit(&ill->ill_lock); 3974 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3975 if (mp == NULL) { 3976 CONN_DEC_REF(connp); 3977 if (mctl_present) { 3978 freeb(first_mp); 3979 } 3980 return; 3981 } 3982 } 3983 3984 /* 3985 * For link-local always add ifindex so that transport can set 3986 * sin6_scope_id. Avoid it for ICMP error fanout. 3987 */ 3988 if ((connp->conn_ip_recvpktinfo || 3989 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3990 /* Add header */ 3991 mp = ip_add_info_v6(mp, inill, &dst); 3992 if (mp == NULL) { 3993 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3994 CONN_DEC_REF(connp); 3995 if (mctl_present) 3996 freeb(first_mp); 3997 return; 3998 } else if (mctl_present) { 3999 first_mp->b_cont = mp; 4000 } else { 4001 first_mp = mp; 4002 } 4003 } 4004 if (CONN_UDP_FLOWCTLD(connp)) { 4005 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 4006 freemsg(mp); 4007 } else { 4008 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 4009 first_mp = ipsec_check_inbound_policy(first_mp, 4010 connp, NULL, ip6h, mctl_present); 4011 if (first_mp == NULL) { 4012 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 4013 CONN_DEC_REF(connp); 4014 return; 4015 } 4016 } 4017 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 4018 4019 /* Send it upstream */ 4020 CONN_UDP_RECV(connp, mp); 4021 } 4022 IP6_STAT(ipst, ip6_udp_fanmb); 4023 CONN_DEC_REF(connp); 4024 if (mctl_present) 4025 freeb(first_mp); 4026 return; 4027 4028 notfound: 4029 mutex_exit(&connfp->connf_lock); 4030 /* 4031 * No one bound to this port. Is 4032 * there a client that wants all 4033 * unclaimed datagrams? 
4034 */ 4035 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 4036 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 4037 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 4038 zoneid); 4039 } else { 4040 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 4041 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 4042 mctl_present, zoneid, ipst)) { 4043 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 4044 } 4045 } 4046 } 4047 4048 /* 4049 * int ip_find_hdr_v6() 4050 * 4051 * This routine is used by the upper layer protocols and the IP tunnel 4052 * module to: 4053 * - Set extension header pointers to appropriate locations 4054 * - Determine IPv6 header length and return it 4055 * - Return a pointer to the last nexthdr value 4056 * 4057 * The caller must initialize ipp_fields. 4058 * 4059 * NOTE: If multiple extension headers of the same type are present, 4060 * ip_find_hdr_v6() will set the respective extension header pointers 4061 * to the first one that it encounters in the IPv6 header. It also 4062 * skips fragment headers. This routine deals with malformed packets 4063 * of various sorts in which case the returned length is up to the 4064 * malformed part. 4065 */ 4066 int 4067 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 4068 { 4069 uint_t length, ehdrlen; 4070 uint8_t nexthdr; 4071 uint8_t *whereptr, *endptr; 4072 ip6_dest_t *tmpdstopts; 4073 ip6_rthdr_t *tmprthdr; 4074 ip6_hbh_t *tmphopopts; 4075 ip6_frag_t *tmpfraghdr; 4076 4077 length = IPV6_HDR_LEN; 4078 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4079 endptr = mp->b_wptr; 4080 4081 nexthdr = ip6h->ip6_nxt; 4082 while (whereptr < endptr) { 4083 /* Is there enough left for len + nexthdr? 
*/ 4084 if (whereptr + MIN_EHDR_LEN > endptr) 4085 goto done; 4086 4087 switch (nexthdr) { 4088 case IPPROTO_HOPOPTS: 4089 tmphopopts = (ip6_hbh_t *)whereptr; 4090 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4091 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4092 goto done; 4093 nexthdr = tmphopopts->ip6h_nxt; 4094 /* return only 1st hbh */ 4095 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4096 ipp->ipp_fields |= IPPF_HOPOPTS; 4097 ipp->ipp_hopopts = tmphopopts; 4098 ipp->ipp_hopoptslen = ehdrlen; 4099 } 4100 break; 4101 case IPPROTO_DSTOPTS: 4102 tmpdstopts = (ip6_dest_t *)whereptr; 4103 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4104 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4105 goto done; 4106 nexthdr = tmpdstopts->ip6d_nxt; 4107 /* 4108 * ipp_dstopts is set to the destination header after a 4109 * routing header. 4110 * Assume it is a post-rthdr destination header 4111 * and adjust when we find an rthdr. 4112 */ 4113 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4114 ipp->ipp_fields |= IPPF_DSTOPTS; 4115 ipp->ipp_dstopts = tmpdstopts; 4116 ipp->ipp_dstoptslen = ehdrlen; 4117 } 4118 break; 4119 case IPPROTO_ROUTING: 4120 tmprthdr = (ip6_rthdr_t *)whereptr; 4121 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4122 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4123 goto done; 4124 nexthdr = tmprthdr->ip6r_nxt; 4125 /* return only 1st rthdr */ 4126 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4127 ipp->ipp_fields |= IPPF_RTHDR; 4128 ipp->ipp_rthdr = tmprthdr; 4129 ipp->ipp_rthdrlen = ehdrlen; 4130 } 4131 /* 4132 * Make any destination header we've seen be a 4133 * pre-rthdr destination header. 
4134 */ 4135 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4136 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4137 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4138 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4139 ipp->ipp_dstopts = NULL; 4140 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4141 ipp->ipp_dstoptslen = 0; 4142 } 4143 break; 4144 case IPPROTO_FRAGMENT: 4145 tmpfraghdr = (ip6_frag_t *)whereptr; 4146 ehdrlen = sizeof (ip6_frag_t); 4147 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4148 goto done; 4149 nexthdr = tmpfraghdr->ip6f_nxt; 4150 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4151 ipp->ipp_fields |= IPPF_FRAGHDR; 4152 ipp->ipp_fraghdr = tmpfraghdr; 4153 ipp->ipp_fraghdrlen = ehdrlen; 4154 } 4155 break; 4156 case IPPROTO_NONE: 4157 default: 4158 goto done; 4159 } 4160 length += ehdrlen; 4161 whereptr += ehdrlen; 4162 } 4163 done: 4164 if (nexthdrp != NULL) 4165 *nexthdrp = nexthdr; 4166 return (length); 4167 } 4168 4169 int 4170 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4171 { 4172 ire_t *ire; 4173 4174 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4175 ire = ire_lookup_local_v6(zoneid, ipst); 4176 if (ire == NULL) { 4177 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4178 return (1); 4179 } 4180 ip6h->ip6_src = ire->ire_addr_v6; 4181 ire_refrele(ire); 4182 } 4183 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4184 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4185 return (0); 4186 } 4187 4188 /* 4189 * Try to determine where and what are the IPv6 header length and 4190 * pointer to nexthdr value for the upper layer protocol (or an 4191 * unknown next hdr). 4192 * 4193 * Parameters returns a pointer to the nexthdr value; 4194 * Must handle malformed packets of various sorts. 4195 * Function returns failure for malformed cases. 
4196 */ 4197 boolean_t 4198 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4199 uint8_t **nexthdrpp) 4200 { 4201 uint16_t length; 4202 uint_t ehdrlen; 4203 uint8_t *nexthdrp; 4204 uint8_t *whereptr; 4205 uint8_t *endptr; 4206 ip6_dest_t *desthdr; 4207 ip6_rthdr_t *rthdr; 4208 ip6_frag_t *fraghdr; 4209 4210 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4211 length = IPV6_HDR_LEN; 4212 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4213 endptr = mp->b_wptr; 4214 4215 nexthdrp = &ip6h->ip6_nxt; 4216 while (whereptr < endptr) { 4217 /* Is there enough left for len + nexthdr? */ 4218 if (whereptr + MIN_EHDR_LEN > endptr) 4219 break; 4220 4221 switch (*nexthdrp) { 4222 case IPPROTO_HOPOPTS: 4223 case IPPROTO_DSTOPTS: 4224 /* Assumes the headers are identical for hbh and dst */ 4225 desthdr = (ip6_dest_t *)whereptr; 4226 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4227 if ((uchar_t *)desthdr + ehdrlen > endptr) 4228 return (B_FALSE); 4229 nexthdrp = &desthdr->ip6d_nxt; 4230 break; 4231 case IPPROTO_ROUTING: 4232 rthdr = (ip6_rthdr_t *)whereptr; 4233 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4234 if ((uchar_t *)rthdr + ehdrlen > endptr) 4235 return (B_FALSE); 4236 nexthdrp = &rthdr->ip6r_nxt; 4237 break; 4238 case IPPROTO_FRAGMENT: 4239 fraghdr = (ip6_frag_t *)whereptr; 4240 ehdrlen = sizeof (ip6_frag_t); 4241 if ((uchar_t *)&fraghdr[1] > endptr) 4242 return (B_FALSE); 4243 nexthdrp = &fraghdr->ip6f_nxt; 4244 break; 4245 case IPPROTO_NONE: 4246 /* No next header means we're finished */ 4247 default: 4248 *hdr_length_ptr = length; 4249 *nexthdrpp = nexthdrp; 4250 return (B_TRUE); 4251 } 4252 length += ehdrlen; 4253 whereptr += ehdrlen; 4254 *hdr_length_ptr = length; 4255 *nexthdrpp = nexthdrp; 4256 } 4257 switch (*nexthdrp) { 4258 case IPPROTO_HOPOPTS: 4259 case IPPROTO_DSTOPTS: 4260 case IPPROTO_ROUTING: 4261 case IPPROTO_FRAGMENT: 4262 /* 4263 * If any know extension headers are still to be processed, 4264 * the 
packet's malformed (or at least all the IP header(s) are 4265 * not in the same mblk - and that should never happen. 4266 */ 4267 return (B_FALSE); 4268 4269 default: 4270 /* 4271 * If we get here, we know that all of the IP headers were in 4272 * the same mblk, even if the ULP header is in the next mblk. 4273 */ 4274 *hdr_length_ptr = length; 4275 *nexthdrpp = nexthdrp; 4276 return (B_TRUE); 4277 } 4278 } 4279 4280 /* 4281 * Return the length of the IPv6 related headers (including extension headers) 4282 * Returns a length even if the packet is malformed. 4283 */ 4284 int 4285 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4286 { 4287 uint16_t hdr_len; 4288 uint8_t *nexthdrp; 4289 4290 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4291 return (hdr_len); 4292 } 4293 4294 /* 4295 * Select an ill for the packet by considering load spreading across 4296 * a different ill in the group if dst_ill is part of some group. 4297 */ 4298 static ill_t * 4299 ip_newroute_get_dst_ill_v6(ill_t *dst_ill) 4300 { 4301 ill_t *ill; 4302 4303 /* 4304 * We schedule irrespective of whether the source address is 4305 * INADDR_UNSPECIED or not. 4306 */ 4307 ill = illgrp_scheduler(dst_ill); 4308 if (ill == NULL) 4309 return (NULL); 4310 4311 /* 4312 * For groups with names ip_sioctl_groupname ensures that all 4313 * ills are of same type. For groups without names, ifgrp_insert 4314 * ensures this. 4315 */ 4316 ASSERT(dst_ill->ill_type == ill->ill_type); 4317 4318 return (ill); 4319 } 4320 4321 /* 4322 * IPv6 - 4323 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4324 * to send out a packet to a destination address for which we do not have 4325 * specific routing information. 4326 * 4327 * Handle non-multicast packets. If ill is non-NULL the match is done 4328 * for that ill. 
 *
 * When a specific ill is specified (using IPV6_PKTINFO,
 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match
 * on routing entries (ftable and ctable) that have a matching
 * ire->ire_ipif->ipif_ill. Thus this can only be used
 * for destinations that are on-link for the specific ill
 * and that can appear on multiple links. Thus it is useful
 * for multicast destinations, link-local destinations, and
 * at some point perhaps for site-local destinations (if the
 * node sits at a site boundary).
 * We create the cache entries in the regular ctable since
 * it cannot "confuse" things for other destinations.
 *
 * When ill is part of an ill group, we subject the packets
 * to load spreading even if the ill is specified by the
 * means described above. We disable only for IPV6_BOUND_PIF
 * and for the cases where IP6I_ATTACH_IF is set, i.e. NS/NA/
 * Echo replies to link-local destinations have IP6I_ATTACH_IF
 * set.
 *
 * NOTE : These are the scopes of some of the variables that point at IRE,
 *	  which need to be followed while making any future modifications
 *	  to avoid memory leaks.
 *
 *	- ire and sire are the entries looked up initially by
 *	  ire_ftable_lookup_v6.
 *	- ipif_ire is used to hold the interface ire associated with
 *	  the new cache ire. But its scope is limited, so we always REFRELE
 *	  it before branching out to error paths.
 *	- save_ire is initialized before ire_create, so that ire returned
 *	  by ire_create will not over-write the ire. We REFRELE save_ire
 *	  before breaking out of the switch.
 *
 *	Thus on failures, we have to REFRELE only ire and sire, if they
 *	are not NULL.
 *
 *	v6srcp may be used in the future. Currently unused.
4367 */ 4368 /* ARGSUSED */ 4369 void 4370 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4371 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4372 { 4373 in6_addr_t v6gw; 4374 in6_addr_t dst; 4375 ire_t *ire = NULL; 4376 ipif_t *src_ipif = NULL; 4377 ill_t *dst_ill = NULL; 4378 ire_t *sire = NULL; 4379 ire_t *save_ire; 4380 mblk_t *dlureq_mp; 4381 ip6_t *ip6h; 4382 int err = 0; 4383 mblk_t *first_mp; 4384 ipsec_out_t *io; 4385 ill_t *attach_ill = NULL; 4386 ushort_t ire_marks = 0; 4387 int match_flags; 4388 boolean_t ip6i_present; 4389 ire_t *first_sire = NULL; 4390 mblk_t *copy_mp = NULL; 4391 mblk_t *xmit_mp = NULL; 4392 in6_addr_t save_dst; 4393 uint32_t multirt_flags = 4394 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4395 boolean_t multirt_is_resolvable; 4396 boolean_t multirt_resolve_next; 4397 boolean_t need_rele = B_FALSE; 4398 boolean_t do_attach_ill = B_FALSE; 4399 boolean_t ip6_asp_table_held = B_FALSE; 4400 tsol_ire_gw_secattr_t *attrp = NULL; 4401 tsol_gcgrp_t *gcgrp = NULL; 4402 tsol_gcgrp_addr_t ga; 4403 4404 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4405 4406 first_mp = mp; 4407 if (mp->b_datap->db_type == M_CTL) { 4408 mp = mp->b_cont; 4409 io = (ipsec_out_t *)first_mp->b_rptr; 4410 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4411 } else { 4412 io = NULL; 4413 } 4414 4415 /* 4416 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill and 4417 * bind_to_nofailover B_TRUE. We can't use conn to determine as it 4418 * could be NULL. 4419 * 4420 * This information can appear either in an ip6i_t or an IPSEC_OUT 4421 * message. 4422 */ 4423 ip6h = (ip6_t *)mp->b_rptr; 4424 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 4425 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 4426 if (!ip6i_present || 4427 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 4428 attach_ill = ip_grab_attach_ill(ill, first_mp, 4429 (ip6i_present ? 
((ip6i_t *)ip6h)->ip6i_ifindex : 4430 io->ipsec_out_ill_index), B_TRUE, ipst); 4431 /* Failure case frees things for us. */ 4432 if (attach_ill == NULL) 4433 return; 4434 4435 /* 4436 * Check if we need an ire that will not be 4437 * looked up by anybody else i.e. HIDDEN. 4438 */ 4439 if (ill_is_probeonly(attach_ill)) 4440 ire_marks = IRE_MARK_HIDDEN; 4441 } 4442 } 4443 4444 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4445 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4446 goto icmp_err_ret; 4447 } else if ((v6srcp != NULL) && IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4448 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4449 goto icmp_err_ret; 4450 } 4451 4452 /* 4453 * If this IRE is created for forwarding or it is not for 4454 * TCP traffic, mark it as temporary. 4455 * 4456 * Is it sufficient just to check the next header?? 4457 */ 4458 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4459 ire_marks |= IRE_MARK_TEMPORARY; 4460 4461 /* 4462 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4463 * chain until it gets the most specific information available. 4464 * For example, we know that there is no IRE_CACHE for this dest, 4465 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4466 * ire_ftable_lookup_v6 will look up the gateway, etc. 4467 */ 4468 4469 if (ill == NULL) { 4470 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4471 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4472 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4473 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4474 match_flags, ipst); 4475 /* 4476 * ire_add_then_send -> ip_newroute_v6 in the CGTP case passes 4477 * in a NULL ill, but the packet could be a neighbor 4478 * solicitation/advertisment and could have a valid attach_ill. 4479 */ 4480 if (attach_ill != NULL) 4481 ill_refrele(attach_ill); 4482 } else { 4483 if (attach_ill != NULL) { 4484 /* 4485 * attach_ill is set only for communicating with 4486 * on-link hosts. 
So, don't look for DEFAULT. 4487 * ip_wput_v6 passes the right ill in this case and 4488 * hence we can assert. 4489 */ 4490 ASSERT(ill == attach_ill); 4491 ill_refrele(attach_ill); 4492 do_attach_ill = B_TRUE; 4493 match_flags = MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4494 } else { 4495 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4496 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL_GROUP; 4497 } 4498 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4499 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4500 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); 4501 } 4502 4503 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4504 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4505 4506 if (zoneid == ALL_ZONES && ire != NULL) { 4507 /* 4508 * In the forwarding case, we can use a route from any zone 4509 * since we won't change the source address. We can easily 4510 * assert that the source address is already set when there's no 4511 * ip6_info header - otherwise we'd have to call pullupmsg(). 4512 */ 4513 ASSERT(ip6i_present || 4514 !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 4515 zoneid = ire->ire_zoneid; 4516 } 4517 4518 /* 4519 * We enter a loop that will be run only once in most cases. 4520 * The loop is re-entered in the case where the destination 4521 * can be reached through multiple RTF_MULTIRT-flagged routes. 4522 * The intention is to compute multiple routes to a single 4523 * destination in a single ip_newroute_v6 call. 4524 * The information is contained in sire->ire_flags. 
4525 */ 4526 do { 4527 multirt_resolve_next = B_FALSE; 4528 4529 if (dst_ill != NULL) { 4530 ill_refrele(dst_ill); 4531 dst_ill = NULL; 4532 } 4533 if (src_ipif != NULL) { 4534 ipif_refrele(src_ipif); 4535 src_ipif = NULL; 4536 } 4537 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4538 ip3dbg(("ip_newroute_v6: starting new resolution " 4539 "with first_mp %p, tag %d\n", 4540 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4541 4542 /* 4543 * We check if there are trailing unresolved routes for 4544 * the destination contained in sire. 4545 */ 4546 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4547 &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); 4548 4549 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4550 "ire %p, sire %p\n", 4551 multirt_is_resolvable, (void *)ire, (void *)sire)); 4552 4553 if (!multirt_is_resolvable) { 4554 /* 4555 * No more multirt routes to resolve; give up 4556 * (all routes resolved or no more resolvable 4557 * routes). 4558 */ 4559 if (ire != NULL) { 4560 ire_refrele(ire); 4561 ire = NULL; 4562 } 4563 } else { 4564 ASSERT(sire != NULL); 4565 ASSERT(ire != NULL); 4566 /* 4567 * We simply use first_sire as a flag that 4568 * indicates if a resolvable multirt route has 4569 * already been found during the preceding 4570 * loops. If it is not the case, we may have 4571 * to send an ICMP error to report that the 4572 * destination is unreachable. We do not 4573 * IRE_REFHOLD first_sire. 4574 */ 4575 if (first_sire == NULL) { 4576 first_sire = sire; 4577 } 4578 } 4579 } 4580 if ((ire == NULL) || (ire == sire)) { 4581 /* 4582 * either ire == NULL (the destination cannot be 4583 * resolved) or ire == sire (the gateway cannot be 4584 * resolved). At this point, there are no more routes 4585 * to resolve for the destination, thus we exit. 
4586 */ 4587 if (ip_debug > 3) { 4588 /* ip2dbg */ 4589 pr_addr_dbg("ip_newroute_v6: " 4590 "can't resolve %s\n", AF_INET6, v6dstp); 4591 } 4592 ip3dbg(("ip_newroute_v6: " 4593 "ire %p, sire %p, first_sire %p\n", 4594 (void *)ire, (void *)sire, (void *)first_sire)); 4595 4596 if (sire != NULL) { 4597 ire_refrele(sire); 4598 sire = NULL; 4599 } 4600 4601 if (first_sire != NULL) { 4602 /* 4603 * At least one multirt route has been found 4604 * in the same ip_newroute() call; there is no 4605 * need to report an ICMP error. 4606 * first_sire was not IRE_REFHOLDed. 4607 */ 4608 MULTIRT_DEBUG_UNTAG(first_mp); 4609 freemsg(first_mp); 4610 return; 4611 } 4612 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4613 RTA_DST, ipst); 4614 goto icmp_err_ret; 4615 } 4616 4617 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4618 4619 /* 4620 * Verify that the returned IRE does not have either the 4621 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4622 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 4623 */ 4624 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4625 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4626 goto icmp_err_ret; 4627 4628 /* 4629 * Increment the ire_ob_pkt_count field for ire if it is an 4630 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4631 * increment the same for the parent IRE, sire, if it is some 4632 * sort of prefix IRE (which includes DEFAULT, PREFIX, HOST 4633 * and HOST_REDIRECT). 4634 */ 4635 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4636 UPDATE_OB_PKT_COUNT(ire); 4637 ire->ire_last_used_time = lbolt; 4638 } 4639 4640 if (sire != NULL) { 4641 mutex_enter(&sire->ire_lock); 4642 v6gw = sire->ire_gateway_addr_v6; 4643 mutex_exit(&sire->ire_lock); 4644 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4645 IRE_INTERFACE)) == 0); 4646 UPDATE_OB_PKT_COUNT(sire); 4647 sire->ire_last_used_time = lbolt; 4648 } else { 4649 v6gw = ipv6_all_zeros; 4650 } 4651 4652 /* 4653 * We have a route to reach the destination. 
4654 * 4655 * 1) If the interface is part of ill group, try to get a new 4656 * ill taking load spreading into account. 4657 * 4658 * 2) After selecting the ill, get a source address that might 4659 * create good inbound load spreading and that matches the 4660 * right scope. ipif_select_source_v6 does this for us. 4661 * 4662 * If the application specified the ill (ifindex), we still 4663 * load spread. Only if the packets needs to go out specifically 4664 * on a given ill e.g. bind to IPIF_NOFAILOVER address, 4665 * IPV6_BOUND_PIF we don't try to use a different ill for load 4666 * spreading. 4667 */ 4668 if (!do_attach_ill) { 4669 /* 4670 * If the interface belongs to an interface group, 4671 * make sure the next possible interface in the group 4672 * is used. This encourages load spreading among 4673 * peers in an interface group. However, in the case 4674 * of multirouting, load spreading is not used, as we 4675 * actually want to replicate outgoing packets through 4676 * particular interfaces. 4677 * 4678 * Note: While we pick a dst_ill we are really only 4679 * interested in the ill for load spreading. 4680 * The source ipif is determined by source address 4681 * selection below. 4682 */ 4683 if ((sire != NULL) && (sire->ire_flags & RTF_MULTIRT)) { 4684 dst_ill = ire->ire_ipif->ipif_ill; 4685 /* For uniformity do a refhold */ 4686 ill_refhold(dst_ill); 4687 } else { 4688 /* 4689 * If we are here trying to create an IRE_CACHE 4690 * for an offlink destination and have the 4691 * IRE_CACHE for the next hop and the latter is 4692 * using virtual IP source address selection i.e 4693 * it's ire->ire_ipif is pointing to a virtual 4694 * network interface (vni) then 4695 * ip_newroute_get_dst_ll() will return the vni 4696 * interface as the dst_ill. 
Since the vni is 4697 * virtual i.e not associated with any physical 4698 * interface, it cannot be the dst_ill, hence 4699 * in such a case call ip_newroute_get_dst_ll() 4700 * with the stq_ill instead of the ire_ipif ILL. 4701 * The function returns a refheld ill. 4702 */ 4703 if ((ire->ire_type == IRE_CACHE) && 4704 IS_VNI(ire->ire_ipif->ipif_ill)) 4705 dst_ill = ip_newroute_get_dst_ill_v6( 4706 ire->ire_stq->q_ptr); 4707 else 4708 dst_ill = ip_newroute_get_dst_ill_v6( 4709 ire->ire_ipif->ipif_ill); 4710 } 4711 if (dst_ill == NULL) { 4712 if (ip_debug > 2) { 4713 pr_addr_dbg("ip_newroute_v6 : no dst " 4714 "ill for dst %s\n", 4715 AF_INET6, v6dstp); 4716 } 4717 goto icmp_err_ret; 4718 } else if (dst_ill->ill_group == NULL && ill != NULL && 4719 dst_ill != ill) { 4720 /* 4721 * If "ill" is not part of any group, we should 4722 * have found a route matching "ill" as we 4723 * called ire_ftable_lookup_v6 with 4724 * MATCH_IRE_ILL_GROUP. 4725 * Rather than asserting when there is a 4726 * mismatch, we just drop the packet. 4727 */ 4728 ip0dbg(("ip_newroute_v6: BOUND_IF failed : " 4729 "dst_ill %s ill %s\n", 4730 dst_ill->ill_name, 4731 ill->ill_name)); 4732 goto icmp_err_ret; 4733 } 4734 } else { 4735 dst_ill = ire->ire_ipif->ipif_ill; 4736 /* For uniformity do refhold */ 4737 ill_refhold(dst_ill); 4738 /* 4739 * We should have found a route matching ill as we 4740 * called ire_ftable_lookup_v6 with MATCH_IRE_ILL. 4741 * Rather than asserting, while there is a mismatch, 4742 * we just drop the packet. 4743 */ 4744 if (dst_ill != ill) { 4745 ip0dbg(("ip_newroute_v6: Packet dropped as " 4746 "IP6I_ATTACH_IF ill is %s, " 4747 "ire->ire_ipif->ipif_ill is %s\n", 4748 ill->ill_name, 4749 dst_ill->ill_name)); 4750 goto icmp_err_ret; 4751 } 4752 } 4753 /* 4754 * Pick a source address which matches the scope of the 4755 * destination address. 4756 * For RTF_SETSRC routes, the source address is imposed by the 4757 * parent ire (sire). 
4758 */ 4759 ASSERT(src_ipif == NULL); 4760 if (ire->ire_type == IRE_IF_RESOLVER && 4761 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4762 ip6_asp_can_lookup(ipst)) { 4763 /* 4764 * The ire cache entry we're adding is for the 4765 * gateway itself. The source address in this case 4766 * is relative to the gateway's address. 4767 */ 4768 ip6_asp_table_held = B_TRUE; 4769 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4770 RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT, zoneid); 4771 if (src_ipif != NULL) 4772 ire_marks |= IRE_MARK_USESRC_CHECK; 4773 } else { 4774 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4775 /* 4776 * Check that the ipif matching the requested 4777 * source address still exists. 4778 */ 4779 src_ipif = ipif_lookup_addr_v6( 4780 &sire->ire_src_addr_v6, NULL, zoneid, 4781 NULL, NULL, NULL, NULL, ipst); 4782 } 4783 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4784 uint_t restrict_ill = RESTRICT_TO_NONE; 4785 4786 if (ip6i_present && ((ip6i_t *)ip6h)->ip6i_flags 4787 & IP6I_ATTACH_IF) 4788 restrict_ill = RESTRICT_TO_ILL; 4789 ip6_asp_table_held = B_TRUE; 4790 src_ipif = ipif_select_source_v6(dst_ill, 4791 v6dstp, restrict_ill, 4792 IPV6_PREFER_SRC_DEFAULT, zoneid); 4793 if (src_ipif != NULL) 4794 ire_marks |= IRE_MARK_USESRC_CHECK; 4795 } 4796 } 4797 4798 if (src_ipif == NULL) { 4799 if (ip_debug > 2) { 4800 /* ip1dbg */ 4801 pr_addr_dbg("ip_newroute_v6: no src for " 4802 "dst %s\n, ", AF_INET6, v6dstp); 4803 printf("ip_newroute_v6: interface name %s\n", 4804 dst_ill->ill_name); 4805 } 4806 goto icmp_err_ret; 4807 } 4808 4809 if (ip_debug > 3) { 4810 /* ip2dbg */ 4811 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4812 AF_INET6, &v6gw); 4813 } 4814 ip2dbg(("\tire type %s (%d)\n", 4815 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4816 4817 /* 4818 * At this point in ip_newroute_v6(), ire is either the 4819 * IRE_CACHE of the next-hop gateway for an off-subnet 4820 * destination or an IRE_INTERFACE type that should be used 4821 * to 
resolve an on-subnet destination or an on-subnet 4822 * next-hop gateway. 4823 * 4824 * In the IRE_CACHE case, we have the following : 4825 * 4826 * 1) src_ipif - used for getting a source address. 4827 * 4828 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4829 * means packets using this IRE_CACHE will go out on dst_ill. 4830 * 4831 * 3) The IRE sire will point to the prefix that is the longest 4832 * matching route for the destination. These prefix types 4833 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4834 * 4835 * The newly created IRE_CACHE entry for the off-subnet 4836 * destination is tied to both the prefix route and the 4837 * interface route used to resolve the next-hop gateway 4838 * via the ire_phandle and ire_ihandle fields, respectively. 4839 * 4840 * In the IRE_INTERFACE case, we have the following : 4841 * 4842 * 1) src_ipif - used for getting a source address. 4843 * 4844 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4845 * means packets using the IRE_CACHE that we will build 4846 * here will go out on dst_ill. 4847 * 4848 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4849 * to be created will only be tied to the IRE_INTERFACE that 4850 * was derived from the ire_ihandle field. 4851 * 4852 * If sire is non-NULL, it means the destination is off-link 4853 * and we will first create the IRE_CACHE for the gateway. 4854 * Next time through ip_newroute_v6, we will create the 4855 * IRE_CACHE for the final destination as described above. 4856 */ 4857 save_ire = ire; 4858 switch (ire->ire_type) { 4859 case IRE_CACHE: { 4860 ire_t *ipif_ire; 4861 4862 ASSERT(sire != NULL); 4863 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4864 mutex_enter(&ire->ire_lock); 4865 v6gw = ire->ire_gateway_addr_v6; 4866 mutex_exit(&ire->ire_lock); 4867 } 4868 /* 4869 * We need 3 ire's to create a new cache ire for an 4870 * off-link destination from the cache ire of the 4871 * gateway. 4872 * 4873 * 1. The prefix ire 'sire' 4874 * 2. 
The cache ire of the gateway 'ire' 4875 * 3. The interface ire 'ipif_ire' 4876 * 4877 * We have (1) and (2). We lookup (3) below. 4878 * 4879 * If there is no interface route to the gateway, 4880 * it is a race condition, where we found the cache 4881 * but the inteface route has been deleted. 4882 */ 4883 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4884 if (ipif_ire == NULL) { 4885 ip1dbg(("ip_newroute_v6:" 4886 "ire_ihandle_lookup_offlink_v6 failed\n")); 4887 goto icmp_err_ret; 4888 } 4889 /* 4890 * Assume DL_UNITDATA_REQ is same for all physical 4891 * interfaces in the ifgrp. If it isn't, this code will 4892 * have to be seriously rewhacked to allow the 4893 * fastpath probing (such that I cache the link 4894 * header in the IRE_CACHE) to work over ifgrps. 4895 * We have what we need to build an IRE_CACHE. 4896 */ 4897 /* 4898 * Note: the new ire inherits RTF_SETSRC 4899 * and RTF_MULTIRT to propagate these flags from prefix 4900 * to cache. 4901 */ 4902 4903 /* 4904 * Check cached gateway IRE for any security 4905 * attributes; if found, associate the gateway 4906 * credentials group to the destination IRE. 
4907 */ 4908 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4909 mutex_enter(&attrp->igsa_lock); 4910 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4911 GCGRP_REFHOLD(gcgrp); 4912 mutex_exit(&attrp->igsa_lock); 4913 } 4914 4915 ire = ire_create_v6( 4916 v6dstp, /* dest address */ 4917 &ipv6_all_ones, /* mask */ 4918 &src_ipif->ipif_v6src_addr, /* source address */ 4919 &v6gw, /* gateway address */ 4920 &save_ire->ire_max_frag, 4921 NULL, /* Fast Path header */ 4922 dst_ill->ill_rq, /* recv-from queue */ 4923 dst_ill->ill_wq, /* send-to queue */ 4924 IRE_CACHE, 4925 NULL, 4926 src_ipif, 4927 &sire->ire_mask_v6, /* Parent mask */ 4928 sire->ire_phandle, /* Parent handle */ 4929 ipif_ire->ire_ihandle, /* Interface handle */ 4930 sire->ire_flags & /* flags if any */ 4931 (RTF_SETSRC | RTF_MULTIRT), 4932 &(sire->ire_uinfo), 4933 NULL, 4934 gcgrp, 4935 ipst); 4936 4937 if (ire == NULL) { 4938 if (gcgrp != NULL) { 4939 GCGRP_REFRELE(gcgrp); 4940 gcgrp = NULL; 4941 } 4942 ire_refrele(save_ire); 4943 ire_refrele(ipif_ire); 4944 break; 4945 } 4946 4947 /* reference now held by IRE */ 4948 gcgrp = NULL; 4949 4950 ire->ire_marks |= ire_marks; 4951 4952 /* 4953 * Prevent sire and ipif_ire from getting deleted. The 4954 * newly created ire is tied to both of them via the 4955 * phandle and ihandle respectively. 4956 */ 4957 IRB_REFHOLD(sire->ire_bucket); 4958 /* Has it been removed already ? */ 4959 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4960 IRB_REFRELE(sire->ire_bucket); 4961 ire_refrele(ipif_ire); 4962 ire_refrele(save_ire); 4963 break; 4964 } 4965 4966 IRB_REFHOLD(ipif_ire->ire_bucket); 4967 /* Has it been removed already ? 
*/ 4968 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4969 IRB_REFRELE(ipif_ire->ire_bucket); 4970 IRB_REFRELE(sire->ire_bucket); 4971 ire_refrele(ipif_ire); 4972 ire_refrele(save_ire); 4973 break; 4974 } 4975 4976 xmit_mp = first_mp; 4977 if (ire->ire_flags & RTF_MULTIRT) { 4978 copy_mp = copymsg(first_mp); 4979 if (copy_mp != NULL) { 4980 xmit_mp = copy_mp; 4981 MULTIRT_DEBUG_TAG(first_mp); 4982 } 4983 } 4984 ire_add_then_send(q, ire, xmit_mp); 4985 if (ip6_asp_table_held) { 4986 ip6_asp_table_refrele(ipst); 4987 ip6_asp_table_held = B_FALSE; 4988 } 4989 ire_refrele(save_ire); 4990 4991 /* Assert that sire is not deleted yet. */ 4992 ASSERT(sire->ire_ptpn != NULL); 4993 IRB_REFRELE(sire->ire_bucket); 4994 4995 /* Assert that ipif_ire is not deleted yet. */ 4996 ASSERT(ipif_ire->ire_ptpn != NULL); 4997 IRB_REFRELE(ipif_ire->ire_bucket); 4998 ire_refrele(ipif_ire); 4999 5000 if (copy_mp != NULL) { 5001 /* 5002 * Search for the next unresolved 5003 * multirt route. 5004 */ 5005 copy_mp = NULL; 5006 ipif_ire = NULL; 5007 ire = NULL; 5008 /* re-enter the loop */ 5009 multirt_resolve_next = B_TRUE; 5010 continue; 5011 } 5012 ire_refrele(sire); 5013 ill_refrele(dst_ill); 5014 ipif_refrele(src_ipif); 5015 return; 5016 } 5017 case IRE_IF_NORESOLVER: 5018 /* 5019 * We have what we need to build an IRE_CACHE. 5020 * 5021 * Create a new dlureq_mp with the IPv6 gateway 5022 * address in destination address in the DLPI hdr 5023 * if the physical length is exactly 16 bytes. 5024 */ 5025 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5026 const in6_addr_t *addr; 5027 5028 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5029 addr = &v6gw; 5030 else 5031 addr = v6dstp; 5032 5033 dlureq_mp = ill_dlur_gen((uchar_t *)addr, 5034 dst_ill->ill_phys_addr_length, 5035 dst_ill->ill_sap, 5036 dst_ill->ill_sap_length); 5037 } else { 5038 /* 5039 * handle the Gated case, where we create 5040 * a NORESOLVER route for loopback. 
5041 */ 5042 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5043 break; 5044 dlureq_mp = ill_dlur_gen(NULL, 5045 dst_ill->ill_phys_addr_length, 5046 dst_ill->ill_sap, 5047 dst_ill->ill_sap_length); 5048 } 5049 if (dlureq_mp == NULL) 5050 break; 5051 /* 5052 * TSol note: We are creating the ire cache for the 5053 * destination 'dst'. If 'dst' is offlink, going 5054 * through the first hop 'gw', the security attributes 5055 * of 'dst' must be set to point to the gateway 5056 * credentials of gateway 'gw'. If 'dst' is onlink, it 5057 * is possible that 'dst' is a potential gateway that is 5058 * referenced by some route that has some security 5059 * attributes. Thus in the former case, we need to do a 5060 * gcgrp_lookup of 'gw' while in the latter case we 5061 * need to do gcgrp_lookup of 'dst' itself. 5062 */ 5063 ga.ga_af = AF_INET6; 5064 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5065 ga.ga_addr = v6gw; 5066 else 5067 ga.ga_addr = *v6dstp; 5068 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5069 5070 /* 5071 * Note: the new ire inherits sire flags RTF_SETSRC 5072 * and RTF_MULTIRT to propagate those rules from prefix 5073 * to cache. 5074 */ 5075 ire = ire_create_v6( 5076 v6dstp, /* dest address */ 5077 &ipv6_all_ones, /* mask */ 5078 &src_ipif->ipif_v6src_addr, /* source address */ 5079 &v6gw, /* gateway address */ 5080 &save_ire->ire_max_frag, 5081 NULL, /* Fast Path header */ 5082 dst_ill->ill_rq, /* recv-from queue */ 5083 dst_ill->ill_wq, /* send-to queue */ 5084 IRE_CACHE, 5085 dlureq_mp, 5086 src_ipif, 5087 &save_ire->ire_mask_v6, /* Parent mask */ 5088 (sire != NULL) ? /* Parent handle */ 5089 sire->ire_phandle : 0, 5090 save_ire->ire_ihandle, /* Interface handle */ 5091 (sire != NULL) ? 
/* flags if any */ 5092 sire->ire_flags & 5093 (RTF_SETSRC | RTF_MULTIRT) : 0, 5094 &(save_ire->ire_uinfo), 5095 NULL, 5096 gcgrp, 5097 ipst); 5098 5099 freeb(dlureq_mp); 5100 5101 if (ire == NULL) { 5102 if (gcgrp != NULL) { 5103 GCGRP_REFRELE(gcgrp); 5104 gcgrp = NULL; 5105 } 5106 ire_refrele(save_ire); 5107 break; 5108 } 5109 5110 /* reference now held by IRE */ 5111 gcgrp = NULL; 5112 5113 ire->ire_marks |= ire_marks; 5114 5115 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 5116 dst = v6gw; 5117 else 5118 dst = *v6dstp; 5119 err = ndp_noresolver(dst_ill, &dst); 5120 if (err != 0) { 5121 ire_refrele(save_ire); 5122 break; 5123 } 5124 5125 /* Prevent save_ire from getting deleted */ 5126 IRB_REFHOLD(save_ire->ire_bucket); 5127 /* Has it been removed already ? */ 5128 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5129 IRB_REFRELE(save_ire->ire_bucket); 5130 ire_refrele(save_ire); 5131 break; 5132 } 5133 5134 xmit_mp = first_mp; 5135 /* 5136 * In case of MULTIRT, a copy of the current packet 5137 * to send is made to further re-enter the 5138 * loop and attempt another route resolution 5139 */ 5140 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 5141 copy_mp = copymsg(first_mp); 5142 if (copy_mp != NULL) { 5143 xmit_mp = copy_mp; 5144 MULTIRT_DEBUG_TAG(first_mp); 5145 } 5146 } 5147 ire_add_then_send(q, ire, xmit_mp); 5148 if (ip6_asp_table_held) { 5149 ip6_asp_table_refrele(ipst); 5150 ip6_asp_table_held = B_FALSE; 5151 } 5152 5153 /* Assert that it is not deleted yet. */ 5154 ASSERT(save_ire->ire_ptpn != NULL); 5155 IRB_REFRELE(save_ire->ire_bucket); 5156 ire_refrele(save_ire); 5157 5158 if (copy_mp != NULL) { 5159 /* 5160 * If we found a (no)resolver, we ignore any 5161 * trailing top priority IRE_CACHE in 5162 * further loops. This ensures that we do not 5163 * omit any (no)resolver despite the priority 5164 * in this call. 5165 * IRE_CACHE, if any, will be processed 5166 * by another thread entering ip_newroute(), 5167 * (on resolver response, for example). 
5168 * We use this to force multiple parallel 5169 * resolution as soon as a packet needs to be 5170 * sent. The result is, after one packet 5171 * emission all reachable routes are generally 5172 * resolved. 5173 * Otherwise, complete resolution of MULTIRT 5174 * routes would require several emissions as 5175 * side effect. 5176 */ 5177 multirt_flags &= ~MULTIRT_CACHEGW; 5178 5179 /* 5180 * Search for the next unresolved multirt 5181 * route. 5182 */ 5183 copy_mp = NULL; 5184 save_ire = NULL; 5185 ire = NULL; 5186 /* re-enter the loop */ 5187 multirt_resolve_next = B_TRUE; 5188 continue; 5189 } 5190 5191 /* Don't need sire anymore */ 5192 if (sire != NULL) 5193 ire_refrele(sire); 5194 ill_refrele(dst_ill); 5195 ipif_refrele(src_ipif); 5196 return; 5197 5198 case IRE_IF_RESOLVER: 5199 /* 5200 * We can't build an IRE_CACHE yet, but at least we 5201 * found a resolver that can help. 5202 */ 5203 dst = *v6dstp; 5204 5205 /* 5206 * To be at this point in the code with a non-zero gw 5207 * means that dst is reachable through a gateway that 5208 * we have never resolved. By changing dst to the gw 5209 * addr we resolve the gateway first. When 5210 * ire_add_then_send() tries to put the IP dg to dst, 5211 * it will reenter ip_newroute() at which time we will 5212 * find the IRE_CACHE for the gw and create another 5213 * IRE_CACHE above (for dst itself). 5214 */ 5215 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 5216 save_dst = dst; 5217 dst = v6gw; 5218 v6gw = ipv6_all_zeros; 5219 } 5220 if (dst_ill->ill_flags & ILLF_XRESOLV) { 5221 /* 5222 * Ask the external resolver to do its thing. 
5223 * Make an mblk chain in the following form: 5224 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 5225 */ 5226 mblk_t *ire_mp; 5227 mblk_t *areq_mp; 5228 areq_t *areq; 5229 in6_addr_t *addrp; 5230 5231 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 5232 if (ip6_asp_table_held) { 5233 ip6_asp_table_refrele(ipst); 5234 ip6_asp_table_held = B_FALSE; 5235 } 5236 ire = ire_create_mp_v6( 5237 &dst, /* dest address */ 5238 &ipv6_all_ones, /* mask */ 5239 &src_ipif->ipif_v6src_addr, 5240 /* source address */ 5241 &v6gw, /* gateway address */ 5242 NULL, /* Fast Path header */ 5243 dst_ill->ill_rq, /* recv-from queue */ 5244 dst_ill->ill_wq, /* send-to queue */ 5245 IRE_CACHE, 5246 NULL, 5247 src_ipif, 5248 &save_ire->ire_mask_v6, 5249 /* Parent mask */ 5250 0, 5251 save_ire->ire_ihandle, 5252 /* Interface handle */ 5253 0, /* flags if any */ 5254 &(save_ire->ire_uinfo), 5255 NULL, 5256 NULL, 5257 ipst); 5258 5259 ire_refrele(save_ire); 5260 if (ire == NULL) { 5261 ip1dbg(("ip_newroute_v6:" 5262 "ire is NULL\n")); 5263 break; 5264 } 5265 5266 if ((sire != NULL) && 5267 (sire->ire_flags & RTF_MULTIRT)) { 5268 /* 5269 * processing a copy of the packet to 5270 * send for further resolution loops 5271 */ 5272 copy_mp = copymsg(first_mp); 5273 if (copy_mp != NULL) 5274 MULTIRT_DEBUG_TAG(copy_mp); 5275 } 5276 ire->ire_marks |= ire_marks; 5277 ire_mp = ire->ire_mp; 5278 /* 5279 * Now create or find an nce for this interface. 5280 * The hw addr will need to to be set from 5281 * the reply to the AR_ENTRY_QUERY that 5282 * we're about to send. This will be done in 5283 * ire_add_v6(). 5284 */ 5285 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5286 switch (err) { 5287 case 0: 5288 /* 5289 * New cache entry created. 5290 * Break, then ask the external 5291 * resolver. 5292 */ 5293 break; 5294 case EINPROGRESS: 5295 /* 5296 * Resolution in progress; 5297 * packet has been queued by 5298 * ndp_resolver(). 
5299 */ 5300 ire_delete(ire); 5301 ire = NULL; 5302 /* 5303 * Check if another multirt 5304 * route must be resolved. 5305 */ 5306 if (copy_mp != NULL) { 5307 /* 5308 * If we found a resolver, we 5309 * ignore any trailing top 5310 * priority IRE_CACHE in 5311 * further loops. The reason is 5312 * the same as for noresolver. 5313 */ 5314 multirt_flags &= 5315 ~MULTIRT_CACHEGW; 5316 /* 5317 * Search for the next 5318 * unresolved multirt route. 5319 */ 5320 first_mp = copy_mp; 5321 copy_mp = NULL; 5322 mp = first_mp; 5323 if (mp->b_datap->db_type == 5324 M_CTL) { 5325 mp = mp->b_cont; 5326 } 5327 ASSERT(sire != NULL); 5328 dst = save_dst; 5329 /* 5330 * re-enter the loop 5331 */ 5332 multirt_resolve_next = 5333 B_TRUE; 5334 continue; 5335 } 5336 5337 if (sire != NULL) 5338 ire_refrele(sire); 5339 ill_refrele(dst_ill); 5340 ipif_refrele(src_ipif); 5341 return; 5342 default: 5343 /* 5344 * Transient error; packet will be 5345 * freed. 5346 */ 5347 ire_delete(ire); 5348 ire = NULL; 5349 break; 5350 } 5351 if (err != 0) 5352 break; 5353 /* 5354 * Now set up the AR_ENTRY_QUERY and send it. 5355 */ 5356 areq_mp = ill_arp_alloc(dst_ill, 5357 (uchar_t *)&ipv6_areq_template, 5358 (caddr_t)&dst); 5359 if (areq_mp == NULL) { 5360 ip1dbg(("ip_newroute_v6:" 5361 "areq_mp is NULL\n")); 5362 freemsg(ire_mp); 5363 break; 5364 } 5365 areq = (areq_t *)areq_mp->b_rptr; 5366 addrp = (in6_addr_t *)((char *)areq + 5367 areq->areq_target_addr_offset); 5368 *addrp = dst; 5369 addrp = (in6_addr_t *)((char *)areq + 5370 areq->areq_sender_addr_offset); 5371 *addrp = src_ipif->ipif_v6src_addr; 5372 /* 5373 * link the chain, then send up to the resolver. 5374 */ 5375 linkb(areq_mp, ire_mp); 5376 linkb(areq_mp, mp); 5377 ip1dbg(("ip_newroute_v6:" 5378 "putnext to resolver\n")); 5379 putnext(dst_ill->ill_rq, areq_mp); 5380 /* 5381 * Check if another multirt route 5382 * must be resolved. 
5383 */ 5384 ire = NULL; 5385 if (copy_mp != NULL) { 5386 /* 5387 * If we find a resolver, we ignore any 5388 * trailing top priority IRE_CACHE in 5389 * further loops. The reason is the 5390 * same as for noresolver. 5391 */ 5392 multirt_flags &= ~MULTIRT_CACHEGW; 5393 /* 5394 * Search for the next unresolved 5395 * multirt route. 5396 */ 5397 first_mp = copy_mp; 5398 copy_mp = NULL; 5399 mp = first_mp; 5400 if (mp->b_datap->db_type == M_CTL) { 5401 mp = mp->b_cont; 5402 } 5403 ASSERT(sire != NULL); 5404 dst = save_dst; 5405 /* 5406 * re-enter the loop 5407 */ 5408 multirt_resolve_next = B_TRUE; 5409 continue; 5410 } 5411 5412 if (sire != NULL) 5413 ire_refrele(sire); 5414 ill_refrele(dst_ill); 5415 ipif_refrele(src_ipif); 5416 return; 5417 } 5418 /* 5419 * Non-external resolver case. 5420 * 5421 * TSol note: Please see the note above the 5422 * IRE_IF_NORESOLVER case. 5423 */ 5424 ga.ga_af = AF_INET6; 5425 ga.ga_addr = dst; 5426 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5427 5428 ire = ire_create_v6( 5429 &dst, /* dest address */ 5430 &ipv6_all_ones, /* mask */ 5431 &src_ipif->ipif_v6src_addr, /* source address */ 5432 &v6gw, /* gateway address */ 5433 &save_ire->ire_max_frag, 5434 NULL, /* Fast Path header */ 5435 dst_ill->ill_rq, /* recv-from queue */ 5436 dst_ill->ill_wq, /* send-to queue */ 5437 IRE_CACHE, 5438 NULL, 5439 src_ipif, 5440 &save_ire->ire_mask_v6, /* Parent mask */ 5441 0, 5442 save_ire->ire_ihandle, /* Interface handle */ 5443 0, /* flags if any */ 5444 &(save_ire->ire_uinfo), 5445 NULL, 5446 gcgrp, 5447 ipst); 5448 5449 if (ire == NULL) { 5450 if (gcgrp != NULL) { 5451 GCGRP_REFRELE(gcgrp); 5452 gcgrp = NULL; 5453 } 5454 ire_refrele(save_ire); 5455 break; 5456 } 5457 5458 /* reference now held by IRE */ 5459 gcgrp = NULL; 5460 5461 if ((sire != NULL) && 5462 (sire->ire_flags & RTF_MULTIRT)) { 5463 copy_mp = copymsg(first_mp); 5464 if (copy_mp != NULL) 5465 MULTIRT_DEBUG_TAG(copy_mp); 5466 } 5467 5468 ire->ire_marks |= ire_marks; 5469 err = 
ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5470 switch (err) { 5471 case 0: 5472 /* Prevent save_ire from getting deleted */ 5473 IRB_REFHOLD(save_ire->ire_bucket); 5474 /* Has it been removed already ? */ 5475 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5476 IRB_REFRELE(save_ire->ire_bucket); 5477 ire_refrele(save_ire); 5478 break; 5479 } 5480 5481 /* 5482 * We have a resolved cache entry, 5483 * add in the IRE. 5484 */ 5485 ire_add_then_send(q, ire, first_mp); 5486 if (ip6_asp_table_held) { 5487 ip6_asp_table_refrele(ipst); 5488 ip6_asp_table_held = B_FALSE; 5489 } 5490 5491 /* Assert that it is not deleted yet. */ 5492 ASSERT(save_ire->ire_ptpn != NULL); 5493 IRB_REFRELE(save_ire->ire_bucket); 5494 ire_refrele(save_ire); 5495 /* 5496 * Check if another multirt route 5497 * must be resolved. 5498 */ 5499 ire = NULL; 5500 if (copy_mp != NULL) { 5501 /* 5502 * If we find a resolver, we ignore any 5503 * trailing top priority IRE_CACHE in 5504 * further loops. The reason is the 5505 * same as for noresolver. 5506 */ 5507 multirt_flags &= ~MULTIRT_CACHEGW; 5508 /* 5509 * Search for the next unresolved 5510 * multirt route. 5511 */ 5512 first_mp = copy_mp; 5513 copy_mp = NULL; 5514 mp = first_mp; 5515 if (mp->b_datap->db_type == M_CTL) { 5516 mp = mp->b_cont; 5517 } 5518 ASSERT(sire != NULL); 5519 dst = save_dst; 5520 /* 5521 * re-enter the loop 5522 */ 5523 multirt_resolve_next = B_TRUE; 5524 continue; 5525 } 5526 5527 if (sire != NULL) 5528 ire_refrele(sire); 5529 ill_refrele(dst_ill); 5530 ipif_refrele(src_ipif); 5531 return; 5532 5533 case EINPROGRESS: 5534 /* 5535 * mp was consumed - presumably queued. 5536 * No need for ire, presumably resolution is 5537 * in progress, and ire will be added when the 5538 * address is resolved. 
5539 */ 5540 if (ip6_asp_table_held) { 5541 ip6_asp_table_refrele(ipst); 5542 ip6_asp_table_held = B_FALSE; 5543 } 5544 ASSERT(ire->ire_nce == NULL); 5545 ire_delete(ire); 5546 ire_refrele(save_ire); 5547 /* 5548 * Check if another multirt route 5549 * must be resolved. 5550 */ 5551 ire = NULL; 5552 if (copy_mp != NULL) { 5553 /* 5554 * If we find a resolver, we ignore any 5555 * trailing top priority IRE_CACHE in 5556 * further loops. The reason is the 5557 * same as for noresolver. 5558 */ 5559 multirt_flags &= ~MULTIRT_CACHEGW; 5560 /* 5561 * Search for the next unresolved 5562 * multirt route. 5563 */ 5564 first_mp = copy_mp; 5565 copy_mp = NULL; 5566 mp = first_mp; 5567 if (mp->b_datap->db_type == M_CTL) { 5568 mp = mp->b_cont; 5569 } 5570 ASSERT(sire != NULL); 5571 dst = save_dst; 5572 /* 5573 * re-enter the loop 5574 */ 5575 multirt_resolve_next = B_TRUE; 5576 continue; 5577 } 5578 if (sire != NULL) 5579 ire_refrele(sire); 5580 ill_refrele(dst_ill); 5581 ipif_refrele(src_ipif); 5582 return; 5583 default: 5584 /* Some transient error */ 5585 ASSERT(ire->ire_nce == NULL); 5586 ire_refrele(save_ire); 5587 break; 5588 } 5589 break; 5590 default: 5591 break; 5592 } 5593 if (ip6_asp_table_held) { 5594 ip6_asp_table_refrele(ipst); 5595 ip6_asp_table_held = B_FALSE; 5596 } 5597 } while (multirt_resolve_next); 5598 5599 err_ret: 5600 ip1dbg(("ip_newroute_v6: dropped\n")); 5601 if (src_ipif != NULL) 5602 ipif_refrele(src_ipif); 5603 if (dst_ill != NULL) { 5604 need_rele = B_TRUE; 5605 ill = dst_ill; 5606 } 5607 if (ill != NULL) { 5608 if (mp->b_prev != NULL) { 5609 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5610 } else { 5611 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5612 } 5613 5614 if (need_rele) 5615 ill_refrele(ill); 5616 } else { 5617 if (mp->b_prev != NULL) { 5618 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5619 } else { 5620 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5621 } 5622 } 5623 /* Did this packet originate externally? 
*/ 5624 if (mp->b_prev) { 5625 mp->b_next = NULL; 5626 mp->b_prev = NULL; 5627 } 5628 if (copy_mp != NULL) { 5629 MULTIRT_DEBUG_UNTAG(copy_mp); 5630 freemsg(copy_mp); 5631 } 5632 MULTIRT_DEBUG_UNTAG(first_mp); 5633 freemsg(first_mp); 5634 if (ire != NULL) 5635 ire_refrele(ire); 5636 if (sire != NULL) 5637 ire_refrele(sire); 5638 return; 5639 5640 icmp_err_ret: 5641 if (ip6_asp_table_held) 5642 ip6_asp_table_refrele(ipst); 5643 if (src_ipif != NULL) 5644 ipif_refrele(src_ipif); 5645 if (dst_ill != NULL) { 5646 need_rele = B_TRUE; 5647 ill = dst_ill; 5648 } 5649 ip1dbg(("ip_newroute_v6: no route\n")); 5650 if (sire != NULL) 5651 ire_refrele(sire); 5652 /* 5653 * We need to set sire to NULL to avoid double freeing if we 5654 * ever goto err_ret from below. 5655 */ 5656 sire = NULL; 5657 ip6h = (ip6_t *)mp->b_rptr; 5658 /* Skip ip6i_t header if present */ 5659 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5660 /* Make sure the IPv6 header is present */ 5661 if ((mp->b_wptr - (uchar_t *)ip6h) < 5662 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5663 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5664 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5665 goto err_ret; 5666 } 5667 } 5668 mp->b_rptr += sizeof (ip6i_t); 5669 ip6h = (ip6_t *)mp->b_rptr; 5670 } 5671 /* Did this packet originate externally? 
*/ 5672 if (mp->b_prev) { 5673 if (ill != NULL) { 5674 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5675 } else { 5676 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5677 } 5678 mp->b_next = NULL; 5679 mp->b_prev = NULL; 5680 q = WR(q); 5681 } else { 5682 if (ill != NULL) { 5683 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5684 } else { 5685 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5686 } 5687 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5688 /* Failed */ 5689 if (copy_mp != NULL) { 5690 MULTIRT_DEBUG_UNTAG(copy_mp); 5691 freemsg(copy_mp); 5692 } 5693 MULTIRT_DEBUG_UNTAG(first_mp); 5694 freemsg(first_mp); 5695 if (ire != NULL) 5696 ire_refrele(ire); 5697 if (need_rele) 5698 ill_refrele(ill); 5699 return; 5700 } 5701 } 5702 5703 if (need_rele) 5704 ill_refrele(ill); 5705 5706 /* 5707 * At this point we will have ire only if RTF_BLACKHOLE 5708 * or RTF_REJECT flags are set on the IRE. It will not 5709 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5710 */ 5711 if (ire != NULL) { 5712 if (ire->ire_flags & RTF_BLACKHOLE) { 5713 ire_refrele(ire); 5714 if (copy_mp != NULL) { 5715 MULTIRT_DEBUG_UNTAG(copy_mp); 5716 freemsg(copy_mp); 5717 } 5718 MULTIRT_DEBUG_UNTAG(first_mp); 5719 freemsg(first_mp); 5720 return; 5721 } 5722 ire_refrele(ire); 5723 } 5724 if (ip_debug > 3) { 5725 /* ip2dbg */ 5726 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5727 AF_INET6, v6dstp); 5728 } 5729 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5730 B_FALSE, B_FALSE, zoneid, ipst); 5731 } 5732 5733 /* 5734 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5735 * we need to send out a packet to a destination address for which we do not 5736 * have specific routing information. It is only used for multicast packets. 5737 * 5738 * If unspec_src we allow creating an IRE with source address zero. 5739 * ire_send_v6() will delete it after the packet is sent. 
5740 */ 5741 void 5742 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5743 in6_addr_t v6dst, int unspec_src, zoneid_t zoneid) 5744 { 5745 ire_t *ire = NULL; 5746 ipif_t *src_ipif = NULL; 5747 int err = 0; 5748 ill_t *dst_ill = NULL; 5749 ire_t *save_ire; 5750 ushort_t ire_marks = 0; 5751 ipsec_out_t *io; 5752 ill_t *attach_ill = NULL; 5753 ill_t *ill; 5754 ip6_t *ip6h; 5755 mblk_t *first_mp; 5756 boolean_t ip6i_present; 5757 ire_t *fire = NULL; 5758 mblk_t *copy_mp = NULL; 5759 boolean_t multirt_resolve_next; 5760 in6_addr_t *v6dstp = &v6dst; 5761 boolean_t ipif_held = B_FALSE; 5762 boolean_t ill_held = B_FALSE; 5763 boolean_t ip6_asp_table_held = B_FALSE; 5764 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5765 5766 /* 5767 * This loop is run only once in most cases. 5768 * We loop to resolve further routes only when the destination 5769 * can be reached through multiple RTF_MULTIRT-flagged ires. 5770 */ 5771 do { 5772 multirt_resolve_next = B_FALSE; 5773 if (dst_ill != NULL) { 5774 ill_refrele(dst_ill); 5775 dst_ill = NULL; 5776 } 5777 5778 if (src_ipif != NULL) { 5779 ipif_refrele(src_ipif); 5780 src_ipif = NULL; 5781 } 5782 ASSERT(ipif != NULL); 5783 ill = ipif->ipif_ill; 5784 5785 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5786 if (ip_debug > 2) { 5787 /* ip1dbg */ 5788 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5789 AF_INET6, v6dstp); 5790 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5791 ill->ill_name, ipif->ipif_isv6); 5792 } 5793 5794 first_mp = mp; 5795 if (mp->b_datap->db_type == M_CTL) { 5796 mp = mp->b_cont; 5797 io = (ipsec_out_t *)first_mp->b_rptr; 5798 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5799 } else { 5800 io = NULL; 5801 } 5802 5803 /* 5804 * If the interface is a pt-pt interface we look for an 5805 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5806 * local_address and the pt-pt destination address. 5807 * Otherwise we just match the local address. 
5808 */ 5809 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5810 goto err_ret; 5811 } 5812 /* 5813 * If this end point is bound to IPIF_NOFAILOVER, set bnf_ill 5814 * and bind_to_nofailover B_TRUE. We can't use conn to determine 5815 * as it could be NULL. 5816 * 5817 * This information can appear either in an ip6i_t or an 5818 * IPSEC_OUT message. 5819 */ 5820 ip6h = (ip6_t *)mp->b_rptr; 5821 ip6i_present = (ip6h->ip6_nxt == IPPROTO_RAW); 5822 if (ip6i_present || (io != NULL && io->ipsec_out_attach_if)) { 5823 if (!ip6i_present || 5824 ((ip6i_t *)ip6h)->ip6i_flags & IP6I_ATTACH_IF) { 5825 attach_ill = ip_grab_attach_ill(ill, first_mp, 5826 (ip6i_present ? 5827 ((ip6i_t *)ip6h)->ip6i_ifindex : 5828 io->ipsec_out_ill_index), B_TRUE, ipst); 5829 /* Failure case frees things for us. */ 5830 if (attach_ill == NULL) 5831 return; 5832 5833 /* 5834 * Check if we need an ire that will not be 5835 * looked up by anybody else i.e. HIDDEN. 5836 */ 5837 if (ill_is_probeonly(attach_ill)) 5838 ire_marks = IRE_MARK_HIDDEN; 5839 } 5840 } 5841 5842 /* 5843 * We check if an IRE_OFFSUBNET for the addr that goes through 5844 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5845 * RTF_MULTIRT flags must be honored. 5846 */ 5847 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5848 ip2dbg(("ip_newroute_ipif_v6: " 5849 "ipif_lookup_multi_ire_v6(" 5850 "ipif %p, dst %08x) = fire %p\n", 5851 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5852 (void *)fire)); 5853 5854 /* 5855 * If the application specified the ill (ifindex), we still 5856 * load spread. Only if the packets needs to go out specifically 5857 * on a given ill e.g. binding to IPIF_NOFAILOVER address or 5858 * IPV6_BOUND_PIF, or there is a parent ire entry that specified 5859 * multirouting, then we don't try to use a different ill for 5860 * load spreading. 
5861 */ 5862 if (attach_ill == NULL) { 5863 /* 5864 * If the interface belongs to an interface group, 5865 * make sure the next possible interface in the group 5866 * is used. This encourages load spreading among peers 5867 * in an interface group. 5868 * 5869 * Note: While we pick a dst_ill we are really only 5870 * interested in the ill for load spreading. The source 5871 * ipif is determined by source address selection below. 5872 */ 5873 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5874 dst_ill = ipif->ipif_ill; 5875 /* For uniformity do a refhold */ 5876 ill_refhold(dst_ill); 5877 } else { 5878 /* refheld by ip_newroute_get_dst_ill_v6 */ 5879 dst_ill = 5880 ip_newroute_get_dst_ill_v6(ipif->ipif_ill); 5881 } 5882 if (dst_ill == NULL) { 5883 if (ip_debug > 2) { 5884 pr_addr_dbg("ip_newroute_ipif_v6: " 5885 "no dst ill for dst %s\n", 5886 AF_INET6, v6dstp); 5887 } 5888 goto err_ret; 5889 } 5890 } else { 5891 dst_ill = ipif->ipif_ill; 5892 /* 5893 * ip_wput_v6 passes the right ipif for IPIF_NOFAILOVER 5894 * and IPV6_BOUND_PIF case. 5895 */ 5896 ASSERT(dst_ill == attach_ill); 5897 /* attach_ill is already refheld */ 5898 } 5899 /* 5900 * Pick a source address which matches the scope of the 5901 * destination address. 5902 * For RTF_SETSRC routes, the source address is imposed by the 5903 * parent ire (fire). 5904 */ 5905 ASSERT(src_ipif == NULL); 5906 if ((fire != NULL) && (fire->ire_flags & RTF_SETSRC)) { 5907 /* 5908 * Check that the ipif matching the requested source 5909 * address still exists. 
5910 */ 5911 src_ipif = 5912 ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5913 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5914 } 5915 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5916 ip6_asp_table_held = B_TRUE; 5917 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5918 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5919 } 5920 5921 if (src_ipif == NULL) { 5922 if (!unspec_src) { 5923 if (ip_debug > 2) { 5924 /* ip1dbg */ 5925 pr_addr_dbg("ip_newroute_ipif_v6: " 5926 "no src for dst %s\n,", 5927 AF_INET6, v6dstp); 5928 printf(" through interface %s\n", 5929 dst_ill->ill_name); 5930 } 5931 goto err_ret; 5932 } 5933 src_ipif = ipif; 5934 ipif_refhold(src_ipif); 5935 } 5936 ire = ipif_to_ire_v6(ipif); 5937 if (ire == NULL) { 5938 if (ip_debug > 2) { 5939 /* ip1dbg */ 5940 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5941 AF_INET6, &ipif->ipif_v6lcl_addr); 5942 printf("ip_newroute_ipif_v6: " 5943 "if %s\n", dst_ill->ill_name); 5944 } 5945 goto err_ret; 5946 } 5947 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5948 goto err_ret; 5949 5950 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5951 5952 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5953 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5954 if (ip_debug > 2) { 5955 /* ip1dbg */ 5956 pr_addr_dbg(" address %s\n", 5957 AF_INET6, &ire->ire_src_addr_v6); 5958 } 5959 save_ire = ire; 5960 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5961 (void *)ire, (void *)ipif)); 5962 5963 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5964 /* 5965 * an IRE_OFFSUBET was looked up 5966 * on that interface. 5967 * this ire has RTF_MULTIRT flag, 5968 * so the resolution loop 5969 * will be re-entered to resolve 5970 * additional routes on other 5971 * interfaces. For that purpose, 5972 * a copy of the packet is 5973 * made at this point. 
5974 */ 5975 fire->ire_last_used_time = lbolt; 5976 copy_mp = copymsg(first_mp); 5977 if (copy_mp) { 5978 MULTIRT_DEBUG_TAG(copy_mp); 5979 } 5980 } 5981 5982 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 5983 switch (ire->ire_type) { 5984 case IRE_IF_NORESOLVER: { 5985 /* We have what we need to build an IRE_CACHE. */ 5986 mblk_t *dlureq_mp; 5987 5988 /* 5989 * Create a new dlureq_mp with the 5990 * IPv6 gateway address in destination address in the 5991 * DLPI hdr if the physical length is exactly 16 bytes. 5992 */ 5993 ASSERT(dst_ill->ill_isv6); 5994 if (dst_ill->ill_phys_addr_length == IPV6_ADDR_LEN) { 5995 dlureq_mp = ill_dlur_gen((uchar_t *)v6dstp, 5996 dst_ill->ill_phys_addr_length, 5997 dst_ill->ill_sap, 5998 dst_ill->ill_sap_length); 5999 } else { 6000 /* 6001 * handle the Gated case, where we create 6002 * a NORESOLVER route for loopback. 6003 */ 6004 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 6005 break; 6006 dlureq_mp = ill_dlur_gen(NULL, 6007 dst_ill->ill_phys_addr_length, 6008 dst_ill->ill_sap, 6009 dst_ill->ill_sap_length); 6010 } 6011 6012 if (dlureq_mp == NULL) 6013 break; 6014 /* 6015 * The newly created ire will inherit the flags of the 6016 * parent ire, if any. 6017 */ 6018 ire = ire_create_v6( 6019 v6dstp, /* dest address */ 6020 &ipv6_all_ones, /* mask */ 6021 &src_ipif->ipif_v6src_addr, /* source address */ 6022 NULL, /* gateway address */ 6023 &save_ire->ire_max_frag, 6024 NULL, /* Fast Path header */ 6025 dst_ill->ill_rq, /* recv-from queue */ 6026 dst_ill->ill_wq, /* send-to queue */ 6027 IRE_CACHE, 6028 dlureq_mp, 6029 src_ipif, 6030 NULL, 6031 (fire != NULL) ? /* Parent handle */ 6032 fire->ire_phandle : 0, 6033 save_ire->ire_ihandle, /* Interface handle */ 6034 (fire != NULL) ? 
6035 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6036 0, 6037 &ire_uinfo_null, 6038 NULL, 6039 NULL, 6040 ipst); 6041 6042 freeb(dlureq_mp); 6043 6044 if (ire == NULL) { 6045 ire_refrele(save_ire); 6046 break; 6047 } 6048 6049 ire->ire_marks |= ire_marks; 6050 6051 err = ndp_noresolver(dst_ill, v6dstp); 6052 if (err != 0) { 6053 ire_refrele(save_ire); 6054 break; 6055 } 6056 6057 /* Prevent save_ire from getting deleted */ 6058 IRB_REFHOLD(save_ire->ire_bucket); 6059 /* Has it been removed already ? */ 6060 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6061 IRB_REFRELE(save_ire->ire_bucket); 6062 ire_refrele(save_ire); 6063 break; 6064 } 6065 6066 ire_add_then_send(q, ire, first_mp); 6067 if (ip6_asp_table_held) { 6068 ip6_asp_table_refrele(ipst); 6069 ip6_asp_table_held = B_FALSE; 6070 } 6071 6072 /* Assert that it is not deleted yet. */ 6073 ASSERT(save_ire->ire_ptpn != NULL); 6074 IRB_REFRELE(save_ire->ire_bucket); 6075 ire_refrele(save_ire); 6076 if (fire != NULL) { 6077 ire_refrele(fire); 6078 fire = NULL; 6079 } 6080 6081 /* 6082 * The resolution loop is re-entered if we 6083 * actually are in a multirouting case. 6084 */ 6085 if (copy_mp != NULL) { 6086 boolean_t need_resolve = 6087 ire_multirt_need_resolve_v6(v6dstp, 6088 MBLK_GETLABEL(copy_mp), ipst); 6089 if (!need_resolve) { 6090 MULTIRT_DEBUG_UNTAG(copy_mp); 6091 freemsg(copy_mp); 6092 copy_mp = NULL; 6093 } else { 6094 /* 6095 * ipif_lookup_group_v6() calls 6096 * ire_lookup_multi_v6() that uses 6097 * ire_ftable_lookup_v6() to find 6098 * an IRE_INTERFACE for the group. 6099 * In the multirt case, 6100 * ire_lookup_multi_v6() then invokes 6101 * ire_multirt_lookup_v6() to find 6102 * the next resolvable ire. 6103 * As a result, we obtain a new 6104 * interface, derived from the 6105 * next ire. 
6106 */ 6107 if (ipif_held) { 6108 ipif_refrele(ipif); 6109 ipif_held = B_FALSE; 6110 } 6111 ipif = ipif_lookup_group_v6(v6dstp, 6112 zoneid, ipst); 6113 ip2dbg(("ip_newroute_ipif: " 6114 "multirt dst %08x, ipif %p\n", 6115 ntohl(V4_PART_OF_V6((*v6dstp))), 6116 (void *)ipif)); 6117 if (ipif != NULL) { 6118 ipif_held = B_TRUE; 6119 mp = copy_mp; 6120 copy_mp = NULL; 6121 multirt_resolve_next = 6122 B_TRUE; 6123 continue; 6124 } else { 6125 freemsg(copy_mp); 6126 } 6127 } 6128 } 6129 ill_refrele(dst_ill); 6130 if (ipif_held) { 6131 ipif_refrele(ipif); 6132 ipif_held = B_FALSE; 6133 } 6134 if (src_ipif != NULL) 6135 ipif_refrele(src_ipif); 6136 return; 6137 } 6138 case IRE_IF_RESOLVER: { 6139 6140 ASSERT(dst_ill->ill_isv6); 6141 6142 /* 6143 * We obtain a partial IRE_CACHE which we will pass 6144 * along with the resolver query. When the response 6145 * comes back it will be there ready for us to add. 6146 */ 6147 /* 6148 * the newly created ire will inherit the flags of the 6149 * parent ire, if any. 6150 */ 6151 ire = ire_create_v6( 6152 v6dstp, /* dest address */ 6153 &ipv6_all_ones, /* mask */ 6154 &src_ipif->ipif_v6src_addr, /* source address */ 6155 NULL, /* gateway address */ 6156 &save_ire->ire_max_frag, 6157 NULL, /* Fast Path header */ 6158 dst_ill->ill_rq, /* recv-from queue */ 6159 dst_ill->ill_wq, /* send-to queue */ 6160 IRE_CACHE, 6161 NULL, 6162 src_ipif, 6163 NULL, 6164 (fire != NULL) ? /* Parent handle */ 6165 fire->ire_phandle : 0, 6166 save_ire->ire_ihandle, /* Interface handle */ 6167 (fire != NULL) ? 
6168 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 6169 0, 6170 &ire_uinfo_null, 6171 NULL, 6172 NULL, 6173 ipst); 6174 6175 if (ire == NULL) { 6176 ire_refrele(save_ire); 6177 break; 6178 } 6179 6180 ire->ire_marks |= ire_marks; 6181 6182 /* Resolve and add ire to the ctable */ 6183 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 6184 switch (err) { 6185 case 0: 6186 /* Prevent save_ire from getting deleted */ 6187 IRB_REFHOLD(save_ire->ire_bucket); 6188 /* Has it been removed already ? */ 6189 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 6190 IRB_REFRELE(save_ire->ire_bucket); 6191 ire_refrele(save_ire); 6192 break; 6193 } 6194 /* 6195 * We have a resolved cache entry, 6196 * add in the IRE. 6197 */ 6198 ire_add_then_send(q, ire, first_mp); 6199 if (ip6_asp_table_held) { 6200 ip6_asp_table_refrele(ipst); 6201 ip6_asp_table_held = B_FALSE; 6202 } 6203 6204 /* Assert that it is not deleted yet. */ 6205 ASSERT(save_ire->ire_ptpn != NULL); 6206 IRB_REFRELE(save_ire->ire_bucket); 6207 ire_refrele(save_ire); 6208 if (fire != NULL) { 6209 ire_refrele(fire); 6210 fire = NULL; 6211 } 6212 6213 /* 6214 * The resolution loop is re-entered if we 6215 * actually are in a multirouting case. 6216 */ 6217 if (copy_mp != NULL) { 6218 boolean_t need_resolve = 6219 ire_multirt_need_resolve_v6(v6dstp, 6220 MBLK_GETLABEL(copy_mp), ipst); 6221 if (!need_resolve) { 6222 MULTIRT_DEBUG_UNTAG(copy_mp); 6223 freemsg(copy_mp); 6224 copy_mp = NULL; 6225 } else { 6226 /* 6227 * ipif_lookup_group_v6() calls 6228 * ire_lookup_multi_v6() that 6229 * uses ire_ftable_lookup_v6() 6230 * to find an IRE_INTERFACE for 6231 * the group. In the multirt 6232 * case, ire_lookup_multi_v6() 6233 * then invokes 6234 * ire_multirt_lookup_v6() to 6235 * find the next resolvable ire. 6236 * As a result, we obtain a new 6237 * interface, derived from the 6238 * next ire. 
6239 */ 6240 if (ipif_held) { 6241 ipif_refrele(ipif); 6242 ipif_held = B_FALSE; 6243 } 6244 ipif = ipif_lookup_group_v6( 6245 v6dstp, zoneid, ipst); 6246 ip2dbg(("ip_newroute_ipif: " 6247 "multirt dst %08x, " 6248 "ipif %p\n", 6249 ntohl(V4_PART_OF_V6( 6250 (*v6dstp))), 6251 (void *)ipif)); 6252 if (ipif != NULL) { 6253 ipif_held = B_TRUE; 6254 mp = copy_mp; 6255 copy_mp = NULL; 6256 multirt_resolve_next = 6257 B_TRUE; 6258 continue; 6259 } else { 6260 freemsg(copy_mp); 6261 } 6262 } 6263 } 6264 ill_refrele(dst_ill); 6265 if (ipif_held) { 6266 ipif_refrele(ipif); 6267 ipif_held = B_FALSE; 6268 } 6269 if (src_ipif != NULL) 6270 ipif_refrele(src_ipif); 6271 return; 6272 6273 case EINPROGRESS: 6274 /* 6275 * mp was consumed - presumably queued. 6276 * No need for ire, presumably resolution is 6277 * in progress, and ire will be added when the 6278 * address is resolved. 6279 */ 6280 if (ip6_asp_table_held) { 6281 ip6_asp_table_refrele(ipst); 6282 ip6_asp_table_held = B_FALSE; 6283 } 6284 ire_delete(ire); 6285 ire_refrele(save_ire); 6286 if (fire != NULL) { 6287 ire_refrele(fire); 6288 fire = NULL; 6289 } 6290 6291 /* 6292 * The resolution loop is re-entered if we 6293 * actually are in a multirouting case. 6294 */ 6295 if (copy_mp != NULL) { 6296 boolean_t need_resolve = 6297 ire_multirt_need_resolve_v6(v6dstp, 6298 MBLK_GETLABEL(copy_mp), ipst); 6299 if (!need_resolve) { 6300 MULTIRT_DEBUG_UNTAG(copy_mp); 6301 freemsg(copy_mp); 6302 copy_mp = NULL; 6303 } else { 6304 /* 6305 * ipif_lookup_group_v6() calls 6306 * ire_lookup_multi_v6() that 6307 * uses ire_ftable_lookup_v6() 6308 * to find an IRE_INTERFACE for 6309 * the group. In the multirt 6310 * case, ire_lookup_multi_v6() 6311 * then invokes 6312 * ire_multirt_lookup_v6() to 6313 * find the next resolvable ire. 6314 * As a result, we obtain a new 6315 * interface, derived from the 6316 * next ire. 
6317 */ 6318 if (ipif_held) { 6319 ipif_refrele(ipif); 6320 ipif_held = B_FALSE; 6321 } 6322 ipif = ipif_lookup_group_v6( 6323 v6dstp, zoneid, ipst); 6324 ip2dbg(("ip_newroute_ipif: " 6325 "multirt dst %08x, " 6326 "ipif %p\n", 6327 ntohl(V4_PART_OF_V6( 6328 (*v6dstp))), 6329 (void *)ipif)); 6330 if (ipif != NULL) { 6331 ipif_held = B_TRUE; 6332 mp = copy_mp; 6333 copy_mp = NULL; 6334 multirt_resolve_next = 6335 B_TRUE; 6336 continue; 6337 } else { 6338 freemsg(copy_mp); 6339 } 6340 } 6341 } 6342 ill_refrele(dst_ill); 6343 if (ipif_held) { 6344 ipif_refrele(ipif); 6345 ipif_held = B_FALSE; 6346 } 6347 if (src_ipif != NULL) 6348 ipif_refrele(src_ipif); 6349 return; 6350 default: 6351 /* Some transient error */ 6352 ire_refrele(save_ire); 6353 break; 6354 } 6355 break; 6356 } 6357 default: 6358 break; 6359 } 6360 if (ip6_asp_table_held) { 6361 ip6_asp_table_refrele(ipst); 6362 ip6_asp_table_held = B_FALSE; 6363 } 6364 } while (multirt_resolve_next); 6365 6366 err_ret: 6367 if (ip6_asp_table_held) 6368 ip6_asp_table_refrele(ipst); 6369 if (ire != NULL) 6370 ire_refrele(ire); 6371 if (fire != NULL) 6372 ire_refrele(fire); 6373 if (ipif != NULL && ipif_held) 6374 ipif_refrele(ipif); 6375 if (src_ipif != NULL) 6376 ipif_refrele(src_ipif); 6377 /* Multicast - no point in trying to generate ICMP error */ 6378 ASSERT((attach_ill == NULL) || (dst_ill == attach_ill)); 6379 if (dst_ill != NULL) { 6380 ill = dst_ill; 6381 ill_held = B_TRUE; 6382 } 6383 if (mp->b_prev || mp->b_next) { 6384 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6385 } else { 6386 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6387 } 6388 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6389 mp->b_next = NULL; 6390 mp->b_prev = NULL; 6391 freemsg(first_mp); 6392 if (ill_held) 6393 ill_refrele(ill); 6394 } 6395 6396 /* 6397 * Parse and process any hop-by-hop or destination options. 
6398 * 6399 * Assumes that q is an ill read queue so that ICMP errors for link-local 6400 * destinations are sent out the correct interface. 6401 * 6402 * Returns -1 if there was an error and mp has been consumed. 6403 * Returns 0 if no special action is needed. 6404 * Returns 1 if the packet contained a router alert option for this node 6405 * which is verified to be "interesting/known" for our implementation. 6406 * 6407 * XXX Note: In future as more hbh or dest options are defined, 6408 * it may be better to have different routines for hbh and dest 6409 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6410 * may have same value in different namespaces. Or is it same namespace ?? 6411 * Current code checks for each opt_type (other than pads) if it is in 6412 * the expected nexthdr (hbh or dest) 6413 */ 6414 static int 6415 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6416 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6417 { 6418 uint8_t opt_type; 6419 uint_t optused; 6420 int ret = 0; 6421 mblk_t *first_mp; 6422 const char *errtype; 6423 zoneid_t zoneid; 6424 ill_t *ill = q->q_ptr; 6425 6426 first_mp = mp; 6427 if (mp->b_datap->db_type == M_CTL) { 6428 mp = mp->b_cont; 6429 } 6430 6431 while (optlen != 0) { 6432 opt_type = *optptr; 6433 if (opt_type == IP6OPT_PAD1) { 6434 optused = 1; 6435 } else { 6436 if (optlen < 2) 6437 goto bad_opt; 6438 errtype = "malformed"; 6439 if (opt_type == ip6opt_ls) { 6440 optused = 2 + optptr[1]; 6441 if (optused > optlen) 6442 goto bad_opt; 6443 } else switch (opt_type) { 6444 case IP6OPT_PADN: 6445 /* 6446 * Note:We don't verify that (N-2) pad octets 6447 * are zero as required by spec. Adhere to 6448 * "be liberal in what you accept..." 
part of 6449 * implementation philosophy (RFC791,RFC1122) 6450 */ 6451 optused = 2 + optptr[1]; 6452 if (optused > optlen) 6453 goto bad_opt; 6454 break; 6455 6456 case IP6OPT_JUMBO: 6457 if (hdr_type != IPPROTO_HOPOPTS) 6458 goto opt_error; 6459 goto opt_error; /* XXX Not implemented! */ 6460 6461 case IP6OPT_ROUTER_ALERT: { 6462 struct ip6_opt_router *or; 6463 6464 if (hdr_type != IPPROTO_HOPOPTS) 6465 goto opt_error; 6466 optused = 2 + optptr[1]; 6467 if (optused > optlen) 6468 goto bad_opt; 6469 or = (struct ip6_opt_router *)optptr; 6470 /* Check total length and alignment */ 6471 if (optused != sizeof (*or) || 6472 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6473 goto opt_error; 6474 /* Check value */ 6475 switch (*((uint16_t *)or->ip6or_value)) { 6476 case IP6_ALERT_MLD: 6477 case IP6_ALERT_RSVP: 6478 ret = 1; 6479 } 6480 break; 6481 } 6482 case IP6OPT_HOME_ADDRESS: { 6483 /* 6484 * Minimal support for the home address option 6485 * (which is required by all IPv6 nodes). 6486 * Implement by just swapping the home address 6487 * and source address. 6488 * XXX Note: this has IPsec implications since 6489 * AH needs to take this into account. 6490 * Also, when IPsec is used we need to ensure 6491 * that this is only processed once 6492 * in the received packet (to avoid swapping 6493 * back and forth). 6494 * NOTE:This option processing is considered 6495 * to be unsafe and prone to a denial of 6496 * service attack. 6497 * The current processing is not safe even with 6498 * IPsec secured IP packets. Since the home 6499 * address option processing requirement still 6500 * is in the IETF draft and in the process of 6501 * being redefined for its usage, it has been 6502 * decided to turn off the option by default. 6503 * If this section of code needs to be executed, 6504 * ndd variable ip6_ignore_home_address_opt 6505 * should be set to 0 at the user's own risk. 
6506 */ 6507 struct ip6_opt_home_address *oh; 6508 in6_addr_t tmp; 6509 6510 if (ipst->ips_ipv6_ignore_home_address_opt) 6511 goto opt_error; 6512 6513 if (hdr_type != IPPROTO_DSTOPTS) 6514 goto opt_error; 6515 optused = 2 + optptr[1]; 6516 if (optused > optlen) 6517 goto bad_opt; 6518 6519 /* 6520 * We did this dest. opt the first time 6521 * around (i.e. before AH processing). 6522 * If we've done AH... stop now. 6523 */ 6524 if (first_mp != mp) { 6525 ipsec_in_t *ii; 6526 6527 ii = (ipsec_in_t *)first_mp->b_rptr; 6528 if (ii->ipsec_in_ah_sa != NULL) 6529 break; 6530 } 6531 6532 oh = (struct ip6_opt_home_address *)optptr; 6533 /* Check total length and alignment */ 6534 if (optused < sizeof (*oh) || 6535 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6536 goto opt_error; 6537 /* Swap ip6_src and the home address */ 6538 tmp = ip6h->ip6_src; 6539 /* XXX Note: only 8 byte alignment option */ 6540 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6541 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6542 break; 6543 } 6544 6545 case IP6OPT_TUNNEL_LIMIT: 6546 if (hdr_type != IPPROTO_DSTOPTS) { 6547 goto opt_error; 6548 } 6549 optused = 2 + optptr[1]; 6550 if (optused > optlen) { 6551 goto bad_opt; 6552 } 6553 if (optused != 3) { 6554 goto opt_error; 6555 } 6556 break; 6557 6558 default: 6559 errtype = "unknown"; 6560 /* FALLTHROUGH */ 6561 opt_error: 6562 /* Determine which zone should send error */ 6563 zoneid = ipif_lookup_addr_zoneid_v6( 6564 &ip6h->ip6_dst, ill, ipst); 6565 switch (IP6OPT_TYPE(opt_type)) { 6566 case IP6OPT_TYPE_SKIP: 6567 optused = 2 + optptr[1]; 6568 if (optused > optlen) 6569 goto bad_opt; 6570 ip1dbg(("ip_process_options_v6: %s " 6571 "opt 0x%x skipped\n", 6572 errtype, opt_type)); 6573 break; 6574 case IP6OPT_TYPE_DISCARD: 6575 ip1dbg(("ip_process_options_v6: %s " 6576 "opt 0x%x; packet dropped\n", 6577 errtype, opt_type)); 6578 freemsg(first_mp); 6579 return (-1); 6580 case IP6OPT_TYPE_ICMP: 6581 if (zoneid == ALL_ZONES) { 6582 freemsg(first_mp); 6583 return 
(-1); 6584 } 6585 icmp_param_problem_v6(WR(q), first_mp, 6586 ICMP6_PARAMPROB_OPTION, 6587 (uint32_t)(optptr - 6588 (uint8_t *)ip6h), 6589 B_FALSE, B_FALSE, zoneid, ipst); 6590 return (-1); 6591 case IP6OPT_TYPE_FORCEICMP: 6592 if (zoneid == ALL_ZONES) { 6593 freemsg(first_mp); 6594 return (-1); 6595 } 6596 icmp_param_problem_v6(WR(q), first_mp, 6597 ICMP6_PARAMPROB_OPTION, 6598 (uint32_t)(optptr - 6599 (uint8_t *)ip6h), 6600 B_FALSE, B_TRUE, zoneid, ipst); 6601 return (-1); 6602 default: 6603 ASSERT(0); 6604 } 6605 } 6606 } 6607 optlen -= optused; 6608 optptr += optused; 6609 } 6610 return (ret); 6611 6612 bad_opt: 6613 /* Determine which zone should send error */ 6614 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6615 if (zoneid == ALL_ZONES) { 6616 freemsg(first_mp); 6617 } else { 6618 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6619 (uint32_t)(optptr - (uint8_t *)ip6h), 6620 B_FALSE, B_FALSE, zoneid, ipst); 6621 } 6622 return (-1); 6623 } 6624 6625 /* 6626 * Process a routing header that is not yet empty. 6627 * Only handles type 0 routing headers. 6628 */ 6629 static void 6630 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6631 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6632 { 6633 ip6_rthdr0_t *rthdr; 6634 uint_t ehdrlen; 6635 uint_t numaddr; 6636 in6_addr_t *addrptr; 6637 in6_addr_t tmp; 6638 ip_stack_t *ipst = ill->ill_ipst; 6639 6640 ASSERT(rth->ip6r_segleft != 0); 6641 6642 if (!ipst->ips_ipv6_forward_src_routed) { 6643 /* XXX Check for source routed out same interface? 
*/ 6644 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6645 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6646 freemsg(hada_mp); 6647 freemsg(mp); 6648 return; 6649 } 6650 6651 if (rth->ip6r_type != 0) { 6652 if (hada_mp != NULL) 6653 goto hada_drop; 6654 /* Sent by forwarding path, and router is global zone */ 6655 icmp_param_problem_v6(WR(q), mp, 6656 ICMP6_PARAMPROB_HEADER, 6657 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6658 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6659 return; 6660 } 6661 rthdr = (ip6_rthdr0_t *)rth; 6662 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6663 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6664 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6665 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6666 if (rthdr->ip6r0_len & 0x1) { 6667 /* An odd length is impossible */ 6668 if (hada_mp != NULL) 6669 goto hada_drop; 6670 /* Sent by forwarding path, and router is global zone */ 6671 icmp_param_problem_v6(WR(q), mp, 6672 ICMP6_PARAMPROB_HEADER, 6673 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6674 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6675 return; 6676 } 6677 numaddr = rthdr->ip6r0_len / 2; 6678 if (rthdr->ip6r0_segleft > numaddr) { 6679 /* segleft exceeds number of addresses in routing header */ 6680 if (hada_mp != NULL) 6681 goto hada_drop; 6682 /* Sent by forwarding path, and router is global zone */ 6683 icmp_param_problem_v6(WR(q), mp, 6684 ICMP6_PARAMPROB_HEADER, 6685 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6686 (uchar_t *)ip6h), 6687 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6688 return; 6689 } 6690 addrptr += (numaddr - rthdr->ip6r0_segleft); 6691 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6692 IN6_IS_ADDR_MULTICAST(addrptr)) { 6693 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6694 freemsg(hada_mp); 6695 freemsg(mp); 6696 return; 6697 } 6698 /* Swap */ 6699 tmp = *addrptr; 6700 *addrptr = ip6h->ip6_dst; 6701 ip6h->ip6_dst = tmp; 6702 rthdr->ip6r0_segleft--; 
6703 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6704 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6705 if (hada_mp != NULL) 6706 goto hada_drop; 6707 /* Sent by forwarding path, and router is global zone */ 6708 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6709 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6710 return; 6711 } 6712 if (ip_check_v6_mblk(mp, ill) == 0) { 6713 ip6h = (ip6_t *)mp->b_rptr; 6714 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6715 } 6716 return; 6717 hada_drop: 6718 /* IPsec kstats: bean counter? */ 6719 freemsg(hada_mp); 6720 freemsg(mp); 6721 } 6722 6723 /* 6724 * Read side put procedure for IPv6 module. 6725 */ 6726 void 6727 ip_rput_v6(queue_t *q, mblk_t *mp) 6728 { 6729 mblk_t *first_mp; 6730 mblk_t *hada_mp = NULL; 6731 ip6_t *ip6h; 6732 boolean_t ll_multicast = B_FALSE; 6733 boolean_t mctl_present = B_FALSE; 6734 ill_t *ill; 6735 struct iocblk *iocp; 6736 uint_t flags = 0; 6737 mblk_t *dl_mp; 6738 ip_stack_t *ipst; 6739 6740 ill = (ill_t *)q->q_ptr; 6741 ipst = ill->ill_ipst; 6742 if (ill->ill_state_flags & ILL_CONDEMNED) { 6743 union DL_primitives *dl; 6744 6745 dl = (union DL_primitives *)mp->b_rptr; 6746 /* 6747 * Things are opening or closing - only accept DLPI 6748 * ack messages. If the stream is closing and ip_wsrv 6749 * has completed, ip_close is out of the qwait, but has 6750 * not yet completed qprocsoff. Don't proceed any further 6751 * because the ill has been cleaned up and things hanging 6752 * off the ill have been freed. 6753 */ 6754 if ((mp->b_datap->db_type != M_PCPROTO) || 6755 (dl->dl_primitive == DL_UNITDATA_IND)) { 6756 inet_freemsg(mp); 6757 return; 6758 } 6759 } 6760 6761 dl_mp = NULL; 6762 switch (mp->b_datap->db_type) { 6763 case M_DATA: { 6764 int hlen; 6765 uchar_t *ucp; 6766 struct ether_header *eh; 6767 dl_unitdata_ind_t *dui; 6768 6769 /* 6770 * This is a work-around for CR 6451644, a bug in Nemo. It 6771 * should be removed when that problem is fixed. 
6772 */ 6773 if (ill->ill_mactype == DL_ETHER && 6774 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6775 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6776 ucp[-2] == (IP6_DL_SAP >> 8)) { 6777 if (hlen >= sizeof (struct ether_vlan_header) && 6778 ucp[-5] == 0 && ucp[-6] == 0x81) 6779 ucp -= sizeof (struct ether_vlan_header); 6780 else 6781 ucp -= sizeof (struct ether_header); 6782 /* 6783 * If it's a group address, then fabricate a 6784 * DL_UNITDATA_IND message. 6785 */ 6786 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6787 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6788 BPRI_HI)) != NULL) { 6789 eh = (struct ether_header *)ucp; 6790 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6791 DB_TYPE(dl_mp) = M_PROTO; 6792 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6793 dui->dl_primitive = DL_UNITDATA_IND; 6794 dui->dl_dest_addr_length = 8; 6795 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6796 dui->dl_src_addr_length = 8; 6797 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6798 8; 6799 dui->dl_group_address = 1; 6800 ucp = (uchar_t *)(dui + 1); 6801 if (ill->ill_sap_length > 0) 6802 ucp += ill->ill_sap_length; 6803 bcopy(&eh->ether_dhost, ucp, 6); 6804 bcopy(&eh->ether_shost, ucp + 8, 6); 6805 ucp = (uchar_t *)(dui + 1); 6806 if (ill->ill_sap_length < 0) 6807 ucp += 8 + ill->ill_sap_length; 6808 bcopy(&eh->ether_type, ucp, 2); 6809 bcopy(&eh->ether_type, ucp + 8, 2); 6810 } 6811 } 6812 break; 6813 } 6814 6815 case M_PROTO: 6816 case M_PCPROTO: 6817 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6818 DL_UNITDATA_IND) { 6819 /* Go handle anything other than data elsewhere. */ 6820 ip_rput_dlpi(q, mp); 6821 return; 6822 } 6823 #define dlur ((dl_unitdata_ind_t *)mp->b_rptr) 6824 ll_multicast = dlur->dl_group_address; 6825 #undef dlur 6826 /* Save the DLPI header. 
*/ 6827 dl_mp = mp; 6828 mp = mp->b_cont; 6829 dl_mp->b_cont = NULL; 6830 break; 6831 case M_BREAK: 6832 panic("ip_rput_v6: got an M_BREAK"); 6833 /*NOTREACHED*/ 6834 case M_IOCACK: 6835 iocp = (struct iocblk *)mp->b_rptr; 6836 switch (iocp->ioc_cmd) { 6837 case DL_IOC_HDR_INFO: 6838 ill = (ill_t *)q->q_ptr; 6839 ill_fastpath_ack(ill, mp); 6840 return; 6841 case SIOCSTUNPARAM: 6842 case SIOCGTUNPARAM: 6843 case OSIOCSTUNPARAM: 6844 case OSIOCGTUNPARAM: 6845 /* Go through qwriter */ 6846 break; 6847 default: 6848 putnext(q, mp); 6849 return; 6850 } 6851 /* FALLTHRU */ 6852 case M_ERROR: 6853 case M_HANGUP: 6854 mutex_enter(&ill->ill_lock); 6855 if (ill->ill_state_flags & ILL_CONDEMNED) { 6856 mutex_exit(&ill->ill_lock); 6857 freemsg(mp); 6858 return; 6859 } 6860 ill_refhold_locked(ill); 6861 mutex_exit(&ill->ill_lock); 6862 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6863 return; 6864 case M_CTL: 6865 if ((MBLKL(mp) > sizeof (int)) && 6866 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6867 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6868 mctl_present = B_TRUE; 6869 break; 6870 } 6871 putnext(q, mp); 6872 return; 6873 case M_IOCNAK: 6874 iocp = (struct iocblk *)mp->b_rptr; 6875 switch (iocp->ioc_cmd) { 6876 case DL_IOC_HDR_INFO: 6877 case SIOCSTUNPARAM: 6878 case SIOCGTUNPARAM: 6879 case OSIOCSTUNPARAM: 6880 case OSIOCGTUNPARAM: 6881 mutex_enter(&ill->ill_lock); 6882 if (ill->ill_state_flags & ILL_CONDEMNED) { 6883 mutex_exit(&ill->ill_lock); 6884 freemsg(mp); 6885 return; 6886 } 6887 ill_refhold_locked(ill); 6888 mutex_exit(&ill->ill_lock); 6889 qwriter_ip(NULL, ill, q, mp, ip_rput_other, CUR_OP, 6890 B_FALSE); 6891 return; 6892 default: 6893 break; 6894 } 6895 /* FALLTHRU */ 6896 default: 6897 putnext(q, mp); 6898 return; 6899 } 6900 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6901 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6902 (mp->b_cont == NULL) ? 
MBLKL(mp) : msgdsize(mp)); 6903 /* 6904 * if db_ref > 1 then copymsg and free original. Packet may be 6905 * changed and do not want other entity who has a reference to this 6906 * message to trip over the changes. This is a blind change because 6907 * trying to catch all places that might change packet is too 6908 * difficult (since it may be a module above this one). 6909 */ 6910 if (mp->b_datap->db_ref > 1) { 6911 mblk_t *mp1; 6912 6913 mp1 = copymsg(mp); 6914 freemsg(mp); 6915 if (mp1 == NULL) { 6916 first_mp = NULL; 6917 goto discard; 6918 } 6919 mp = mp1; 6920 } 6921 first_mp = mp; 6922 if (mctl_present) { 6923 hada_mp = first_mp; 6924 mp = first_mp->b_cont; 6925 } 6926 6927 if (ip_check_v6_mblk(mp, ill) == -1) 6928 return; 6929 6930 ip6h = (ip6_t *)mp->b_rptr; 6931 6932 DTRACE_PROBE4(ip6__physical__in__start, 6933 ill_t *, ill, ill_t *, NULL, 6934 ip6_t *, ip6h, mblk_t *, first_mp); 6935 6936 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6937 ipst->ips_ipv6firewall_physical_in, 6938 ill, NULL, ip6h, first_mp, mp, ipst); 6939 6940 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6941 6942 if (first_mp == NULL) 6943 return; 6944 6945 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6946 IPV6_DEFAULT_VERS_AND_FLOW) { 6947 /* 6948 * It may be a bit too expensive to do this mapped address 6949 * check here, but in the interest of robustness, it seems 6950 * like the correct place. 6951 * TODO: Avoid this check for e.g. connected TCP sockets 6952 */ 6953 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6954 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6955 goto discard; 6956 } 6957 6958 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6959 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6960 goto discard; 6961 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6962 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6963 goto discard; 6964 } 6965 6966 flags |= (ll_multicast ? 
IP6_IN_LLMCAST : 0); 6967 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6968 } else { 6969 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6970 goto discard; 6971 } 6972 freemsg(dl_mp); 6973 return; 6974 6975 discard: 6976 if (dl_mp != NULL) 6977 freeb(dl_mp); 6978 freemsg(first_mp); 6979 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6980 } 6981 6982 /* 6983 * Walk through the IPv6 packet in mp and see if there's an AH header 6984 * in it. See if the AH header needs to get done before other headers in 6985 * the packet. (Worker function for ipsec_early_ah_v6().) 6986 */ 6987 #define IPSEC_HDR_DONT_PROCESS 0 6988 #define IPSEC_HDR_PROCESS 1 6989 #define IPSEC_MEMORY_ERROR 2 6990 static int 6991 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6992 { 6993 uint_t length; 6994 uint_t ehdrlen; 6995 uint8_t *whereptr; 6996 uint8_t *endptr; 6997 uint8_t *nexthdrp; 6998 ip6_dest_t *desthdr; 6999 ip6_rthdr_t *rthdr; 7000 ip6_t *ip6h; 7001 7002 /* 7003 * For now just pullup everything. In general, the less pullups, 7004 * the better, but there's so much squirrelling through anyway, 7005 * it's just easier this way. 7006 */ 7007 if (!pullupmsg(mp, -1)) { 7008 return (IPSEC_MEMORY_ERROR); 7009 } 7010 7011 ip6h = (ip6_t *)mp->b_rptr; 7012 length = IPV6_HDR_LEN; 7013 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 7014 endptr = mp->b_wptr; 7015 7016 /* 7017 * We can't just use the argument nexthdr in the place 7018 * of nexthdrp becaue we don't dereference nexthdrp 7019 * till we confirm whether it is a valid address. 7020 */ 7021 nexthdrp = &ip6h->ip6_nxt; 7022 while (whereptr < endptr) { 7023 /* Is there enough left for len + nexthdr? 
*/ 7024 if (whereptr + MIN_EHDR_LEN > endptr) 7025 return (IPSEC_MEMORY_ERROR); 7026 7027 switch (*nexthdrp) { 7028 case IPPROTO_HOPOPTS: 7029 case IPPROTO_DSTOPTS: 7030 /* Assumes the headers are identical for hbh and dst */ 7031 desthdr = (ip6_dest_t *)whereptr; 7032 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7033 if ((uchar_t *)desthdr + ehdrlen > endptr) 7034 return (IPSEC_MEMORY_ERROR); 7035 /* 7036 * Return DONT_PROCESS because of potential Mobile IPv6 7037 * cruft for destination options. 7038 */ 7039 if (*nexthdrp == IPPROTO_DSTOPTS) 7040 return (IPSEC_HDR_DONT_PROCESS); 7041 nexthdrp = &desthdr->ip6d_nxt; 7042 break; 7043 case IPPROTO_ROUTING: 7044 rthdr = (ip6_rthdr_t *)whereptr; 7045 7046 /* 7047 * If there's more hops left on the routing header, 7048 * return now with DON'T PROCESS. 7049 */ 7050 if (rthdr->ip6r_segleft > 0) 7051 return (IPSEC_HDR_DONT_PROCESS); 7052 7053 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7054 if ((uchar_t *)rthdr + ehdrlen > endptr) 7055 return (IPSEC_MEMORY_ERROR); 7056 nexthdrp = &rthdr->ip6r_nxt; 7057 break; 7058 case IPPROTO_FRAGMENT: 7059 /* Wait for reassembly */ 7060 return (IPSEC_HDR_DONT_PROCESS); 7061 case IPPROTO_AH: 7062 *nexthdr = IPPROTO_AH; 7063 return (IPSEC_HDR_PROCESS); 7064 case IPPROTO_NONE: 7065 /* No next header means we're finished */ 7066 default: 7067 return (IPSEC_HDR_DONT_PROCESS); 7068 } 7069 length += ehdrlen; 7070 whereptr += ehdrlen; 7071 } 7072 panic("ipsec_needs_processing_v6"); 7073 /*NOTREACHED*/ 7074 } 7075 7076 /* 7077 * Path for AH if options are present. If this is the first time we are 7078 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 7079 * Otherwise, just fanout. Return value answers the boolean question: 7080 * "Did I consume the mblk you sent me?" 7081 * 7082 * Sometimes AH needs to be done before other IPv6 headers for security 7083 * reasons. 
This function (and its ipsec_needs_processing_v6() above) 7084 * indicates if that is so, and fans out to the appropriate IPsec protocol 7085 * for the datagram passed in. 7086 */ 7087 static boolean_t 7088 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 7089 ill_t *ill, ire_t *ire, mblk_t *hada_mp, zoneid_t zoneid) 7090 { 7091 mblk_t *mp; 7092 uint8_t nexthdr; 7093 ipsec_in_t *ii = NULL; 7094 ah_t *ah; 7095 ipsec_status_t ipsec_rc; 7096 ip_stack_t *ipst = ill->ill_ipst; 7097 netstack_t *ns = ipst->ips_netstack; 7098 ipsec_stack_t *ipss = ns->netstack_ipsec; 7099 7100 ASSERT((hada_mp == NULL) || (!mctl_present)); 7101 7102 switch (ipsec_needs_processing_v6( 7103 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 7104 case IPSEC_MEMORY_ERROR: 7105 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7106 freemsg(hada_mp); 7107 freemsg(first_mp); 7108 return (B_TRUE); 7109 case IPSEC_HDR_DONT_PROCESS: 7110 return (B_FALSE); 7111 } 7112 7113 /* Default means send it to AH! */ 7114 ASSERT(nexthdr == IPPROTO_AH); 7115 if (!mctl_present) { 7116 mp = first_mp; 7117 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 7118 if (first_mp == NULL) { 7119 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 7120 "allocation failure.\n")); 7121 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7122 freemsg(hada_mp); 7123 freemsg(mp); 7124 return (B_TRUE); 7125 } 7126 /* 7127 * Store the ill_index so that when we come back 7128 * from IPSEC we ride on the same queue. 7129 */ 7130 ii = (ipsec_in_t *)first_mp->b_rptr; 7131 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 7132 ii->ipsec_in_rill_index = ii->ipsec_in_ill_index; 7133 first_mp->b_cont = mp; 7134 } 7135 /* 7136 * Cache hardware acceleration info. 
7137 */ 7138 if (hada_mp != NULL) { 7139 ASSERT(ii != NULL); 7140 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 7141 "caching data attr.\n")); 7142 ii->ipsec_in_accelerated = B_TRUE; 7143 ii->ipsec_in_da = hada_mp; 7144 } 7145 7146 if (!ipsec_loaded(ipss)) { 7147 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 7148 return (B_TRUE); 7149 } 7150 7151 ah = ipsec_inbound_ah_sa(first_mp, ns); 7152 if (ah == NULL) 7153 return (B_TRUE); 7154 ASSERT(ii->ipsec_in_ah_sa != NULL); 7155 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 7156 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 7157 7158 switch (ipsec_rc) { 7159 case IPSEC_STATUS_SUCCESS: 7160 /* we're done with IPsec processing, send it up */ 7161 ip_fanout_proto_again(first_mp, ill, ill, ire); 7162 break; 7163 case IPSEC_STATUS_FAILED: 7164 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 7165 break; 7166 case IPSEC_STATUS_PENDING: 7167 /* no action needed */ 7168 break; 7169 } 7170 return (B_TRUE); 7171 } 7172 7173 /* 7174 * Validate the IPv6 mblk for alignment. 7175 */ 7176 int 7177 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 7178 { 7179 int pkt_len, ip6_len; 7180 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 7181 7182 /* check for alignment and full IPv6 header */ 7183 if (!OK_32PTR((uchar_t *)ip6h) || 7184 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 7185 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 7186 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7187 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 7188 freemsg(mp); 7189 return (-1); 7190 } 7191 ip6h = (ip6_t *)mp->b_rptr; 7192 } 7193 7194 ASSERT(OK_32PTR((uchar_t *)ip6h) && 7195 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 7196 7197 if (mp->b_cont == NULL) 7198 pkt_len = mp->b_wptr - mp->b_rptr; 7199 else 7200 pkt_len = msgdsize(mp); 7201 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7202 7203 /* 7204 * Check for bogus (too short packet) and packet which 7205 * was padded by the link layer. 
7206 */ 7207 if (ip6_len != pkt_len) { 7208 ssize_t diff; 7209 7210 if (ip6_len > pkt_len) { 7211 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7212 ip6_len, pkt_len)); 7213 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7214 freemsg(mp); 7215 return (-1); 7216 } 7217 diff = (ssize_t)(pkt_len - ip6_len); 7218 7219 if (!adjmsg(mp, -diff)) { 7220 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7221 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7222 freemsg(mp); 7223 return (-1); 7224 } 7225 } 7226 return (0); 7227 } 7228 7229 /* 7230 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7231 * ip_rput_v6 has already verified alignment, the min length, the version, 7232 * and db_ref = 1. 7233 * 7234 * The ill passed in (the arg named inill) is the ill that the packet 7235 * actually arrived on. We need to remember this when saving the 7236 * input interface index into potential IPV6_PKTINFO data in 7237 * ip_add_info_v6(). 7238 * 7239 * This routine doesn't free dl_mp; that's the caller's responsibility on 7240 * return. (Note that the callers are complex enough that there's no tail 7241 * recursion here anyway.) 
7242 */ 7243 void 7244 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7245 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7246 { 7247 ire_t *ire = NULL; 7248 queue_t *rq; 7249 ill_t *ill = inill; 7250 ill_t *outill; 7251 ipif_t *ipif; 7252 uint8_t *whereptr; 7253 uint8_t nexthdr; 7254 uint16_t remlen; 7255 uint_t prev_nexthdr_offset; 7256 uint_t used; 7257 size_t pkt_len; 7258 uint16_t ip6_len; 7259 uint_t hdr_len; 7260 boolean_t mctl_present; 7261 mblk_t *first_mp; 7262 mblk_t *first_mp1; 7263 boolean_t no_forward; 7264 ip6_hbh_t *hbhhdr; 7265 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7266 conn_t *connp; 7267 ilm_t *ilm; 7268 uint32_t ports; 7269 uint_t ipif_id = 0; 7270 zoneid_t zoneid = GLOBAL_ZONEID; 7271 uint16_t hck_flags, reass_hck_flags; 7272 uint32_t reass_sum; 7273 boolean_t cksum_err; 7274 mblk_t *mp1; 7275 ip_stack_t *ipst = inill->ill_ipst; 7276 7277 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7278 7279 if (hada_mp != NULL) { 7280 /* 7281 * It's an IPsec accelerated packet. 7282 * Keep a pointer to the data attributes around until 7283 * we allocate the ipsecinfo structure. 7284 */ 7285 IPSECHW_DEBUG(IPSECHW_PKT, 7286 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7287 hada_mp->b_cont = NULL; 7288 /* 7289 * Since it is accelerated, it came directly from 7290 * the ill. 
7291 */ 7292 ASSERT(mctl_present == B_FALSE); 7293 ASSERT(mp->b_datap->db_type != M_CTL); 7294 } 7295 7296 ip6h = (ip6_t *)mp->b_rptr; 7297 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7298 pkt_len = ip6_len; 7299 7300 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7301 hck_flags = DB_CKSUMFLAGS(mp); 7302 else 7303 hck_flags = 0; 7304 7305 /* Clear checksum flags in case we need to forward */ 7306 DB_CKSUMFLAGS(mp) = 0; 7307 reass_sum = reass_hck_flags = 0; 7308 7309 nexthdr = ip6h->ip6_nxt; 7310 7311 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7312 (uchar_t *)ip6h); 7313 whereptr = (uint8_t *)&ip6h[1]; 7314 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7315 7316 /* Process hop by hop header options */ 7317 if (nexthdr == IPPROTO_HOPOPTS) { 7318 uint_t ehdrlen; 7319 uint8_t *optptr; 7320 7321 if (remlen < MIN_EHDR_LEN) 7322 goto pkt_too_short; 7323 if (mp->b_cont != NULL && 7324 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7325 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7326 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7327 freemsg(hada_mp); 7328 freemsg(first_mp); 7329 return; 7330 } 7331 ip6h = (ip6_t *)mp->b_rptr; 7332 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7333 } 7334 hbhhdr = (ip6_hbh_t *)whereptr; 7335 nexthdr = hbhhdr->ip6h_nxt; 7336 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7337 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7338 7339 if (remlen < ehdrlen) 7340 goto pkt_too_short; 7341 if (mp->b_cont != NULL && 7342 whereptr + ehdrlen > mp->b_wptr) { 7343 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7344 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7345 freemsg(hada_mp); 7346 freemsg(first_mp); 7347 return; 7348 } 7349 ip6h = (ip6_t *)mp->b_rptr; 7350 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7351 hbhhdr = (ip6_hbh_t *)whereptr; 7352 } 7353 7354 optptr = whereptr + 2; 7355 whereptr += ehdrlen; 7356 remlen -= ehdrlen; 7357 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 
7358 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7359 case -1: 7360 /* 7361 * Packet has been consumed and any 7362 * needed ICMP messages sent. 7363 */ 7364 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7365 freemsg(hada_mp); 7366 return; 7367 case 0: 7368 /* no action needed */ 7369 break; 7370 case 1: 7371 /* Known router alert */ 7372 goto ipv6forus; 7373 } 7374 } 7375 7376 /* 7377 * Attach any necessary label information to this packet. 7378 */ 7379 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 7380 if (ip6opt_ls != 0) 7381 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 7382 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7383 freemsg(hada_mp); 7384 freemsg(first_mp); 7385 return; 7386 } 7387 7388 /* 7389 * On incoming v6 multicast packets we will bypass the ire table, 7390 * and assume that the read queue corresponds to the targetted 7391 * interface. 7392 * 7393 * The effect of this is the same as the IPv4 original code, but is 7394 * much cleaner I think. See ip_rput for how that was done. 7395 */ 7396 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7397 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7398 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7399 /* 7400 * XXX TODO Give to mrouted to for multicast forwarding. 
7401 */ 7402 ILM_WALKER_HOLD(ill); 7403 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 7404 ILM_WALKER_RELE(ill); 7405 if (ilm == NULL) { 7406 if (ip_debug > 3) { 7407 /* ip2dbg */ 7408 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7409 " which is not for us: %s\n", AF_INET6, 7410 &ip6h->ip6_dst); 7411 } 7412 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7413 freemsg(hada_mp); 7414 freemsg(first_mp); 7415 return; 7416 } 7417 if (ip_debug > 3) { 7418 /* ip2dbg */ 7419 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7420 AF_INET6, &ip6h->ip6_dst); 7421 } 7422 rq = ill->ill_rq; 7423 zoneid = GLOBAL_ZONEID; 7424 goto ipv6forus; 7425 } 7426 7427 ipif = ill->ill_ipif; 7428 7429 /* 7430 * If a packet was received on an interface that is a 6to4 tunnel, 7431 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7432 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7433 * the 6to4 prefix of the address configured on the receiving interface. 7434 * Otherwise, the packet was delivered to this interface in error and 7435 * the packet must be dropped. 7436 */ 7437 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7438 7439 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7440 &ip6h->ip6_dst)) { 7441 if (ip_debug > 2) { 7442 /* ip1dbg */ 7443 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7444 "addressed packet which is not for us: " 7445 "%s\n", AF_INET6, &ip6h->ip6_dst); 7446 } 7447 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7448 freemsg(first_mp); 7449 return; 7450 } 7451 } 7452 7453 /* 7454 * Find an ire that matches destination. For link-local addresses 7455 * we have to match the ill. 7456 * TBD for site local addresses. 
7457 */ 7458 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7459 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7460 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7461 MATCH_IRE_TYPE | MATCH_IRE_ILL_GROUP, ipst); 7462 } else { 7463 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7464 MBLK_GETLABEL(mp), ipst); 7465 } 7466 if (ire == NULL) { 7467 /* 7468 * No matching IRE found. Mark this packet as having 7469 * originated externally. 7470 */ 7471 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7472 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7473 if (!(ill->ill_flags & ILLF_ROUTER)) { 7474 BUMP_MIB(ill->ill_ip_mib, 7475 ipIfStatsInAddrErrors); 7476 } 7477 freemsg(hada_mp); 7478 freemsg(first_mp); 7479 return; 7480 } 7481 if (ip6h->ip6_hops <= 1) { 7482 if (hada_mp != NULL) 7483 goto hada_drop; 7484 /* Sent by forwarding path, and router is global zone */ 7485 icmp_time_exceeded_v6(WR(q), first_mp, 7486 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7487 GLOBAL_ZONEID, ipst); 7488 return; 7489 } 7490 /* 7491 * Per RFC 3513 section 2.5.2, we must not forward packets with 7492 * an unspecified source address. 7493 */ 7494 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7495 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7496 freemsg(hada_mp); 7497 freemsg(first_mp); 7498 return; 7499 } 7500 mp->b_prev = (mblk_t *)(uintptr_t) 7501 ill->ill_phyint->phyint_ifindex; 7502 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7503 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7504 ALL_ZONES, ipst); 7505 return; 7506 } 7507 ipif_id = ire->ire_ipif->ipif_seqid; 7508 /* we have a matching IRE */ 7509 if (ire->ire_stq != NULL) { 7510 ill_group_t *ill_group; 7511 ill_group_t *ire_group; 7512 7513 /* 7514 * To be quicker, we may wish not to chase pointers 7515 * (ire->ire_ipif->ipif_ill...) and instead store the 7516 * forwarding policy in the ire. 
An unfortunate side- 7517 * effect of this would be requiring an ire flush whenever 7518 * the ILLF_ROUTER flag changes. For now, chase pointers 7519 * once and store in the boolean no_forward. 7520 * 7521 * This appears twice to keep it out of the non-forwarding, 7522 * yes-it's-for-us-on-the-right-interface case. 7523 */ 7524 no_forward = ((ill->ill_flags & 7525 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7526 7527 7528 ASSERT(first_mp == mp); 7529 /* 7530 * This ire has a send-to queue - forward the packet. 7531 */ 7532 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7533 freemsg(hada_mp); 7534 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7535 if (no_forward) { 7536 BUMP_MIB(ill->ill_ip_mib, 7537 ipIfStatsInAddrErrors); 7538 } 7539 freemsg(mp); 7540 ire_refrele(ire); 7541 return; 7542 } 7543 /* 7544 * ipIfStatsHCInForwDatagrams should only be increment if there 7545 * will be an attempt to forward the packet, which is why we 7546 * increment after the above condition has been checked. 7547 */ 7548 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7549 if (ip6h->ip6_hops <= 1) { 7550 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7551 /* Sent by forwarding path, and router is global zone */ 7552 icmp_time_exceeded_v6(WR(q), mp, 7553 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7554 GLOBAL_ZONEID, ipst); 7555 ire_refrele(ire); 7556 return; 7557 } 7558 /* 7559 * Per RFC 3513 section 2.5.2, we must not forward packets with 7560 * an unspecified source address. 
7561 */ 7562 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7563 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7564 freemsg(mp); 7565 ire_refrele(ire); 7566 return; 7567 } 7568 7569 if (is_system_labeled()) { 7570 mblk_t *mp1; 7571 7572 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7573 BUMP_MIB(ill->ill_ip_mib, 7574 ipIfStatsForwProhibits); 7575 freemsg(mp); 7576 ire_refrele(ire); 7577 return; 7578 } 7579 /* Size may have changed */ 7580 mp = mp1; 7581 ip6h = (ip6_t *)mp->b_rptr; 7582 pkt_len = msgdsize(mp); 7583 } 7584 7585 if (pkt_len > ire->ire_max_frag) { 7586 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7587 /* Sent by forwarding path, and router is global zone */ 7588 icmp_pkt2big_v6(WR(q), mp, ire->ire_max_frag, 7589 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7590 ire_refrele(ire); 7591 return; 7592 } 7593 7594 /* 7595 * Check to see if we're forwarding the packet to a 7596 * different link from which it came. If so, check the 7597 * source and destination addresses since routers must not 7598 * forward any packets with link-local source or 7599 * destination addresses to other links. Otherwise (if 7600 * we're forwarding onto the same link), conditionally send 7601 * a redirect message. 7602 */ 7603 ill_group = ill->ill_group; 7604 ire_group = ((ill_t *)(ire->ire_rfq)->q_ptr)->ill_group; 7605 if (ire->ire_rfq != q && (ill_group == NULL || 7606 ill_group != ire_group)) { 7607 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7608 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7609 BUMP_MIB(ill->ill_ip_mib, 7610 ipIfStatsInAddrErrors); 7611 freemsg(mp); 7612 ire_refrele(ire); 7613 return; 7614 } 7615 /* TBD add site-local check at site boundary? */ 7616 } else if (ipst->ips_ipv6_send_redirects) { 7617 in6_addr_t *v6targ; 7618 in6_addr_t gw_addr_v6; 7619 ire_t *src_ire_v6 = NULL; 7620 7621 /* 7622 * Don't send a redirect when forwarding a source 7623 * routed packet. 
7624 */ 7625 if (ip_source_routed_v6(ip6h, mp, ipst)) 7626 goto forward; 7627 7628 mutex_enter(&ire->ire_lock); 7629 gw_addr_v6 = ire->ire_gateway_addr_v6; 7630 mutex_exit(&ire->ire_lock); 7631 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7632 v6targ = &gw_addr_v6; 7633 /* 7634 * We won't send redirects to a router 7635 * that doesn't have a link local 7636 * address, but will forward. 7637 */ 7638 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7639 BUMP_MIB(ill->ill_ip_mib, 7640 ipIfStatsInAddrErrors); 7641 goto forward; 7642 } 7643 } else { 7644 v6targ = &ip6h->ip6_dst; 7645 } 7646 7647 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7648 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7649 ALL_ZONES, 0, NULL, 7650 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7651 ipst); 7652 7653 if (src_ire_v6 != NULL) { 7654 /* 7655 * The source is directly connected. 7656 */ 7657 mp1 = copymsg(mp); 7658 if (mp1 != NULL) { 7659 icmp_send_redirect_v6(WR(q), 7660 mp1, v6targ, &ip6h->ip6_dst, 7661 ill, B_FALSE); 7662 } 7663 ire_refrele(src_ire_v6); 7664 } 7665 } 7666 7667 forward: 7668 /* Hoplimit verified above */ 7669 ip6h->ip6_hops--; 7670 7671 outill = ire->ire_ipif->ipif_ill; 7672 7673 DTRACE_PROBE4(ip6__forwarding__start, 7674 ill_t *, inill, ill_t *, outill, 7675 ip6_t *, ip6h, mblk_t *, mp); 7676 7677 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7678 ipst->ips_ipv6firewall_forwarding, 7679 inill, outill, ip6h, mp, mp, ipst); 7680 7681 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7682 7683 if (mp != NULL) { 7684 UPDATE_IB_PKT_COUNT(ire); 7685 ire->ire_last_used_time = lbolt; 7686 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7687 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7688 } 7689 IRE_REFRELE(ire); 7690 return; 7691 } 7692 rq = ire->ire_rfq; 7693 7694 /* 7695 * Need to put on correct queue for reassembly to find it. 7696 * No need to use put() since reassembly has its own locks. 
7697 * Note: multicast packets and packets destined to addresses 7698 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7699 * the arriving ill. 7700 */ 7701 if (rq != q) { 7702 boolean_t check_multi = B_TRUE; 7703 ill_group_t *ill_group = NULL; 7704 ill_group_t *ire_group = NULL; 7705 ill_t *ire_ill = NULL; 7706 uint_t ill_ifindex = ill->ill_usesrc_ifindex; 7707 7708 /* 7709 * To be quicker, we may wish not to chase pointers 7710 * (ire->ire_ipif->ipif_ill...) and instead store the 7711 * forwarding policy in the ire. An unfortunate side- 7712 * effect of this would be requiring an ire flush whenever 7713 * the ILLF_ROUTER flag changes. For now, chase pointers 7714 * once and store in the boolean no_forward. 7715 */ 7716 no_forward = ((ill->ill_flags & 7717 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7718 7719 ill_group = ill->ill_group; 7720 if (rq != NULL) { 7721 ire_ill = (ill_t *)(rq->q_ptr); 7722 ire_group = ire_ill->ill_group; 7723 } 7724 7725 /* 7726 * If it's part of the same IPMP group, or if it's a legal 7727 * address on the 'usesrc' interface, then bypass strict 7728 * checks. 7729 */ 7730 if (ill_group != NULL && ill_group == ire_group) { 7731 check_multi = B_FALSE; 7732 } else if (ill_ifindex != 0 && ire_ill != NULL && 7733 ill_ifindex == ire_ill->ill_phyint->phyint_ifindex) { 7734 check_multi = B_FALSE; 7735 } 7736 7737 ASSERT(!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)); 7738 if (check_multi && ipst->ips_ipv6_strict_dst_multihoming && 7739 no_forward) { 7740 /* 7741 * This packet came in on an interface other than the 7742 * one associated with the destination address 7743 * and we are strict about matches. 7744 * 7745 * As long as the ills belong to the same group, 7746 * we don't consider them to arriving on the wrong 7747 * interface. Thus, when the switch is doing inbound 7748 * load spreading, we won't drop packets when we 7749 * are doing strict multihoming checks. 
7750 */ 7751 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7752 freemsg(hada_mp); 7753 freemsg(first_mp); 7754 ire_refrele(ire); 7755 return; 7756 } 7757 7758 if (rq != NULL) 7759 q = rq; 7760 7761 ill = (ill_t *)q->q_ptr; 7762 ASSERT(ill); 7763 } 7764 7765 zoneid = ire->ire_zoneid; 7766 UPDATE_IB_PKT_COUNT(ire); 7767 ire->ire_last_used_time = lbolt; 7768 /* Don't use the ire after this point. */ 7769 ire_refrele(ire); 7770 ipv6forus: 7771 /* 7772 * Looks like this packet is for us one way or another. 7773 * This is where we'll process destination headers etc. 7774 */ 7775 for (; ; ) { 7776 switch (nexthdr) { 7777 case IPPROTO_TCP: { 7778 uint16_t *up; 7779 uint32_t sum; 7780 int offset; 7781 7782 hdr_len = pkt_len - remlen; 7783 7784 if (hada_mp != NULL) { 7785 ip0dbg(("tcp hada drop\n")); 7786 goto hada_drop; 7787 } 7788 7789 7790 /* TCP needs all of the TCP header */ 7791 if (remlen < TCP_MIN_HEADER_LENGTH) 7792 goto pkt_too_short; 7793 if (mp->b_cont != NULL && 7794 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7795 if (!pullupmsg(mp, 7796 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7797 BUMP_MIB(ill->ill_ip_mib, 7798 ipIfStatsInDiscards); 7799 freemsg(first_mp); 7800 return; 7801 } 7802 hck_flags = 0; 7803 ip6h = (ip6_t *)mp->b_rptr; 7804 whereptr = (uint8_t *)ip6h + hdr_len; 7805 } 7806 /* 7807 * Extract the offset field from the TCP header. 7808 */ 7809 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7810 if (offset != 5) { 7811 if (offset < 5) { 7812 ip1dbg(("ip_rput_data_v6: short " 7813 "TCP data offset")); 7814 BUMP_MIB(ill->ill_ip_mib, 7815 ipIfStatsInDiscards); 7816 freemsg(first_mp); 7817 return; 7818 } 7819 /* 7820 * There must be TCP options. 7821 * Make sure we can grab them. 
7822 */ 7823 offset <<= 2; 7824 if (remlen < offset) 7825 goto pkt_too_short; 7826 if (mp->b_cont != NULL && 7827 whereptr + offset > mp->b_wptr) { 7828 if (!pullupmsg(mp, 7829 hdr_len + offset)) { 7830 BUMP_MIB(ill->ill_ip_mib, 7831 ipIfStatsInDiscards); 7832 freemsg(first_mp); 7833 return; 7834 } 7835 hck_flags = 0; 7836 ip6h = (ip6_t *)mp->b_rptr; 7837 whereptr = (uint8_t *)ip6h + hdr_len; 7838 } 7839 } 7840 7841 up = (uint16_t *)&ip6h->ip6_src; 7842 /* 7843 * TCP checksum calculation. First sum up the 7844 * pseudo-header fields: 7845 * - Source IPv6 address 7846 * - Destination IPv6 address 7847 * - TCP payload length 7848 * - TCP protocol ID 7849 */ 7850 sum = htons(IPPROTO_TCP + remlen) + 7851 up[0] + up[1] + up[2] + up[3] + 7852 up[4] + up[5] + up[6] + up[7] + 7853 up[8] + up[9] + up[10] + up[11] + 7854 up[12] + up[13] + up[14] + up[15]; 7855 7856 /* Fold initial sum */ 7857 sum = (sum & 0xffff) + (sum >> 16); 7858 7859 mp1 = mp->b_cont; 7860 7861 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7862 IP6_STAT(ipst, ip6_in_sw_cksum); 7863 7864 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7865 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7866 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7867 mp, mp1, cksum_err); 7868 7869 if (cksum_err) { 7870 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7871 7872 if (hck_flags & HCK_FULLCKSUM) { 7873 IP6_STAT(ipst, 7874 ip6_tcp_in_full_hw_cksum_err); 7875 } else if (hck_flags & HCK_PARTIALCKSUM) { 7876 IP6_STAT(ipst, 7877 ip6_tcp_in_part_hw_cksum_err); 7878 } else { 7879 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7880 } 7881 freemsg(first_mp); 7882 return; 7883 } 7884 tcp_fanout: 7885 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7886 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7887 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7888 return; 7889 } 7890 case IPPROTO_SCTP: 7891 { 7892 sctp_hdr_t *sctph; 7893 uint32_t calcsum, pktsum; 7894 uint_t hdr_len = pkt_len - remlen; 7895 sctp_stack_t *sctps; 7896 7897 sctps = 
inill->ill_ipst->ips_netstack->netstack_sctp; 7898 7899 /* SCTP needs all of the SCTP header */ 7900 if (remlen < sizeof (*sctph)) { 7901 goto pkt_too_short; 7902 } 7903 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7904 ASSERT(mp->b_cont != NULL); 7905 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7906 BUMP_MIB(ill->ill_ip_mib, 7907 ipIfStatsInDiscards); 7908 freemsg(mp); 7909 return; 7910 } 7911 ip6h = (ip6_t *)mp->b_rptr; 7912 whereptr = (uint8_t *)ip6h + hdr_len; 7913 } 7914 7915 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7916 /* checksum */ 7917 pktsum = sctph->sh_chksum; 7918 sctph->sh_chksum = 0; 7919 calcsum = sctp_cksum(mp, hdr_len); 7920 if (calcsum != pktsum) { 7921 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7922 freemsg(mp); 7923 return; 7924 } 7925 sctph->sh_chksum = pktsum; 7926 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7927 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7928 ports, ipif_id, zoneid, mp, sctps)) == NULL) { 7929 ip_fanout_sctp_raw(first_mp, ill, 7930 (ipha_t *)ip6h, B_FALSE, ports, 7931 mctl_present, 7932 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7933 B_TRUE, ipif_id, zoneid); 7934 return; 7935 } 7936 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7937 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7938 B_FALSE, mctl_present); 7939 return; 7940 } 7941 case IPPROTO_UDP: { 7942 uint16_t *up; 7943 uint32_t sum; 7944 7945 hdr_len = pkt_len - remlen; 7946 7947 if (hada_mp != NULL) { 7948 ip0dbg(("udp hada drop\n")); 7949 goto hada_drop; 7950 } 7951 7952 /* Verify that at least the ports are present */ 7953 if (remlen < UDPH_SIZE) 7954 goto pkt_too_short; 7955 if (mp->b_cont != NULL && 7956 whereptr + UDPH_SIZE > mp->b_wptr) { 7957 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7958 BUMP_MIB(ill->ill_ip_mib, 7959 ipIfStatsInDiscards); 7960 freemsg(first_mp); 7961 return; 7962 } 7963 hck_flags = 0; 7964 ip6h = (ip6_t *)mp->b_rptr; 7965 whereptr = (uint8_t *)ip6h + hdr_len; 7966 } 7967 7968 /* 7969 * Before going 
through the regular checksum 7970 * calculation, make sure the received checksum 7971 * is non-zero. RFC 2460 says, a 0x0000 checksum 7972 * in a UDP packet (within IPv6 packet) is invalid 7973 * and should be replaced by 0xffff. This makes 7974 * sense as regular checksum calculation will 7975 * pass for both the cases i.e. 0x0000 and 0xffff. 7976 * Removing one of the case makes error detection 7977 * stronger. 7978 */ 7979 7980 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7981 /* 0x0000 checksum is invalid */ 7982 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7983 "checksum value 0x0000\n")); 7984 BUMP_MIB(ill->ill_ip_mib, 7985 udpIfStatsInCksumErrs); 7986 freemsg(first_mp); 7987 return; 7988 } 7989 7990 up = (uint16_t *)&ip6h->ip6_src; 7991 7992 /* 7993 * UDP checksum calculation. First sum up the 7994 * pseudo-header fields: 7995 * - Source IPv6 address 7996 * - Destination IPv6 address 7997 * - UDP payload length 7998 * - UDP protocol ID 7999 */ 8000 8001 sum = htons(IPPROTO_UDP + remlen) + 8002 up[0] + up[1] + up[2] + up[3] + 8003 up[4] + up[5] + up[6] + up[7] + 8004 up[8] + up[9] + up[10] + up[11] + 8005 up[12] + up[13] + up[14] + up[15]; 8006 8007 /* Fold initial sum */ 8008 sum = (sum & 0xffff) + (sum >> 16); 8009 8010 if (reass_hck_flags != 0) { 8011 hck_flags = reass_hck_flags; 8012 8013 IP_CKSUM_RECV_REASS(hck_flags, 8014 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 8015 sum, reass_sum, cksum_err); 8016 } else { 8017 mp1 = mp->b_cont; 8018 8019 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 8020 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 8021 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 8022 mp, mp1, cksum_err); 8023 } 8024 8025 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 8026 IP6_STAT(ipst, ip6_in_sw_cksum); 8027 8028 if (cksum_err) { 8029 BUMP_MIB(ill->ill_ip_mib, 8030 udpIfStatsInCksumErrs); 8031 8032 if (hck_flags & HCK_FULLCKSUM) 8033 IP6_STAT(ipst, 8034 ip6_udp_in_full_hw_cksum_err); 8035 else if (hck_flags & HCK_PARTIALCKSUM) 8036 
IP6_STAT(ipst, 8037 ip6_udp_in_part_hw_cksum_err); 8038 else 8039 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 8040 8041 freemsg(first_mp); 8042 return; 8043 } 8044 goto udp_fanout; 8045 } 8046 case IPPROTO_ICMPV6: { 8047 uint16_t *up; 8048 uint32_t sum; 8049 uint_t hdr_len = pkt_len - remlen; 8050 8051 if (hada_mp != NULL) { 8052 ip0dbg(("icmp hada drop\n")); 8053 goto hada_drop; 8054 } 8055 8056 up = (uint16_t *)&ip6h->ip6_src; 8057 sum = htons(IPPROTO_ICMPV6 + remlen) + 8058 up[0] + up[1] + up[2] + up[3] + 8059 up[4] + up[5] + up[6] + up[7] + 8060 up[8] + up[9] + up[10] + up[11] + 8061 up[12] + up[13] + up[14] + up[15]; 8062 sum = (sum & 0xffff) + (sum >> 16); 8063 sum = IP_CSUM(mp, hdr_len, sum); 8064 if (sum != 0) { 8065 /* IPv6 ICMP checksum failed */ 8066 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 8067 "failed %x\n", 8068 sum)); 8069 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 8070 BUMP_MIB(ill->ill_icmp6_mib, 8071 ipv6IfIcmpInErrors); 8072 freemsg(first_mp); 8073 return; 8074 } 8075 8076 icmp_fanout: 8077 /* Check variable for testing applications */ 8078 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 8079 freemsg(first_mp); 8080 return; 8081 } 8082 /* 8083 * Assume that there is always at least one conn for 8084 * ICMPv6 (in.ndpd) i.e. don't optimize the case 8085 * where there is no conn. 8086 */ 8087 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8088 ASSERT(!(ill->ill_phyint->phyint_flags & 8089 PHYI_LOOPBACK)); 8090 /* 8091 * In the multicast case, applications may have 8092 * joined the group from different zones, so we 8093 * need to deliver the packet to each of them. 8094 * Loop through the multicast memberships 8095 * structures (ilm) on the receive ill and send 8096 * a copy of the packet up each matching one. 
8097 */ 8098 ILM_WALKER_HOLD(ill); 8099 for (ilm = ill->ill_ilm; ilm != NULL; 8100 ilm = ilm->ilm_next) { 8101 if (ilm->ilm_flags & ILM_DELETED) 8102 continue; 8103 if (!IN6_ARE_ADDR_EQUAL( 8104 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 8105 continue; 8106 if (!ipif_lookup_zoneid(ill, 8107 ilm->ilm_zoneid, IPIF_UP, NULL)) 8108 continue; 8109 8110 first_mp1 = ip_copymsg(first_mp); 8111 if (first_mp1 == NULL) 8112 continue; 8113 icmp_inbound_v6(q, first_mp1, ill, 8114 hdr_len, mctl_present, 0, 8115 ilm->ilm_zoneid, dl_mp); 8116 } 8117 ILM_WALKER_RELE(ill); 8118 } else { 8119 first_mp1 = ip_copymsg(first_mp); 8120 if (first_mp1 != NULL) 8121 icmp_inbound_v6(q, first_mp1, ill, 8122 hdr_len, mctl_present, 0, zoneid, 8123 dl_mp); 8124 } 8125 /* FALLTHRU */ 8126 default: { 8127 /* 8128 * Handle protocols with which IPv6 is less intimate. 8129 */ 8130 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 8131 8132 if (hada_mp != NULL) { 8133 ip0dbg(("default hada drop\n")); 8134 goto hada_drop; 8135 } 8136 8137 /* 8138 * Enable sending ICMP for "Unknown" nexthdr 8139 * case. i.e. where we did not FALLTHRU from 8140 * IPPROTO_ICMPV6 processing case above. 8141 * If we did FALLTHRU, then the packet has already been 8142 * processed for IPPF, don't process it again in 8143 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 8144 * flags 8145 */ 8146 if (nexthdr != IPPROTO_ICMPV6) 8147 proto_flags |= IP_FF_SEND_ICMP; 8148 else 8149 proto_flags |= IP6_NO_IPPOLICY; 8150 8151 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 8152 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 8153 mctl_present, zoneid); 8154 return; 8155 } 8156 8157 case IPPROTO_DSTOPTS: { 8158 uint_t ehdrlen; 8159 uint8_t *optptr; 8160 ip6_dest_t *desthdr; 8161 8162 /* Check if AH is present. 
*/ 8163 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8164 ire, hada_mp, zoneid)) { 8165 ip0dbg(("dst early hada drop\n")); 8166 return; 8167 } 8168 8169 /* 8170 * Reinitialize pointers, as ipsec_early_ah_v6() does 8171 * complete pullups. We don't have to do more pullups 8172 * as a result. 8173 */ 8174 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8175 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8176 ip6h = (ip6_t *)mp->b_rptr; 8177 8178 if (remlen < MIN_EHDR_LEN) 8179 goto pkt_too_short; 8180 8181 desthdr = (ip6_dest_t *)whereptr; 8182 nexthdr = desthdr->ip6d_nxt; 8183 prev_nexthdr_offset = (uint_t)(whereptr - 8184 (uint8_t *)ip6h); 8185 ehdrlen = 8 * (desthdr->ip6d_len + 1); 8186 if (remlen < ehdrlen) 8187 goto pkt_too_short; 8188 optptr = whereptr + 2; 8189 /* 8190 * Note: XXX This code does not seem to make 8191 * distinction between Destination Options Header 8192 * being before/after Routing Header which can 8193 * happen if we are at the end of source route. 8194 * This may become significant in future. 8195 * (No real significant Destination Options are 8196 * defined/implemented yet ). 8197 */ 8198 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 8199 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 8200 case -1: 8201 /* 8202 * Packet has been consumed and any needed 8203 * ICMP errors sent. 
8204 */ 8205 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8206 freemsg(hada_mp); 8207 return; 8208 case 0: 8209 /* No action needed continue */ 8210 break; 8211 case 1: 8212 /* 8213 * Unnexpected return value 8214 * (Router alert is a Hop-by-Hop option) 8215 */ 8216 #ifdef DEBUG 8217 panic("ip_rput_data_v6: router " 8218 "alert hbh opt indication in dest opt"); 8219 /*NOTREACHED*/ 8220 #else 8221 freemsg(hada_mp); 8222 freemsg(first_mp); 8223 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8224 return; 8225 #endif 8226 } 8227 used = ehdrlen; 8228 break; 8229 } 8230 case IPPROTO_FRAGMENT: { 8231 ip6_frag_t *fraghdr; 8232 size_t no_frag_hdr_len; 8233 8234 if (hada_mp != NULL) { 8235 ip0dbg(("frag hada drop\n")); 8236 goto hada_drop; 8237 } 8238 8239 ASSERT(first_mp == mp); 8240 if (remlen < sizeof (ip6_frag_t)) 8241 goto pkt_too_short; 8242 8243 if (mp->b_cont != NULL && 8244 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8245 if (!pullupmsg(mp, 8246 pkt_len - remlen + sizeof (ip6_frag_t))) { 8247 BUMP_MIB(ill->ill_ip_mib, 8248 ipIfStatsInDiscards); 8249 freemsg(mp); 8250 return; 8251 } 8252 hck_flags = 0; 8253 ip6h = (ip6_t *)mp->b_rptr; 8254 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8255 } 8256 8257 fraghdr = (ip6_frag_t *)whereptr; 8258 used = (uint_t)sizeof (ip6_frag_t); 8259 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8260 8261 /* 8262 * Invoke the CGTP (multirouting) filtering module to 8263 * process the incoming packet. Packets identified as 8264 * duplicates must be discarded. Filtering is active 8265 * only if the the ip_cgtp_filter ndd variable is 8266 * non-zero. 8267 * 8268 * Only applies to the shared stack since the 8269 * filter_ops do not carry an ip_stack_t or zoneid. 
8270 */ 8271 if (ip_cgtp_filter && (ip_cgtp_filter_ops != NULL) && 8272 ipst->ips_netstack->netstack_stackid == 8273 GLOBAL_NETSTACKID) { 8274 int cgtp_flt_pkt = 8275 ip_cgtp_filter_ops->cfo_filter_v6( 8276 inill->ill_rq, ip6h, fraghdr); 8277 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8278 freemsg(mp); 8279 return; 8280 } 8281 } 8282 8283 /* Restore the flags */ 8284 DB_CKSUMFLAGS(mp) = hck_flags; 8285 8286 mp = ip_rput_frag_v6(q, mp, ip6h, fraghdr, 8287 remlen - used, &prev_nexthdr_offset, 8288 &reass_sum, &reass_hck_flags); 8289 if (mp == NULL) { 8290 /* Reassembly is still pending */ 8291 return; 8292 } 8293 /* The first mblk are the headers before the frag hdr */ 8294 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8295 8296 first_mp = mp; /* mp has most likely changed! */ 8297 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8298 ip6h = (ip6_t *)mp->b_rptr; 8299 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8300 whereptr = mp->b_rptr + no_frag_hdr_len; 8301 remlen = ntohs(ip6h->ip6_plen) + 8302 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8303 pkt_len = msgdsize(mp); 8304 used = 0; 8305 break; 8306 } 8307 case IPPROTO_HOPOPTS: 8308 if (hada_mp != NULL) { 8309 ip0dbg(("hop hada drop\n")); 8310 goto hada_drop; 8311 } 8312 /* 8313 * Illegal header sequence. 8314 * (Hop-by-hop headers are processed above 8315 * and required to immediately follow IPv6 header) 8316 */ 8317 icmp_param_problem_v6(WR(q), first_mp, 8318 ICMP6_PARAMPROB_NEXTHEADER, 8319 prev_nexthdr_offset, 8320 B_FALSE, B_FALSE, zoneid, ipst); 8321 return; 8322 } 8323 case IPPROTO_ROUTING: { 8324 uint_t ehdrlen; 8325 ip6_rthdr_t *rthdr; 8326 8327 /* Check if AH is present. */ 8328 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8329 ire, hada_mp, zoneid)) { 8330 ip0dbg(("routing hada drop\n")); 8331 return; 8332 } 8333 8334 /* 8335 * Reinitialize pointers, as ipsec_early_ah_v6() does 8336 * complete pullups. We don't have to do more pullups 8337 * as a result. 
8338 */ 8339 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8340 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8341 ip6h = (ip6_t *)mp->b_rptr; 8342 8343 if (remlen < MIN_EHDR_LEN) 8344 goto pkt_too_short; 8345 rthdr = (ip6_rthdr_t *)whereptr; 8346 nexthdr = rthdr->ip6r_nxt; 8347 prev_nexthdr_offset = (uint_t)(whereptr - 8348 (uint8_t *)ip6h); 8349 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8350 if (remlen < ehdrlen) 8351 goto pkt_too_short; 8352 if (rthdr->ip6r_segleft != 0) { 8353 /* Not end of source route */ 8354 if (ll_multicast) { 8355 BUMP_MIB(ill->ill_ip_mib, 8356 ipIfStatsForwProhibits); 8357 freemsg(hada_mp); 8358 freemsg(mp); 8359 return; 8360 } 8361 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8362 flags, hada_mp, dl_mp); 8363 return; 8364 } 8365 used = ehdrlen; 8366 break; 8367 } 8368 case IPPROTO_AH: 8369 case IPPROTO_ESP: { 8370 /* 8371 * Fast path for AH/ESP. If this is the first time 8372 * we are sending a datagram to AH/ESP, allocate 8373 * a IPSEC_IN message and prepend it. Otherwise, 8374 * just fanout. 8375 */ 8376 8377 ipsec_in_t *ii; 8378 int ipsec_rc; 8379 ipsec_stack_t *ipss; 8380 8381 ipss = ipst->ips_netstack->netstack_ipsec; 8382 if (!mctl_present) { 8383 ASSERT(first_mp == mp); 8384 first_mp = ipsec_in_alloc(B_FALSE, 8385 ipst->ips_netstack); 8386 if (first_mp == NULL) { 8387 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8388 "allocation failure.\n")); 8389 BUMP_MIB(ill->ill_ip_mib, 8390 ipIfStatsInDiscards); 8391 freemsg(mp); 8392 return; 8393 } 8394 /* 8395 * Store the ill_index so that when we come back 8396 * from IPSEC we ride on the same queue. 8397 */ 8398 ii = (ipsec_in_t *)first_mp->b_rptr; 8399 ii->ipsec_in_ill_index = 8400 ill->ill_phyint->phyint_ifindex; 8401 ii->ipsec_in_rill_index = 8402 ii->ipsec_in_ill_index; 8403 first_mp->b_cont = mp; 8404 /* 8405 * Cache hardware acceleration info. 
8406 */ 8407 if (hada_mp != NULL) { 8408 IPSECHW_DEBUG(IPSECHW_PKT, 8409 ("ip_rput_data_v6: " 8410 "caching data attr.\n")); 8411 ii->ipsec_in_accelerated = B_TRUE; 8412 ii->ipsec_in_da = hada_mp; 8413 hada_mp = NULL; 8414 } 8415 } else { 8416 ii = (ipsec_in_t *)first_mp->b_rptr; 8417 } 8418 8419 if (!ipsec_loaded(ipss)) { 8420 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8421 ire->ire_zoneid, ipst); 8422 return; 8423 } 8424 8425 /* select inbound SA and have IPsec process the pkt */ 8426 if (nexthdr == IPPROTO_ESP) { 8427 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8428 ipst->ips_netstack); 8429 if (esph == NULL) 8430 return; 8431 ASSERT(ii->ipsec_in_esp_sa != NULL); 8432 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8433 NULL); 8434 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8435 first_mp, esph); 8436 } else { 8437 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8438 ipst->ips_netstack); 8439 if (ah == NULL) 8440 return; 8441 ASSERT(ii->ipsec_in_ah_sa != NULL); 8442 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8443 NULL); 8444 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8445 first_mp, ah); 8446 } 8447 8448 switch (ipsec_rc) { 8449 case IPSEC_STATUS_SUCCESS: 8450 break; 8451 case IPSEC_STATUS_FAILED: 8452 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8453 /* FALLTHRU */ 8454 case IPSEC_STATUS_PENDING: 8455 return; 8456 } 8457 /* we're done with IPsec processing, send it up */ 8458 ip_fanout_proto_again(first_mp, ill, inill, ire); 8459 return; 8460 } 8461 case IPPROTO_NONE: 8462 /* All processing is done. Count as "delivered". 
*/ 8463 freemsg(hada_mp); 8464 freemsg(first_mp); 8465 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8466 return; 8467 } 8468 whereptr += used; 8469 ASSERT(remlen >= used); 8470 remlen -= used; 8471 } 8472 /* NOTREACHED */ 8473 8474 pkt_too_short: 8475 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8476 ip6_len, pkt_len, remlen)); 8477 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8478 freemsg(hada_mp); 8479 freemsg(first_mp); 8480 return; 8481 udp_fanout: 8482 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8483 connp = NULL; 8484 } else { 8485 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8486 ipst); 8487 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8488 CONN_DEC_REF(connp); 8489 connp = NULL; 8490 } 8491 } 8492 8493 if (connp == NULL) { 8494 uint32_t ports; 8495 8496 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8497 UDP_PORTS_OFFSET); 8498 IP6_STAT(ipst, ip6_udp_slow_path); 8499 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8500 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8501 zoneid); 8502 return; 8503 } 8504 8505 if (CONN_UDP_FLOWCTLD(connp)) { 8506 freemsg(first_mp); 8507 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8508 CONN_DEC_REF(connp); 8509 return; 8510 } 8511 8512 /* Initiate IPPF processing */ 8513 if (IP6_IN_IPP(flags, ipst)) { 8514 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8515 if (mp == NULL) { 8516 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8517 CONN_DEC_REF(connp); 8518 return; 8519 } 8520 } 8521 8522 if (connp->conn_ip_recvpktinfo || 8523 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8524 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8525 if (mp == NULL) { 8526 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8527 CONN_DEC_REF(connp); 8528 return; 8529 } 8530 } 8531 8532 IP6_STAT(ipst, ip6_udp_fast_path); 8533 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8534 8535 /* Send it upstream */ 8536 CONN_UDP_RECV(connp, mp); 8537 8538 
CONN_DEC_REF(connp); 8539 freemsg(hada_mp); 8540 return; 8541 8542 hada_drop: 8543 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8544 /* IPsec kstats: bump counter here */ 8545 freemsg(hada_mp); 8546 freemsg(first_mp); 8547 } 8548 8549 /* 8550 * Reassemble fragment. 8551 * When it returns a completed message the first mblk will only contain 8552 * the headers prior to the fragment header. 8553 * 8554 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8555 * of the preceding header. This is needed to patch the previous header's 8556 * nexthdr field when reassembly completes. 8557 */ 8558 static mblk_t * 8559 ip_rput_frag_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 8560 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8561 uint32_t *cksum_val, uint16_t *cksum_flags) 8562 { 8563 ill_t *ill = (ill_t *)q->q_ptr; 8564 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8565 uint16_t offset; 8566 boolean_t more_frags; 8567 uint8_t nexthdr = fraghdr->ip6f_nxt; 8568 in6_addr_t *v6dst_ptr; 8569 in6_addr_t *v6src_ptr; 8570 uint_t end; 8571 uint_t hdr_length; 8572 size_t count; 8573 ipf_t *ipf; 8574 ipf_t **ipfp; 8575 ipfb_t *ipfb; 8576 mblk_t *mp1; 8577 uint8_t ecn_info = 0; 8578 size_t msg_len; 8579 mblk_t *tail_mp; 8580 mblk_t *t_mp; 8581 boolean_t pruned = B_FALSE; 8582 uint32_t sum_val; 8583 uint16_t sum_flags; 8584 ip_stack_t *ipst = ill->ill_ipst; 8585 8586 if (cksum_val != NULL) 8587 *cksum_val = 0; 8588 if (cksum_flags != NULL) 8589 *cksum_flags = 0; 8590 8591 /* 8592 * We utilize hardware computed checksum info only for UDP since 8593 * IP fragmentation is a normal occurence for the protocol. In 8594 * addition, checksum offload support for IP fragments carrying 8595 * UDP payload is commonly implemented across network adapters. 
8596 */ 8597 ASSERT(ill != NULL); 8598 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(ill) && 8599 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8600 mblk_t *mp1 = mp->b_cont; 8601 int32_t len; 8602 8603 /* Record checksum information from the packet */ 8604 sum_val = (uint32_t)DB_CKSUM16(mp); 8605 sum_flags = DB_CKSUMFLAGS(mp); 8606 8607 /* fragmented payload offset from beginning of mblk */ 8608 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8609 8610 if ((sum_flags & HCK_PARTIALCKSUM) && 8611 (mp1 == NULL || mp1->b_cont == NULL) && 8612 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8613 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8614 uint32_t adj; 8615 /* 8616 * Partial checksum has been calculated by hardware 8617 * and attached to the packet; in addition, any 8618 * prepended extraneous data is even byte aligned. 8619 * If any such data exists, we adjust the checksum; 8620 * this would also handle any postpended data. 8621 */ 8622 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8623 mp, mp1, len, adj); 8624 8625 /* One's complement subtract extraneous checksum */ 8626 if (adj >= sum_val) 8627 sum_val = ~(adj - sum_val) & 0xFFFF; 8628 else 8629 sum_val -= adj; 8630 } 8631 } else { 8632 sum_val = 0; 8633 sum_flags = 0; 8634 } 8635 8636 /* Clear hardware checksumming flag */ 8637 DB_CKSUMFLAGS(mp) = 0; 8638 8639 /* 8640 * Note: Fragment offset in header is in 8-octet units. 8641 * Clearing least significant 3 bits not only extracts 8642 * it but also gets it in units of octets. 8643 */ 8644 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8645 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8646 8647 /* 8648 * Is the more frags flag on and the payload length not a multiple 8649 * of eight? 
8650 */ 8651 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8652 zoneid_t zoneid; 8653 8654 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8655 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8656 if (zoneid == ALL_ZONES) { 8657 freemsg(mp); 8658 return (NULL); 8659 } 8660 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8661 (uint32_t)((char *)&ip6h->ip6_plen - 8662 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8663 return (NULL); 8664 } 8665 8666 v6src_ptr = &ip6h->ip6_src; 8667 v6dst_ptr = &ip6h->ip6_dst; 8668 end = remlen; 8669 8670 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8671 end += offset; 8672 8673 /* 8674 * Would fragment cause reassembled packet to have a payload length 8675 * greater than IP_MAXPACKET - the max payload size? 8676 */ 8677 if (end > IP_MAXPACKET) { 8678 zoneid_t zoneid; 8679 8680 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8681 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8682 if (zoneid == ALL_ZONES) { 8683 freemsg(mp); 8684 return (NULL); 8685 } 8686 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 8687 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8688 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8689 return (NULL); 8690 } 8691 8692 /* 8693 * This packet just has one fragment. Reassembly not 8694 * needed. 8695 */ 8696 if (!more_frags && offset == 0) { 8697 goto reass_done; 8698 } 8699 8700 /* 8701 * Drop the fragmented as early as possible, if 8702 * we don't have resource(s) to re-assemble. 8703 */ 8704 if (ipst->ips_ip_reass_queue_bytes == 0) { 8705 freemsg(mp); 8706 return (NULL); 8707 } 8708 8709 /* Record the ECN field info. */ 8710 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8711 /* 8712 * If this is not the first fragment, dump the unfragmentable 8713 * portion of the packet. 8714 */ 8715 if (offset) 8716 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8717 8718 /* 8719 * Fragmentation reassembly. 
Each ILL has a hash table for 8720 * queueing packets undergoing reassembly for all IPIFs 8721 * associated with the ILL. The hash is based on the packet 8722 * IP ident field. The ILL frag hash table was allocated 8723 * as a timer block at the time the ILL was created. Whenever 8724 * there is anything on the reassembly queue, the timer will 8725 * be running. 8726 */ 8727 msg_len = MBLKSIZE(mp); 8728 tail_mp = mp; 8729 while (tail_mp->b_cont != NULL) { 8730 tail_mp = tail_mp->b_cont; 8731 msg_len += MBLKSIZE(tail_mp); 8732 } 8733 /* 8734 * If the reassembly list for this ILL will get too big 8735 * prune it. 8736 */ 8737 8738 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8739 ipst->ips_ip_reass_queue_bytes) { 8740 ill_frag_prune(ill, 8741 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8742 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8743 pruned = B_TRUE; 8744 } 8745 8746 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8747 mutex_enter(&ipfb->ipfb_lock); 8748 8749 ipfp = &ipfb->ipfb_ipf; 8750 /* Try to find an existing fragment queue for this packet. */ 8751 for (;;) { 8752 ipf = ipfp[0]; 8753 if (ipf) { 8754 /* 8755 * It has to match on ident, source address, and 8756 * dest address. 8757 */ 8758 if (ipf->ipf_ident == ident && 8759 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8760 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8761 8762 /* 8763 * If we have received too many 8764 * duplicate fragments for this packet 8765 * free it. 8766 */ 8767 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8768 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8769 freemsg(mp); 8770 mutex_exit(&ipfb->ipfb_lock); 8771 return (NULL); 8772 } 8773 8774 break; 8775 } 8776 ipfp = &ipf->ipf_hash_next; 8777 continue; 8778 } 8779 8780 8781 /* 8782 * If we pruned the list, do we want to store this new 8783 * fragment?. We apply an optimization here based on the 8784 * fact that most fragments will be received in order. 
8785 * So if the offset of this incoming fragment is zero, 8786 * it is the first fragment of a new packet. We will 8787 * keep it. Otherwise drop the fragment, as we have 8788 * probably pruned the packet already (since the 8789 * packet cannot be found). 8790 */ 8791 8792 if (pruned && offset != 0) { 8793 mutex_exit(&ipfb->ipfb_lock); 8794 freemsg(mp); 8795 return (NULL); 8796 } 8797 8798 /* New guy. Allocate a frag message. */ 8799 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8800 if (!mp1) { 8801 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8802 freemsg(mp); 8803 partial_reass_done: 8804 mutex_exit(&ipfb->ipfb_lock); 8805 return (NULL); 8806 } 8807 8808 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8809 /* 8810 * Too many fragmented packets in this hash bucket. 8811 * Free the oldest. 8812 */ 8813 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8814 } 8815 8816 mp1->b_cont = mp; 8817 8818 /* Initialize the fragment header. */ 8819 ipf = (ipf_t *)mp1->b_rptr; 8820 ipf->ipf_mp = mp1; 8821 ipf->ipf_ptphn = ipfp; 8822 ipfp[0] = ipf; 8823 ipf->ipf_hash_next = NULL; 8824 ipf->ipf_ident = ident; 8825 ipf->ipf_v6src = *v6src_ptr; 8826 ipf->ipf_v6dst = *v6dst_ptr; 8827 /* Record reassembly start time. 
*/ 8828 ipf->ipf_timestamp = gethrestime_sec(); 8829 /* Record ipf generation and account for frag header */ 8830 ipf->ipf_gen = ill->ill_ipf_gen++; 8831 ipf->ipf_count = MBLKSIZE(mp1); 8832 ipf->ipf_protocol = nexthdr; 8833 ipf->ipf_nf_hdr_len = 0; 8834 ipf->ipf_prev_nexthdr_offset = 0; 8835 ipf->ipf_last_frag_seen = B_FALSE; 8836 ipf->ipf_ecn = ecn_info; 8837 ipf->ipf_num_dups = 0; 8838 ipfb->ipfb_frag_pkts++; 8839 ipf->ipf_checksum = 0; 8840 ipf->ipf_checksum_flags = 0; 8841 8842 /* Store checksum value in fragment header */ 8843 if (sum_flags != 0) { 8844 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8845 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8846 ipf->ipf_checksum = sum_val; 8847 ipf->ipf_checksum_flags = sum_flags; 8848 } 8849 8850 /* 8851 * We handle reassembly two ways. In the easy case, 8852 * where all the fragments show up in order, we do 8853 * minimal bookkeeping, and just clip new pieces on 8854 * the end. If we ever see a hole, then we go off 8855 * to ip_reassemble which has to mark the pieces and 8856 * keep track of the number of holes, etc. Obviously, 8857 * the point of having both mechanisms is so we can 8858 * handle the easy case as efficiently as possible. 8859 */ 8860 if (offset == 0) { 8861 /* Easy case, in-order reassembly so far. */ 8862 /* Update the byte count */ 8863 ipf->ipf_count += msg_len; 8864 ipf->ipf_tail_mp = tail_mp; 8865 /* 8866 * Keep track of next expected offset in 8867 * ipf_end. 8868 */ 8869 ipf->ipf_end = end; 8870 ipf->ipf_nf_hdr_len = hdr_length; 8871 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8872 } else { 8873 /* Hard case, hole at the beginning. */ 8874 ipf->ipf_tail_mp = NULL; 8875 /* 8876 * ipf_end == 0 means that we have given up 8877 * on easy reassembly. 8878 */ 8879 ipf->ipf_end = 0; 8880 8881 /* Forget checksum offload from now on */ 8882 ipf->ipf_checksum_flags = 0; 8883 8884 /* 8885 * ipf_hole_cnt is set by ip_reassemble. 8886 * ipf_count is updated by ip_reassemble. 
8887 * No need to check for return value here 8888 * as we don't expect reassembly to complete or 8889 * fail for the first fragment itself. 8890 */ 8891 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8892 msg_len); 8893 } 8894 /* Update per ipfb and ill byte counts */ 8895 ipfb->ipfb_count += ipf->ipf_count; 8896 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8897 ill->ill_frag_count += ipf->ipf_count; 8898 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8899 /* If the frag timer wasn't already going, start it. */ 8900 mutex_enter(&ill->ill_lock); 8901 ill_frag_timer_start(ill); 8902 mutex_exit(&ill->ill_lock); 8903 goto partial_reass_done; 8904 } 8905 8906 /* 8907 * If the packet's flag has changed (it could be coming up 8908 * from an interface different than the previous, therefore 8909 * possibly different checksum capability), then forget about 8910 * any stored checksum states. Otherwise add the value to 8911 * the existing one stored in the fragment header. 8912 */ 8913 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8914 sum_val += ipf->ipf_checksum; 8915 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8916 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8917 ipf->ipf_checksum = sum_val; 8918 } else if (ipf->ipf_checksum_flags != 0) { 8919 /* Forget checksum offload from now on */ 8920 ipf->ipf_checksum_flags = 0; 8921 } 8922 8923 /* 8924 * We have a new piece of a datagram which is already being 8925 * reassembled. Update the ECN info if all IP fragments 8926 * are ECN capable. If there is one which is not, clear 8927 * all the info. If there is at least one which has CE 8928 * code point, IP needs to report that up to transport. 
8929 */ 8930 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8931 if (ecn_info == IPH_ECN_CE) 8932 ipf->ipf_ecn = IPH_ECN_CE; 8933 } else { 8934 ipf->ipf_ecn = IPH_ECN_NECT; 8935 } 8936 8937 if (offset && ipf->ipf_end == offset) { 8938 /* The new fragment fits at the end */ 8939 ipf->ipf_tail_mp->b_cont = mp; 8940 /* Update the byte count */ 8941 ipf->ipf_count += msg_len; 8942 /* Update per ipfb and ill byte counts */ 8943 ipfb->ipfb_count += msg_len; 8944 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8945 ill->ill_frag_count += msg_len; 8946 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8947 if (more_frags) { 8948 /* More to come. */ 8949 ipf->ipf_end = end; 8950 ipf->ipf_tail_mp = tail_mp; 8951 goto partial_reass_done; 8952 } 8953 } else { 8954 /* 8955 * Go do the hard cases. 8956 * Call ip_reassemble(). 8957 */ 8958 int ret; 8959 8960 if (offset == 0) { 8961 if (ipf->ipf_prev_nexthdr_offset == 0) { 8962 ipf->ipf_nf_hdr_len = hdr_length; 8963 ipf->ipf_prev_nexthdr_offset = 8964 *prev_nexthdr_offset; 8965 } 8966 } 8967 /* Save current byte count */ 8968 count = ipf->ipf_count; 8969 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8970 8971 /* Count of bytes added and subtracted (freeb()ed) */ 8972 count = ipf->ipf_count - count; 8973 if (count) { 8974 /* Update per ipfb and ill byte counts */ 8975 ipfb->ipfb_count += count; 8976 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8977 ill->ill_frag_count += count; 8978 ASSERT(ill->ill_frag_count > 0); /* Wraparound */ 8979 } 8980 if (ret == IP_REASS_PARTIAL) { 8981 goto partial_reass_done; 8982 } else if (ret == IP_REASS_FAILED) { 8983 /* Reassembly failed. 
Free up all resources */ 8984 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8985 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8986 IP_REASS_SET_START(t_mp, 0); 8987 IP_REASS_SET_END(t_mp, 0); 8988 } 8989 freemsg(mp); 8990 goto partial_reass_done; 8991 } 8992 8993 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8994 } 8995 /* 8996 * We have completed reassembly. Unhook the frag header from 8997 * the reassembly list. 8998 * 8999 * Grab the unfragmentable header length next header value out 9000 * of the first fragment 9001 */ 9002 ASSERT(ipf->ipf_nf_hdr_len != 0); 9003 hdr_length = ipf->ipf_nf_hdr_len; 9004 9005 /* 9006 * Before we free the frag header, record the ECN info 9007 * to report back to the transport. 9008 */ 9009 ecn_info = ipf->ipf_ecn; 9010 9011 /* 9012 * Store the nextheader field in the header preceding the fragment 9013 * header 9014 */ 9015 nexthdr = ipf->ipf_protocol; 9016 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 9017 ipfp = ipf->ipf_ptphn; 9018 9019 /* We need to supply these to caller */ 9020 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 9021 sum_val = ipf->ipf_checksum; 9022 else 9023 sum_val = 0; 9024 9025 mp1 = ipf->ipf_mp; 9026 count = ipf->ipf_count; 9027 ipf = ipf->ipf_hash_next; 9028 if (ipf) 9029 ipf->ipf_ptphn = ipfp; 9030 ipfp[0] = ipf; 9031 ill->ill_frag_count -= count; 9032 ASSERT(ipfb->ipfb_count >= count); 9033 ipfb->ipfb_count -= count; 9034 ipfb->ipfb_frag_pkts--; 9035 mutex_exit(&ipfb->ipfb_lock); 9036 /* Ditch the frag header. */ 9037 mp = mp1->b_cont; 9038 freeb(mp1); 9039 9040 /* 9041 * Make sure the packet is good by doing some sanity 9042 * check. If bad we can silentely drop the packet. 
9043 */ 9044 reass_done: 9045 if (hdr_length < sizeof (ip6_frag_t)) { 9046 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 9047 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 9048 freemsg(mp); 9049 return (NULL); 9050 } 9051 9052 /* 9053 * Remove the fragment header from the initial header by 9054 * splitting the mblk into the non-fragmentable header and 9055 * everthing after the fragment extension header. This has the 9056 * side effect of putting all the headers that need destination 9057 * processing into the b_cont block-- on return this fact is 9058 * used in order to avoid having to look at the extensions 9059 * already processed. 9060 * 9061 * Note that this code assumes that the unfragmentable portion 9062 * of the header is in the first mblk and increments 9063 * the read pointer past it. If this assumption is broken 9064 * this code fails badly. 9065 */ 9066 if (mp->b_rptr + hdr_length != mp->b_wptr) { 9067 mblk_t *nmp; 9068 9069 if (!(nmp = dupb(mp))) { 9070 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 9071 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 9072 freemsg(mp); 9073 return (NULL); 9074 } 9075 nmp->b_cont = mp->b_cont; 9076 mp->b_cont = nmp; 9077 nmp->b_rptr += hdr_length; 9078 } 9079 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 9080 9081 ip6h = (ip6_t *)mp->b_rptr; 9082 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 9083 9084 /* Restore original IP length in header. */ 9085 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 9086 /* Record the ECN info. */ 9087 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 9088 ip6h->ip6_vcf |= htonl(ecn_info << 20); 9089 9090 /* Reassembly is successful; return checksum information if needed */ 9091 if (cksum_val != NULL) 9092 *cksum_val = sum_val; 9093 if (cksum_flags != NULL) 9094 *cksum_flags = sum_flags; 9095 9096 return (mp); 9097 } 9098 9099 /* 9100 * Walk through the options to see if there is a routing header. 
9101 * If present get the destination which is the last address of 9102 * the option. 9103 */ 9104 in6_addr_t 9105 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 9106 { 9107 uint8_t nexthdr; 9108 uint8_t *whereptr; 9109 ip6_hbh_t *hbhhdr; 9110 ip6_dest_t *dsthdr; 9111 ip6_rthdr0_t *rthdr; 9112 ip6_frag_t *fraghdr; 9113 int ehdrlen; 9114 int left; 9115 in6_addr_t *ap, rv; 9116 9117 if (is_fragment != NULL) 9118 *is_fragment = B_FALSE; 9119 9120 rv = ip6h->ip6_dst; 9121 9122 nexthdr = ip6h->ip6_nxt; 9123 whereptr = (uint8_t *)&ip6h[1]; 9124 for (;;) { 9125 9126 ASSERT(nexthdr != IPPROTO_RAW); 9127 switch (nexthdr) { 9128 case IPPROTO_HOPOPTS: 9129 hbhhdr = (ip6_hbh_t *)whereptr; 9130 nexthdr = hbhhdr->ip6h_nxt; 9131 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 9132 break; 9133 case IPPROTO_DSTOPTS: 9134 dsthdr = (ip6_dest_t *)whereptr; 9135 nexthdr = dsthdr->ip6d_nxt; 9136 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 9137 break; 9138 case IPPROTO_ROUTING: 9139 rthdr = (ip6_rthdr0_t *)whereptr; 9140 nexthdr = rthdr->ip6r0_nxt; 9141 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 9142 9143 left = rthdr->ip6r0_segleft; 9144 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 9145 rv = *(ap + left - 1); 9146 /* 9147 * If the caller doesn't care whether the packet 9148 * is a fragment or not, we can stop here since 9149 * we have our destination. 9150 */ 9151 if (is_fragment == NULL) 9152 goto done; 9153 break; 9154 case IPPROTO_FRAGMENT: 9155 fraghdr = (ip6_frag_t *)whereptr; 9156 nexthdr = fraghdr->ip6f_nxt; 9157 ehdrlen = sizeof (ip6_frag_t); 9158 if (is_fragment != NULL) 9159 *is_fragment = B_TRUE; 9160 goto done; 9161 default : 9162 goto done; 9163 } 9164 whereptr += ehdrlen; 9165 } 9166 9167 done: 9168 return (rv); 9169 } 9170 9171 /* 9172 * ip_source_routed_v6: 9173 * This function is called by redirect code in ip_rput_data_v6 to 9174 * know whether this packet is source routed through this node i.e 9175 * whether this node (router) is part of the journey. 
This 9176 * function is called under two cases : 9177 * 9178 * case 1 : Routing header was processed by this node and 9179 * ip_process_rthdr replaced ip6_dst with the next hop 9180 * and we are forwarding the packet to the next hop. 9181 * 9182 * case 2 : Routing header was not processed by this node and we 9183 * are just forwarding the packet. 9184 * 9185 * For case (1) we don't want to send redirects. For case(2) we 9186 * want to send redirects. 9187 */ 9188 static boolean_t 9189 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 9190 { 9191 uint8_t nexthdr; 9192 in6_addr_t *addrptr; 9193 ip6_rthdr0_t *rthdr; 9194 uint8_t numaddr; 9195 ip6_hbh_t *hbhhdr; 9196 uint_t ehdrlen; 9197 uint8_t *byteptr; 9198 9199 ip2dbg(("ip_source_routed_v6\n")); 9200 nexthdr = ip6h->ip6_nxt; 9201 ehdrlen = IPV6_HDR_LEN; 9202 9203 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9204 while (nexthdr == IPPROTO_HOPOPTS || 9205 nexthdr == IPPROTO_DSTOPTS) { 9206 byteptr = (uint8_t *)ip6h + ehdrlen; 9207 /* 9208 * Check if we have already processed 9209 * packets or we are just a forwarding 9210 * router which only pulled up msgs up 9211 * to IPV6HDR and one HBH ext header 9212 */ 9213 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9214 ip2dbg(("ip_source_routed_v6: Extension" 9215 " headers not processed\n")); 9216 return (B_FALSE); 9217 } 9218 hbhhdr = (ip6_hbh_t *)byteptr; 9219 nexthdr = hbhhdr->ip6h_nxt; 9220 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9221 } 9222 switch (nexthdr) { 9223 case IPPROTO_ROUTING: 9224 byteptr = (uint8_t *)ip6h + ehdrlen; 9225 /* 9226 * If for some reason, we haven't pulled up 9227 * the routing hdr data mblk, then we must 9228 * not have processed it at all. So for sure 9229 * we are not part of the source routed journey. 
9230 */ 9231 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9232 ip2dbg(("ip_source_routed_v6: Routing" 9233 " header not processed\n")); 9234 return (B_FALSE); 9235 } 9236 rthdr = (ip6_rthdr0_t *)byteptr; 9237 /* 9238 * Either we are an intermediate router or the 9239 * last hop before destination and we have 9240 * already processed the routing header. 9241 * If segment_left is greater than or equal to zero, 9242 * then we must be the (numaddr - segleft) entry 9243 * of the routing header. Although ip6r0_segleft 9244 * is a unit8_t variable, we still check for zero 9245 * or greater value, if in case the data type 9246 * is changed someday in future. 9247 */ 9248 if (rthdr->ip6r0_segleft > 0 || 9249 rthdr->ip6r0_segleft == 0) { 9250 ire_t *ire = NULL; 9251 9252 numaddr = rthdr->ip6r0_len / 2; 9253 addrptr = (in6_addr_t *)((char *)rthdr + 9254 sizeof (*rthdr)); 9255 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9256 if (addrptr != NULL) { 9257 ire = ire_ctable_lookup_v6(addrptr, NULL, 9258 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9259 MATCH_IRE_TYPE, 9260 ipst); 9261 if (ire != NULL) { 9262 ire_refrele(ire); 9263 return (B_TRUE); 9264 } 9265 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9266 } 9267 } 9268 /* FALLTHRU */ 9269 default: 9270 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9271 return (B_FALSE); 9272 } 9273 } 9274 9275 /* 9276 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9277 * Assumes that the following set of headers appear in the first 9278 * mblk: 9279 * ip6i_t (if present) CAN also appear as a separate mblk. 9280 * ip6_t 9281 * Any extension headers 9282 * TCP/UDP/SCTP header (if present) 9283 * The routine can handle an ICMPv6 header that is not in the first mblk. 9284 * 9285 * The order to determine the outgoing interface is as follows: 9286 * 1. IPV6_BOUND_PIF is set, use that ill (conn_outgoing_pill) 9287 * 2. If conn_nofailover_ill is set then use that ill. 9288 * 3. 
If an ip6i_t with IP6I_IFINDEX set then use that ill. 9289 * 4. If q is an ill queue and (link local or multicast destination) then 9290 * use that ill. 9291 * 5. If IPV6_BOUND_IF has been set use that ill. 9292 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9293 * look for the best IRE match for the unspecified group to determine 9294 * the ill. 9295 * 7. For unicast: Just do an IRE lookup for the best match. 9296 * 9297 * arg2 is always a queue_t *. 9298 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9299 * the zoneid. 9300 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9301 */ 9302 void 9303 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9304 { 9305 conn_t *connp = NULL; 9306 queue_t *q = (queue_t *)arg2; 9307 ire_t *ire = NULL; 9308 ire_t *sctp_ire = NULL; 9309 ip6_t *ip6h; 9310 in6_addr_t *v6dstp; 9311 ill_t *ill = NULL; 9312 ipif_t *ipif; 9313 ip6i_t *ip6i; 9314 int cksum_request; /* -1 => normal. */ 9315 /* 1 => Skip TCP/UDP/SCTP checksum */ 9316 /* Otherwise contains insert offset for checksum */ 9317 int unspec_src; 9318 boolean_t do_outrequests; /* Increment OutRequests? 
*/ 9319 mib2_ipIfStatsEntry_t *mibptr; 9320 int match_flags = MATCH_IRE_ILL_GROUP; 9321 boolean_t attach_if = B_FALSE; 9322 mblk_t *first_mp; 9323 boolean_t mctl_present; 9324 ipsec_out_t *io; 9325 boolean_t drop_if_delayed = B_FALSE; 9326 boolean_t multirt_need_resolve = B_FALSE; 9327 mblk_t *copy_mp = NULL; 9328 int err; 9329 int ip6i_flags = 0; 9330 zoneid_t zoneid; 9331 ill_t *saved_ill = NULL; 9332 boolean_t conn_lock_held; 9333 boolean_t need_decref = B_FALSE; 9334 ip_stack_t *ipst; 9335 9336 if (q->q_next != NULL) { 9337 ill = (ill_t *)q->q_ptr; 9338 ipst = ill->ill_ipst; 9339 } else { 9340 connp = (conn_t *)arg; 9341 ASSERT(connp != NULL); 9342 ipst = connp->conn_netstack->netstack_ip; 9343 } 9344 9345 /* 9346 * Highest bit in version field is Reachability Confirmation bit 9347 * used by NUD in ip_xmit_v6(). 9348 */ 9349 #ifdef _BIG_ENDIAN 9350 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9351 #else 9352 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9353 #endif 9354 9355 /* 9356 * M_CTL comes from 6 places 9357 * 9358 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9359 * both V4 and V6 datagrams. 9360 * 9361 * 2) AH/ESP sends down M_CTL after doing their job with both 9362 * V4 and V6 datagrams. 9363 * 9364 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9365 * attached. 9366 * 9367 * 4) Notifications from an external resolver (for XRESOLV ifs) 9368 * 9369 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9370 * IPsec hardware acceleration support. 9371 * 9372 * 6) TUN_HELLO. 9373 * 9374 * We need to handle (1)'s IPv6 case and (3) here. For the 9375 * IPv4 case in (1), and (2), IPSEC processing has already 9376 * started. The code in ip_wput() already knows how to handle 9377 * continuing IPSEC processing (for IPv4 and IPv6). All other 9378 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9379 * for handling. 
9380 */ 9381 first_mp = mp; 9382 mctl_present = B_FALSE; 9383 io = NULL; 9384 9385 /* Multidata transmit? */ 9386 if (DB_TYPE(mp) == M_MULTIDATA) { 9387 /* 9388 * We should never get here, since all Multidata messages 9389 * originating from tcp should have been directed over to 9390 * tcp_multisend() in the first place. 9391 */ 9392 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9393 freemsg(mp); 9394 return; 9395 } else if (DB_TYPE(mp) == M_CTL) { 9396 uint32_t mctltype = 0; 9397 uint32_t mlen = MBLKL(first_mp); 9398 9399 mp = mp->b_cont; 9400 mctl_present = B_TRUE; 9401 io = (ipsec_out_t *)first_mp->b_rptr; 9402 9403 /* 9404 * Validate this M_CTL message. The only three types of 9405 * M_CTL messages we expect to see in this code path are 9406 * ipsec_out_t or ipsec_in_t structures (allocated as 9407 * ipsec_info_t unions), or ipsec_ctl_t structures. 9408 * The ipsec_out_type and ipsec_in_type overlap in the two 9409 * data structures, and they are either set to IPSEC_OUT 9410 * or IPSEC_IN depending on which data structure it is. 9411 * ipsec_ctl_t is an IPSEC_CTL. 9412 * 9413 * All other M_CTL messages are sent to ip_wput_nondata() 9414 * for handling. 9415 */ 9416 if (mlen >= sizeof (io->ipsec_out_type)) 9417 mctltype = io->ipsec_out_type; 9418 9419 if ((mlen == sizeof (ipsec_ctl_t)) && 9420 (mctltype == IPSEC_CTL)) { 9421 ip_output(arg, first_mp, arg2, caller); 9422 return; 9423 } 9424 9425 if ((mlen < sizeof (ipsec_info_t)) || 9426 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9427 mp == NULL) { 9428 ip_wput_nondata(NULL, q, first_mp, NULL); 9429 return; 9430 } 9431 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9432 if (q->q_next == NULL) { 9433 ip6h = (ip6_t *)mp->b_rptr; 9434 /* 9435 * For a freshly-generated TCP dgram that needs IPV6 9436 * processing, don't call ip_wput immediately. We can 9437 * tell this by the ipsec_out_proc_begin. 
In-progress 9438 * IPSEC_OUT messages have proc_begin set to TRUE, 9439 * and we want to send all IPSEC_IN messages to 9440 * ip_wput() for IPsec processing or finishing. 9441 */ 9442 if (mctltype == IPSEC_IN || 9443 IPVER(ip6h) != IPV6_VERSION || 9444 io->ipsec_out_proc_begin) { 9445 mibptr = &ipst->ips_ip6_mib; 9446 goto notv6; 9447 } 9448 } 9449 } else if (DB_TYPE(mp) != M_DATA) { 9450 ip_wput_nondata(NULL, q, mp, NULL); 9451 return; 9452 } 9453 9454 ip6h = (ip6_t *)mp->b_rptr; 9455 9456 if (IPVER(ip6h) != IPV6_VERSION) { 9457 mibptr = &ipst->ips_ip6_mib; 9458 goto notv6; 9459 } 9460 9461 if (q->q_next != NULL) { 9462 /* 9463 * We don't know if this ill will be used for IPv6 9464 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9465 * ipif_set_values() sets the ill_isv6 flag to true if 9466 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9467 * just drop the packet. 9468 */ 9469 if (!ill->ill_isv6) { 9470 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9471 "ILLF_IPV6 was set\n")); 9472 freemsg(first_mp); 9473 return; 9474 } 9475 /* For uniformity do a refhold */ 9476 mutex_enter(&ill->ill_lock); 9477 if (!ILL_CAN_LOOKUP(ill)) { 9478 mutex_exit(&ill->ill_lock); 9479 freemsg(first_mp); 9480 return; 9481 } 9482 ill_refhold_locked(ill); 9483 mutex_exit(&ill->ill_lock); 9484 mibptr = ill->ill_ip_mib; 9485 9486 ASSERT(mibptr != NULL); 9487 unspec_src = 0; 9488 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9489 do_outrequests = B_FALSE; 9490 zoneid = (zoneid_t)(uintptr_t)arg; 9491 } else { 9492 connp = (conn_t *)arg; 9493 ASSERT(connp != NULL); 9494 zoneid = connp->conn_zoneid; 9495 9496 /* is queue flow controlled? */ 9497 if ((q->q_first || connp->conn_draining) && 9498 (caller == IP_WPUT)) { 9499 /* 9500 * 1) TCP sends down M_CTL for detached connections. 9501 * 2) AH/ESP sends down M_CTL. 9502 * 9503 * We don't flow control either of the above. Only 9504 * UDP and others are flow controlled for which we 9505 * can't have a M_CTL. 
9506 */ 9507 ASSERT(first_mp == mp); 9508 (void) putq(q, mp); 9509 return; 9510 } 9511 mibptr = &ipst->ips_ip6_mib; 9512 unspec_src = connp->conn_unspec_src; 9513 do_outrequests = B_TRUE; 9514 if (mp->b_flag & MSGHASREF) { 9515 mp->b_flag &= ~MSGHASREF; 9516 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9517 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9518 need_decref = B_TRUE; 9519 } 9520 9521 /* 9522 * If there is a policy, try to attach an ipsec_out in 9523 * the front. At the end, first_mp either points to a 9524 * M_DATA message or IPSEC_OUT message linked to a 9525 * M_DATA message. We have to do it now as we might 9526 * lose the "conn" if we go through ip_newroute. 9527 */ 9528 if (!mctl_present && 9529 (connp->conn_out_enforce_policy || 9530 connp->conn_latch != NULL)) { 9531 ASSERT(first_mp == mp); 9532 /* XXX Any better way to get the protocol fast ? */ 9533 if (((mp = ipsec_attach_ipsec_out(mp, connp, NULL, 9534 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9535 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9536 if (need_decref) 9537 CONN_DEC_REF(connp); 9538 return; 9539 } else { 9540 ASSERT(mp->b_datap->db_type == M_CTL); 9541 first_mp = mp; 9542 mp = mp->b_cont; 9543 mctl_present = B_TRUE; 9544 io = (ipsec_out_t *)first_mp->b_rptr; 9545 } 9546 } 9547 } 9548 9549 /* check for alignment and full IPv6 header */ 9550 if (!OK_32PTR((uchar_t *)ip6h) || 9551 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9552 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9553 if (do_outrequests) 9554 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9555 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9556 freemsg(first_mp); 9557 if (ill != NULL) 9558 ill_refrele(ill); 9559 if (need_decref) 9560 CONN_DEC_REF(connp); 9561 return; 9562 } 9563 v6dstp = &ip6h->ip6_dst; 9564 cksum_request = -1; 9565 ip6i = NULL; 9566 9567 /* 9568 * Once neighbor discovery has completed, ndp_process() will provide 9569 * locally generated packets for which processing can be reattempted. 
9570 * In these cases, connp is NULL and the original zone is part of a 9571 * prepended ipsec_out_t. 9572 */ 9573 if (io != NULL) { 9574 /* 9575 * When coming from icmp_input_v6, the zoneid might not match 9576 * for the loopback case, because inside icmp_input_v6 the 9577 * queue_t is a conn queue from the sending side. 9578 */ 9579 zoneid = io->ipsec_out_zoneid; 9580 ASSERT(zoneid != ALL_ZONES); 9581 } 9582 9583 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9584 /* 9585 * This is an ip6i_t header followed by an ip6_hdr. 9586 * Check which fields are set. 9587 * 9588 * When the packet comes from a transport we should have 9589 * all needed headers in the first mblk. However, when 9590 * going through ip_newroute*_v6 the ip6i might be in 9591 * a separate mblk when we return here. In that case 9592 * we pullup everything to ensure that extension and transport 9593 * headers "stay" in the first mblk. 9594 */ 9595 ip6i = (ip6i_t *)ip6h; 9596 ip6i_flags = ip6i->ip6i_flags; 9597 9598 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9599 ((mp->b_wptr - (uchar_t *)ip6i) >= 9600 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9601 9602 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9603 if (!pullupmsg(mp, -1)) { 9604 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9605 if (do_outrequests) { 9606 BUMP_MIB(mibptr, 9607 ipIfStatsHCOutRequests); 9608 } 9609 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9610 freemsg(first_mp); 9611 if (ill != NULL) 9612 ill_refrele(ill); 9613 if (need_decref) 9614 CONN_DEC_REF(connp); 9615 return; 9616 } 9617 ip6h = (ip6_t *)mp->b_rptr; 9618 v6dstp = &ip6h->ip6_dst; 9619 ip6i = (ip6i_t *)ip6h; 9620 } 9621 ip6h = (ip6_t *)&ip6i[1]; 9622 9623 /* 9624 * Advance rptr past the ip6i_t to get ready for 9625 * transmitting the packet. However, if the packet gets 9626 * passed to ip_newroute*_v6 then rptr is moved back so 9627 * that the ip6i_t header can be inspected when the 9628 * packet comes back here after passing through 9629 * ire_add_then_send. 
9630 */ 9631 mp->b_rptr = (uchar_t *)ip6h; 9632 9633 /* 9634 * IP6I_ATTACH_IF is set in this function when we had a 9635 * conn and it was either bound to the IPFF_NOFAILOVER address 9636 * or IPV6_BOUND_PIF was set. These options override other 9637 * options that set the ifindex. We come here with 9638 * IP6I_ATTACH_IF set when we can't find the ire and 9639 * ip_newroute_v6 is feeding the packet for second time. 9640 */ 9641 if ((ip6i->ip6i_flags & IP6I_IFINDEX) || 9642 (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9643 ASSERT(ip6i->ip6i_ifindex != 0); 9644 if (ill != NULL) 9645 ill_refrele(ill); 9646 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9647 NULL, NULL, NULL, NULL, ipst); 9648 if (ill == NULL) { 9649 if (do_outrequests) { 9650 BUMP_MIB(mibptr, 9651 ipIfStatsHCOutRequests); 9652 } 9653 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9654 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9655 ip6i->ip6i_ifindex)); 9656 if (need_decref) 9657 CONN_DEC_REF(connp); 9658 freemsg(first_mp); 9659 return; 9660 } 9661 mibptr = ill->ill_ip_mib; 9662 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9663 /* 9664 * Preserve the index so that when we return 9665 * from IPSEC processing, we know where to 9666 * send the packet. 9667 */ 9668 if (mctl_present) { 9669 ASSERT(io != NULL); 9670 io->ipsec_out_ill_index = 9671 ip6i->ip6i_ifindex; 9672 } 9673 } 9674 if (ip6i->ip6i_flags & IP6I_ATTACH_IF) { 9675 /* 9676 * This is a multipathing probe packet that has 9677 * been delayed in ND resolution. 
Drop the 9678 * packet for the reasons mentioned in 9679 * nce_queue_mp() 9680 */ 9681 if ((ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) && 9682 (ip6i->ip6i_flags & IP6I_ND_DELAYED)) { 9683 freemsg(first_mp); 9684 ill_refrele(ill); 9685 if (need_decref) 9686 CONN_DEC_REF(connp); 9687 return; 9688 } 9689 } 9690 } 9691 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9692 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9693 9694 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9695 if (secpolicy_net_rawaccess(cr) != 0) { 9696 /* 9697 * Use IPCL_ZONEID to honor SO_ALLZONES. 9698 */ 9699 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9700 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9701 NULL, connp != NULL ? 9702 IPCL_ZONEID(connp) : zoneid, NULL, 9703 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9704 if (ire == NULL) { 9705 if (do_outrequests) 9706 BUMP_MIB(mibptr, 9707 ipIfStatsHCOutRequests); 9708 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9709 ip1dbg(("ip_wput_v6: bad source " 9710 "addr\n")); 9711 freemsg(first_mp); 9712 if (ill != NULL) 9713 ill_refrele(ill); 9714 if (need_decref) 9715 CONN_DEC_REF(connp); 9716 return; 9717 } 9718 ire_refrele(ire); 9719 } 9720 /* No need to verify again when using ip_newroute */ 9721 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9722 } 9723 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9724 /* 9725 * Make sure they match since ip_newroute*_v6 etc might 9726 * (unknown to them) inspect ip6i_nexthop when 9727 * they think they access ip6_dst. 9728 */ 9729 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9730 } 9731 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9732 cksum_request = 1; 9733 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9734 cksum_request = ip6i->ip6i_checksum_off; 9735 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9736 unspec_src = 1; 9737 9738 if (do_outrequests && ill != NULL) { 9739 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9740 do_outrequests = B_FALSE; 9741 } 9742 /* 9743 * Store ip6i_t info that we need after we come back 9744 * from IPSEC processing. 
9745 */ 9746 if (mctl_present) { 9747 ASSERT(io != NULL); 9748 io->ipsec_out_unspec_src = unspec_src; 9749 } 9750 } 9751 if (connp != NULL && connp->conn_dontroute) 9752 ip6h->ip6_hops = 1; 9753 9754 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9755 goto ipv6multicast; 9756 9757 /* 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings. */ 9758 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 9759 ill_t *conn_outgoing_pill; 9760 9761 conn_outgoing_pill = conn_get_held_ill(connp, 9762 &connp->conn_outgoing_pill, &err); 9763 if (err == ILL_LOOKUP_FAILED) { 9764 if (ill != NULL) 9765 ill_refrele(ill); 9766 if (need_decref) 9767 CONN_DEC_REF(connp); 9768 freemsg(first_mp); 9769 return; 9770 } 9771 if (conn_outgoing_pill != NULL) { 9772 if (ill != NULL) 9773 ill_refrele(ill); 9774 ill = conn_outgoing_pill; 9775 attach_if = B_TRUE; 9776 match_flags = MATCH_IRE_ILL; 9777 mibptr = ill->ill_ip_mib; 9778 9779 /* 9780 * Check if we need an ire that will not be 9781 * looked up by anybody else i.e. HIDDEN. 9782 */ 9783 if (ill_is_probeonly(ill)) 9784 match_flags |= MATCH_IRE_MARK_HIDDEN; 9785 goto send_from_ill; 9786 } 9787 } 9788 9789 /* 2. If ipc_nofailover_ill is set then use that ill. */ 9790 if (connp != NULL && connp->conn_nofailover_ill != NULL) { 9791 ill_t *conn_nofailover_ill; 9792 9793 conn_nofailover_ill = conn_get_held_ill(connp, 9794 &connp->conn_nofailover_ill, &err); 9795 if (err == ILL_LOOKUP_FAILED) { 9796 if (ill != NULL) 9797 ill_refrele(ill); 9798 if (need_decref) 9799 CONN_DEC_REF(connp); 9800 freemsg(first_mp); 9801 return; 9802 } 9803 if (conn_nofailover_ill != NULL) { 9804 if (ill != NULL) 9805 ill_refrele(ill); 9806 ill = conn_nofailover_ill; 9807 attach_if = B_TRUE; 9808 /* 9809 * Assumes that ipc_nofailover_ill is used only for 9810 * multipathing probe packets. These packets are better 9811 * dropped, if they are delayed in ND resolution, for 9812 * the reasons described in nce_queue_mp(). 
9813 * IP6I_DROP_IFDELAYED will be set later on in this 9814 * function for this packet. 9815 */ 9816 drop_if_delayed = B_TRUE; 9817 match_flags = MATCH_IRE_ILL; 9818 mibptr = ill->ill_ip_mib; 9819 9820 /* 9821 * Check if we need an ire that will not be 9822 * looked up by anybody else i.e. HIDDEN. 9823 */ 9824 if (ill_is_probeonly(ill)) 9825 match_flags |= MATCH_IRE_MARK_HIDDEN; 9826 goto send_from_ill; 9827 } 9828 } 9829 9830 /* 9831 * Redo 1. If we did not find an IRE_CACHE the first time, we should 9832 * have an ip6i_t with IP6I_ATTACH_IF if IPV6_BOUND_PIF or 9833 * bind to the IPIF_NOFAILOVER address was used on this endpoint. 9834 */ 9835 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 9836 ASSERT(ip6i->ip6i_ifindex != 0); 9837 attach_if = B_TRUE; 9838 ASSERT(ill != NULL); 9839 match_flags = MATCH_IRE_ILL; 9840 9841 /* 9842 * Check if we need an ire that will not be 9843 * looked up by anybody else i.e. HIDDEN. 9844 */ 9845 if (ill_is_probeonly(ill)) 9846 match_flags |= MATCH_IRE_MARK_HIDDEN; 9847 goto send_from_ill; 9848 } 9849 9850 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9851 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9852 ASSERT(ill != NULL); 9853 goto send_from_ill; 9854 } 9855 9856 /* 9857 * 4. If q is an ill queue and (link local or multicast destination) 9858 * then use that ill. 9859 */ 9860 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) { 9861 goto send_from_ill; 9862 } 9863 9864 /* 5. If IPV6_BOUND_IF has been set use that ill. 
*/ 9865 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9866 ill_t *conn_outgoing_ill; 9867 9868 conn_outgoing_ill = conn_get_held_ill(connp, 9869 &connp->conn_outgoing_ill, &err); 9870 if (err == ILL_LOOKUP_FAILED) { 9871 if (ill != NULL) 9872 ill_refrele(ill); 9873 if (need_decref) 9874 CONN_DEC_REF(connp); 9875 freemsg(first_mp); 9876 return; 9877 } 9878 if (ill != NULL) 9879 ill_refrele(ill); 9880 ill = conn_outgoing_ill; 9881 mibptr = ill->ill_ip_mib; 9882 goto send_from_ill; 9883 } 9884 9885 /* 9886 * 6. For unicast: Just do an IRE lookup for the best match. 9887 * If we get here for a link-local address it is rather random 9888 * what interface we pick on a multihomed host. 9889 * *If* there is an IRE_CACHE (and the link-local address 9890 * isn't duplicated on multi links) this will find the IRE_CACHE. 9891 * Otherwise it will use one of the matching IRE_INTERFACE routes 9892 * for the link-local prefix. Hence, applications 9893 * *should* be encouraged to specify an outgoing interface when sending 9894 * to a link local address. 9895 */ 9896 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9897 !connp->conn_fully_bound)) { 9898 /* 9899 * We cache IRE_CACHEs to avoid lookups. We don't do 9900 * this for the tcp global queue and listen end point 9901 * as it does not really have a real destination to 9902 * talk to. 9903 */ 9904 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), 9905 ipst); 9906 } else { 9907 /* 9908 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9909 * grab a lock here to check for CONDEMNED as it is okay 9910 * to send a packet or two with the IRE_CACHE that is going 9911 * away. 9912 */ 9913 mutex_enter(&connp->conn_lock); 9914 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9915 if (ire != NULL && 9916 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9917 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9918 9919 IRE_REFHOLD(ire); 9920 mutex_exit(&connp->conn_lock); 9921 9922 } else { 9923 boolean_t cached = B_FALSE; 9924 9925 connp->conn_ire_cache = NULL; 9926 mutex_exit(&connp->conn_lock); 9927 /* Release the old ire */ 9928 if (ire != NULL && sctp_ire == NULL) 9929 IRE_REFRELE_NOTR(ire); 9930 9931 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9932 MBLK_GETLABEL(mp), ipst); 9933 if (ire != NULL) { 9934 IRE_REFHOLD_NOTR(ire); 9935 9936 mutex_enter(&connp->conn_lock); 9937 if (!(connp->conn_state_flags & CONN_CLOSING) && 9938 (connp->conn_ire_cache == NULL)) { 9939 rw_enter(&ire->ire_bucket->irb_lock, 9940 RW_READER); 9941 if (!(ire->ire_marks & 9942 IRE_MARK_CONDEMNED)) { 9943 connp->conn_ire_cache = ire; 9944 cached = B_TRUE; 9945 } 9946 rw_exit(&ire->ire_bucket->irb_lock); 9947 } 9948 mutex_exit(&connp->conn_lock); 9949 9950 /* 9951 * We can continue to use the ire but since it 9952 * was not cached, we should drop the extra 9953 * reference. 9954 */ 9955 if (!cached) 9956 IRE_REFRELE_NOTR(ire); 9957 } 9958 } 9959 } 9960 9961 if (ire != NULL) { 9962 if (do_outrequests) { 9963 /* Handle IRE_LOCAL's that might appear here */ 9964 if (ire->ire_type == IRE_CACHE) { 9965 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9966 ill_ip_mib; 9967 } else { 9968 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9969 } 9970 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9971 } 9972 ASSERT(!attach_if); 9973 9974 /* 9975 * Check if the ire has the RTF_MULTIRT flag, inherited 9976 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9977 */ 9978 if (ire->ire_flags & RTF_MULTIRT) { 9979 /* 9980 * Force hop limit of multirouted packets if required. 9981 * The hop limit of such packets is bounded by the 9982 * ip_multirt_ttl ndd variable. 9983 * NDP packets must have a hop limit of 255; don't 9984 * change the hop limit in that case. 
9985 */ 9986 if ((ipst->ips_ip_multirt_ttl > 0) && 9987 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9988 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9989 if (ip_debug > 3) { 9990 ip2dbg(("ip_wput_v6: forcing multirt " 9991 "hop limit to %d (was %d) ", 9992 ipst->ips_ip_multirt_ttl, 9993 ip6h->ip6_hops)); 9994 pr_addr_dbg("v6dst %s\n", AF_INET6, 9995 &ire->ire_addr_v6); 9996 } 9997 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9998 } 9999 10000 /* 10001 * We look at this point if there are pending 10002 * unresolved routes. ire_multirt_need_resolve_v6() 10003 * checks in O(n) that all IRE_OFFSUBNET ire 10004 * entries for the packet's destination and 10005 * flagged RTF_MULTIRT are currently resolved. 10006 * If some remain unresolved, we do a copy 10007 * of the current message. It will be used 10008 * to initiate additional route resolutions. 10009 */ 10010 multirt_need_resolve = 10011 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10012 MBLK_GETLABEL(first_mp), ipst); 10013 ip2dbg(("ip_wput_v6: ire %p, " 10014 "multirt_need_resolve %d, first_mp %p\n", 10015 (void *)ire, multirt_need_resolve, 10016 (void *)first_mp)); 10017 if (multirt_need_resolve) { 10018 copy_mp = copymsg(first_mp); 10019 if (copy_mp != NULL) { 10020 MULTIRT_DEBUG_TAG(copy_mp); 10021 } 10022 } 10023 } 10024 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10025 connp, caller, 0, ip6i_flags, zoneid); 10026 if (need_decref) { 10027 CONN_DEC_REF(connp); 10028 connp = NULL; 10029 } 10030 IRE_REFRELE(ire); 10031 10032 /* 10033 * Try to resolve another multiroute if 10034 * ire_multirt_need_resolve_v6() deemed it necessary. 10035 * copy_mp will be consumed (sent or freed) by 10036 * ip_newroute_v6(). 
10037 */ 10038 if (copy_mp != NULL) { 10039 if (mctl_present) { 10040 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10041 } else { 10042 ip6h = (ip6_t *)copy_mp->b_rptr; 10043 } 10044 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10045 &ip6h->ip6_src, NULL, zoneid, ipst); 10046 } 10047 if (ill != NULL) 10048 ill_refrele(ill); 10049 return; 10050 } 10051 10052 /* 10053 * No full IRE for this destination. Send it to 10054 * ip_newroute_v6 to see if anything else matches. 10055 * Mark this packet as having originated on this 10056 * machine. 10057 * Update rptr if there was an ip6i_t header. 10058 */ 10059 mp->b_prev = NULL; 10060 mp->b_next = NULL; 10061 if (ip6i != NULL) 10062 mp->b_rptr -= sizeof (ip6i_t); 10063 10064 if (unspec_src) { 10065 if (ip6i == NULL) { 10066 /* 10067 * Add ip6i_t header to carry unspec_src 10068 * until the packet comes back in ip_wput_v6. 10069 */ 10070 mp = ip_add_info_v6(mp, NULL, v6dstp); 10071 if (mp == NULL) { 10072 if (do_outrequests) 10073 BUMP_MIB(mibptr, 10074 ipIfStatsHCOutRequests); 10075 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10076 if (mctl_present) 10077 freeb(first_mp); 10078 if (ill != NULL) 10079 ill_refrele(ill); 10080 if (need_decref) 10081 CONN_DEC_REF(connp); 10082 return; 10083 } 10084 ip6i = (ip6i_t *)mp->b_rptr; 10085 10086 if (mctl_present) { 10087 ASSERT(first_mp != mp); 10088 first_mp->b_cont = mp; 10089 } else { 10090 first_mp = mp; 10091 } 10092 10093 if ((mp->b_wptr - (uchar_t *)ip6i) == 10094 sizeof (ip6i_t)) { 10095 /* 10096 * ndp_resolver called from ip_newroute_v6 10097 * expects pulled up message. 
10098 */ 10099 if (!pullupmsg(mp, -1)) { 10100 ip1dbg(("ip_wput_v6: pullupmsg" 10101 " failed\n")); 10102 if (do_outrequests) { 10103 BUMP_MIB(mibptr, 10104 ipIfStatsHCOutRequests); 10105 } 10106 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10107 freemsg(first_mp); 10108 if (ill != NULL) 10109 ill_refrele(ill); 10110 if (need_decref) 10111 CONN_DEC_REF(connp); 10112 return; 10113 } 10114 ip6i = (ip6i_t *)mp->b_rptr; 10115 } 10116 ip6h = (ip6_t *)&ip6i[1]; 10117 v6dstp = &ip6h->ip6_dst; 10118 } 10119 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10120 if (mctl_present) { 10121 ASSERT(io != NULL); 10122 io->ipsec_out_unspec_src = unspec_src; 10123 } 10124 } 10125 if (do_outrequests) 10126 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10127 if (need_decref) 10128 CONN_DEC_REF(connp); 10129 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 10130 if (ill != NULL) 10131 ill_refrele(ill); 10132 return; 10133 10134 10135 /* 10136 * Handle multicast packets with or without an conn. 10137 * Assumes that the transports set ip6_hops taking 10138 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 10139 * into account. 10140 */ 10141 ipv6multicast: 10142 ip2dbg(("ip_wput_v6: multicast\n")); 10143 10144 /* 10145 * 1. IPV6_BOUND_PIF takes precedence over all the ifindex settings 10146 * 2. If conn_nofailover_ill is set then use that ill. 10147 * 10148 * Hold the conn_lock till we refhold the ill of interest that is 10149 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 10150 * while holding any locks, postpone the refrele until after the 10151 * conn_lock is dropped. 
10152 */ 10153 if (connp != NULL) { 10154 mutex_enter(&connp->conn_lock); 10155 conn_lock_held = B_TRUE; 10156 } else { 10157 conn_lock_held = B_FALSE; 10158 } 10159 if (connp != NULL && connp->conn_outgoing_pill != NULL) { 10160 err = ill_check_and_refhold(connp->conn_outgoing_pill); 10161 if (err == ILL_LOOKUP_FAILED) { 10162 ip1dbg(("ip_output_v6: multicast" 10163 " conn_outgoing_pill no ipif\n")); 10164 multicast_discard: 10165 ASSERT(saved_ill == NULL); 10166 if (conn_lock_held) 10167 mutex_exit(&connp->conn_lock); 10168 if (ill != NULL) 10169 ill_refrele(ill); 10170 freemsg(first_mp); 10171 if (do_outrequests) 10172 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10173 if (need_decref) 10174 CONN_DEC_REF(connp); 10175 return; 10176 } 10177 saved_ill = ill; 10178 ill = connp->conn_outgoing_pill; 10179 attach_if = B_TRUE; 10180 match_flags = MATCH_IRE_ILL; 10181 mibptr = ill->ill_ip_mib; 10182 10183 /* 10184 * Check if we need an ire that will not be 10185 * looked up by anybody else i.e. HIDDEN. 10186 */ 10187 if (ill_is_probeonly(ill)) 10188 match_flags |= MATCH_IRE_MARK_HIDDEN; 10189 } else if (connp != NULL && connp->conn_nofailover_ill != NULL) { 10190 err = ill_check_and_refhold(connp->conn_nofailover_ill); 10191 if (err == ILL_LOOKUP_FAILED) { 10192 ip1dbg(("ip_output_v6: multicast" 10193 " conn_nofailover_ill no ipif\n")); 10194 goto multicast_discard; 10195 } 10196 saved_ill = ill; 10197 ill = connp->conn_nofailover_ill; 10198 attach_if = B_TRUE; 10199 match_flags = MATCH_IRE_ILL; 10200 10201 /* 10202 * Check if we need an ire that will not be 10203 * looked up by anybody else i.e. HIDDEN. 10204 */ 10205 if (ill_is_probeonly(ill)) 10206 match_flags |= MATCH_IRE_MARK_HIDDEN; 10207 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_ATTACH_IF)) { 10208 /* 10209 * Redo 1. 
If we did not find an IRE_CACHE the first time, 10210 * we should have an ip6i_t with IP6I_ATTACH_IF if 10211 * IPV6_BOUND_PIF or bind to the IPIF_NOFAILOVER address was 10212 * used on this endpoint. 10213 */ 10214 ASSERT(ip6i->ip6i_ifindex != 0); 10215 attach_if = B_TRUE; 10216 ASSERT(ill != NULL); 10217 match_flags = MATCH_IRE_ILL; 10218 10219 /* 10220 * Check if we need an ire that will not be 10221 * looked up by anybody else i.e. HIDDEN. 10222 */ 10223 if (ill_is_probeonly(ill)) 10224 match_flags |= MATCH_IRE_MARK_HIDDEN; 10225 } else if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 10226 /* 3. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 10227 10228 ASSERT(ill != NULL); 10229 } else if (ill != NULL) { 10230 /* 10231 * 4. If q is an ill queue and (link local or multicast 10232 * destination) then use that ill. 10233 * We don't need the ipif initialization here. 10234 * This useless assert below is just to prevent lint from 10235 * reporting a null body if statement. 10236 */ 10237 ASSERT(ill != NULL); 10238 } else if (connp != NULL) { 10239 /* 10240 * 5. If IPV6_BOUND_IF has been set use that ill. 10241 * 10242 * 6. For multicast: if IPV6_MULTICAST_IF has been set use it. 10243 * Otherwise look for the best IRE match for the unspecified 10244 * group to determine the ill. 10245 * 10246 * conn_multicast_ill is used for only IPv6 packets. 10247 * conn_multicast_ipif is used for only IPv4 packets. 10248 * Thus a PF_INET6 socket send both IPv4 and IPv6 10249 * multicast packets using different IP*_MULTICAST_IF 10250 * interfaces. 
10251 */ 10252 if (connp->conn_outgoing_ill != NULL) { 10253 err = ill_check_and_refhold(connp->conn_outgoing_ill); 10254 if (err == ILL_LOOKUP_FAILED) { 10255 ip1dbg(("ip_output_v6: multicast" 10256 " conn_outgoing_ill no ipif\n")); 10257 goto multicast_discard; 10258 } 10259 ill = connp->conn_outgoing_ill; 10260 } else if (connp->conn_multicast_ill != NULL) { 10261 err = ill_check_and_refhold(connp->conn_multicast_ill); 10262 if (err == ILL_LOOKUP_FAILED) { 10263 ip1dbg(("ip_output_v6: multicast" 10264 " conn_multicast_ill no ipif\n")); 10265 goto multicast_discard; 10266 } 10267 ill = connp->conn_multicast_ill; 10268 } else { 10269 mutex_exit(&connp->conn_lock); 10270 conn_lock_held = B_FALSE; 10271 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 10272 if (ipif == NULL) { 10273 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10274 goto multicast_discard; 10275 } 10276 /* 10277 * We have a ref to this ipif, so we can safely 10278 * access ipif_ill. 10279 */ 10280 ill = ipif->ipif_ill; 10281 mutex_enter(&ill->ill_lock); 10282 if (!ILL_CAN_LOOKUP(ill)) { 10283 mutex_exit(&ill->ill_lock); 10284 ipif_refrele(ipif); 10285 ill = NULL; 10286 ip1dbg(("ip_output_v6: multicast no ipif\n")); 10287 goto multicast_discard; 10288 } 10289 ill_refhold_locked(ill); 10290 mutex_exit(&ill->ill_lock); 10291 ipif_refrele(ipif); 10292 /* 10293 * Save binding until IPV6_MULTICAST_IF 10294 * changes it 10295 */ 10296 mutex_enter(&connp->conn_lock); 10297 connp->conn_multicast_ill = ill; 10298 connp->conn_orig_multicast_ifindex = 10299 ill->ill_phyint->phyint_ifindex; 10300 mutex_exit(&connp->conn_lock); 10301 } 10302 } 10303 if (conn_lock_held) 10304 mutex_exit(&connp->conn_lock); 10305 10306 if (saved_ill != NULL) 10307 ill_refrele(saved_ill); 10308 10309 ASSERT(ill != NULL); 10310 /* 10311 * For multicast loopback interfaces replace the multicast address 10312 * with a unicast address for the ire lookup. 
10313 */ 10314 if (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) 10315 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 10316 10317 mibptr = ill->ill_ip_mib; 10318 if (do_outrequests) { 10319 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10320 do_outrequests = B_FALSE; 10321 } 10322 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10323 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10324 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10325 10326 /* 10327 * As we may lose the conn by the time we reach ip_wput_ire_v6 10328 * we copy conn_multicast_loop and conn_dontroute on to an 10329 * ipsec_out. In case if this datagram goes out secure, 10330 * we need the ill_index also. Copy that also into the 10331 * ipsec_out. 10332 */ 10333 if (mctl_present) { 10334 io = (ipsec_out_t *)first_mp->b_rptr; 10335 ASSERT(first_mp->b_datap->db_type == M_CTL); 10336 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10337 } else { 10338 ASSERT(mp == first_mp); 10339 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 10340 NULL) { 10341 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10342 freemsg(mp); 10343 if (ill != NULL) 10344 ill_refrele(ill); 10345 if (need_decref) 10346 CONN_DEC_REF(connp); 10347 return; 10348 } 10349 io = (ipsec_out_t *)first_mp->b_rptr; 10350 /* This is not a secure packet */ 10351 io->ipsec_out_secure = B_FALSE; 10352 io->ipsec_out_use_global_policy = B_TRUE; 10353 io->ipsec_out_zoneid = 10354 (zoneid != ALL_ZONES ? 
zoneid : GLOBAL_ZONEID); 10355 first_mp->b_cont = mp; 10356 mctl_present = B_TRUE; 10357 } 10358 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10359 io->ipsec_out_unspec_src = unspec_src; 10360 if (connp != NULL) 10361 io->ipsec_out_dontroute = connp->conn_dontroute; 10362 10363 send_from_ill: 10364 ASSERT(ill != NULL); 10365 ASSERT(mibptr == ill->ill_ip_mib); 10366 if (do_outrequests) { 10367 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10368 do_outrequests = B_FALSE; 10369 } 10370 10371 if (io != NULL) 10372 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10373 10374 /* 10375 * When a specific ill is specified (using IPV6_PKTINFO, 10376 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10377 * on routing entries (ftable and ctable) that have a matching 10378 * ire->ire_ipif->ipif_ill. Thus this can only be used 10379 * for destinations that are on-link for the specific ill 10380 * and that can appear on multiple links. Thus it is useful 10381 * for multicast destinations, link-local destinations, and 10382 * at some point perhaps for site-local destinations (if the 10383 * node sits at a site boundary). 10384 * We create the cache entries in the regular ctable since 10385 * it can not "confuse" things for other destinations. 10386 * table. 10387 * 10388 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10389 * It is used only when ire_cache_lookup is used above. 10390 */ 10391 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10392 zoneid, MBLK_GETLABEL(mp), match_flags, ipst); 10393 if (ire != NULL) { 10394 /* 10395 * Check if the ire has the RTF_MULTIRT flag, inherited 10396 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10397 */ 10398 if (ire->ire_flags & RTF_MULTIRT) { 10399 /* 10400 * Force hop limit of multirouted packets if required. 10401 * The hop limit of such packets is bounded by the 10402 * ip_multirt_ttl ndd variable. 
10403 * NDP packets must have a hop limit of 255; don't 10404 * change the hop limit in that case. 10405 */ 10406 if ((ipst->ips_ip_multirt_ttl > 0) && 10407 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10408 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10409 if (ip_debug > 3) { 10410 ip2dbg(("ip_wput_v6: forcing multirt " 10411 "hop limit to %d (was %d) ", 10412 ipst->ips_ip_multirt_ttl, 10413 ip6h->ip6_hops)); 10414 pr_addr_dbg("v6dst %s\n", AF_INET6, 10415 &ire->ire_addr_v6); 10416 } 10417 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10418 } 10419 10420 /* 10421 * We look at this point if there are pending 10422 * unresolved routes. ire_multirt_need_resolve_v6() 10423 * checks in O(n) that all IRE_OFFSUBNET ire 10424 * entries for the packet's destination and 10425 * flagged RTF_MULTIRT are currently resolved. 10426 * If some remain unresolved, we make a copy 10427 * of the current message. It will be used 10428 * to initiate additional route resolutions. 10429 */ 10430 multirt_need_resolve = 10431 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10432 MBLK_GETLABEL(first_mp), ipst); 10433 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10434 "multirt_need_resolve %d, first_mp %p\n", 10435 (void *)ire, multirt_need_resolve, 10436 (void *)first_mp)); 10437 if (multirt_need_resolve) { 10438 copy_mp = copymsg(first_mp); 10439 if (copy_mp != NULL) { 10440 MULTIRT_DEBUG_TAG(copy_mp); 10441 } 10442 } 10443 } 10444 10445 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10446 ill->ill_name, (void *)ire, 10447 ill->ill_phyint->phyint_ifindex)); 10448 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10449 connp, caller, 10450 (attach_if ? ill->ill_phyint->phyint_ifindex : 0), 10451 ip6i_flags, zoneid); 10452 ire_refrele(ire); 10453 if (need_decref) { 10454 CONN_DEC_REF(connp); 10455 connp = NULL; 10456 } 10457 10458 /* 10459 * Try to resolve another multiroute if 10460 * ire_multirt_need_resolve_v6() deemed it necessary. 
10461 * copy_mp will be consumed (sent or freed) by 10462 * ip_newroute_[ipif_]v6(). 10463 */ 10464 if (copy_mp != NULL) { 10465 if (mctl_present) { 10466 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10467 } else { 10468 ip6h = (ip6_t *)copy_mp->b_rptr; 10469 } 10470 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10471 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10472 zoneid, ipst); 10473 if (ipif == NULL) { 10474 ip1dbg(("ip_wput_v6: No ipif for " 10475 "multicast\n")); 10476 MULTIRT_DEBUG_UNTAG(copy_mp); 10477 freemsg(copy_mp); 10478 return; 10479 } 10480 ip_newroute_ipif_v6(q, copy_mp, ipif, 10481 ip6h->ip6_dst, unspec_src, zoneid); 10482 ipif_refrele(ipif); 10483 } else { 10484 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10485 &ip6h->ip6_src, ill, zoneid, ipst); 10486 } 10487 } 10488 ill_refrele(ill); 10489 return; 10490 } 10491 if (need_decref) { 10492 CONN_DEC_REF(connp); 10493 connp = NULL; 10494 } 10495 10496 /* Update rptr if there was an ip6i_t header. */ 10497 if (ip6i != NULL) 10498 mp->b_rptr -= sizeof (ip6i_t); 10499 if (unspec_src || attach_if) { 10500 if (ip6i == NULL) { 10501 /* 10502 * Add ip6i_t header to carry unspec_src 10503 * or attach_if until the packet comes back in 10504 * ip_wput_v6. 10505 */ 10506 if (mctl_present) { 10507 first_mp->b_cont = 10508 ip_add_info_v6(mp, NULL, v6dstp); 10509 mp = first_mp->b_cont; 10510 if (mp == NULL) 10511 freeb(first_mp); 10512 } else { 10513 first_mp = mp = ip_add_info_v6(mp, NULL, 10514 v6dstp); 10515 } 10516 if (mp == NULL) { 10517 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10518 ill_refrele(ill); 10519 return; 10520 } 10521 ip6i = (ip6i_t *)mp->b_rptr; 10522 if ((mp->b_wptr - (uchar_t *)ip6i) == 10523 sizeof (ip6i_t)) { 10524 /* 10525 * ndp_resolver called from ip_newroute_v6 10526 * expects a pulled up message. 
10527 */ 10528 if (!pullupmsg(mp, -1)) { 10529 ip1dbg(("ip_wput_v6: pullupmsg" 10530 " failed\n")); 10531 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10532 freemsg(first_mp); 10533 return; 10534 } 10535 ip6i = (ip6i_t *)mp->b_rptr; 10536 } 10537 ip6h = (ip6_t *)&ip6i[1]; 10538 v6dstp = &ip6h->ip6_dst; 10539 } 10540 if (unspec_src) 10541 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10542 if (attach_if) { 10543 /* 10544 * Bind to nofailover/BOUND_PIF overrides ifindex. 10545 */ 10546 ip6i->ip6i_flags |= IP6I_ATTACH_IF; 10547 ip6i->ip6i_flags &= ~IP6I_IFINDEX; 10548 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 10549 if (drop_if_delayed) { 10550 /* This is a multipathing probe packet */ 10551 ip6i->ip6i_flags |= IP6I_DROP_IFDELAYED; 10552 } 10553 } 10554 if (mctl_present) { 10555 ASSERT(io != NULL); 10556 io->ipsec_out_unspec_src = unspec_src; 10557 } 10558 } 10559 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10560 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, *v6dstp, 10561 unspec_src, zoneid); 10562 } else { 10563 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10564 zoneid, ipst); 10565 } 10566 ill_refrele(ill); 10567 return; 10568 10569 notv6: 10570 /* 10571 * XXX implement a IPv4 and IPv6 packet counter per conn and 10572 * switch when ratio exceeds e.g. 10:1 10573 */ 10574 if (q->q_next == NULL) { 10575 connp = Q_TO_CONN(q); 10576 10577 if (IPCL_IS_TCP(connp)) { 10578 /* change conn_send for the tcp_v4_connections */ 10579 connp->conn_send = ip_output; 10580 } else if (connp->conn_ulp == IPPROTO_SCTP) { 10581 /* The 'q' is the default SCTP queue */ 10582 connp = (conn_t *)arg; 10583 } else { 10584 ip_setqinfo(RD(q), IPV4_MINOR, B_TRUE, ipst); 10585 } 10586 } 10587 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10588 (void) ip_output(arg, first_mp, arg2, caller); 10589 if (ill != NULL) 10590 ill_refrele(ill); 10591 } 10592 10593 /* 10594 * If this is a conn_t queue, then we pass in the conn. This includes the 10595 * zoneid. 
10596 * Otherwise, this is a message for an ill_t queue, 10597 * in which case we use the global zoneid since those are all part of 10598 * the global zone. 10599 */ 10600 static void 10601 ip_wput_v6(queue_t *q, mblk_t *mp) 10602 { 10603 if (CONN_Q(q)) 10604 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10605 else 10606 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10607 } 10608 10609 static void 10610 ipsec_out_attach_if(ipsec_out_t *io, int attach_index) 10611 { 10612 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10613 io->ipsec_out_attach_if = B_TRUE; 10614 io->ipsec_out_ill_index = attach_index; 10615 } 10616 10617 /* 10618 * NULL send-to queue - packet is to be delivered locally. 10619 */ 10620 void 10621 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10622 ire_t *ire, int fanout_flags) 10623 { 10624 uint32_t ports; 10625 mblk_t *mp = first_mp, *first_mp1; 10626 boolean_t mctl_present; 10627 uint8_t nexthdr; 10628 uint16_t hdr_length; 10629 ipsec_out_t *io; 10630 mib2_ipIfStatsEntry_t *mibptr; 10631 ilm_t *ilm; 10632 uint_t nexthdr_offset; 10633 ip_stack_t *ipst = ill->ill_ipst; 10634 10635 if (DB_TYPE(mp) == M_CTL) { 10636 io = (ipsec_out_t *)mp->b_rptr; 10637 if (!io->ipsec_out_secure) { 10638 mp = mp->b_cont; 10639 freeb(first_mp); 10640 first_mp = mp; 10641 mctl_present = B_FALSE; 10642 } else { 10643 mctl_present = B_TRUE; 10644 mp = first_mp->b_cont; 10645 ipsec_out_to_in(first_mp); 10646 } 10647 } else { 10648 mctl_present = B_FALSE; 10649 } 10650 10651 /* 10652 * Remove reachability confirmation bit from version field 10653 * before passing the packet on to any firewall hooks or 10654 * looping back the packet. 
10655 */ 10656 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10657 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10658 10659 DTRACE_PROBE4(ip6__loopback__in__start, 10660 ill_t *, ill, ill_t *, NULL, 10661 ip6_t *, ip6h, mblk_t *, first_mp); 10662 10663 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10664 ipst->ips_ipv6firewall_loopback_in, 10665 ill, NULL, ip6h, first_mp, mp, ipst); 10666 10667 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10668 10669 if (first_mp == NULL) 10670 return; 10671 10672 nexthdr = ip6h->ip6_nxt; 10673 mibptr = ill->ill_ip_mib; 10674 10675 /* Fastpath */ 10676 switch (nexthdr) { 10677 case IPPROTO_TCP: 10678 case IPPROTO_UDP: 10679 case IPPROTO_ICMPV6: 10680 case IPPROTO_SCTP: 10681 hdr_length = IPV6_HDR_LEN; 10682 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10683 (uchar_t *)ip6h); 10684 break; 10685 default: { 10686 uint8_t *nexthdrp; 10687 10688 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10689 &hdr_length, &nexthdrp)) { 10690 /* Malformed packet */ 10691 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10692 freemsg(first_mp); 10693 return; 10694 } 10695 nexthdr = *nexthdrp; 10696 nexthdr_offset = nexthdrp - (uint8_t *)ip6h; 10697 break; 10698 } 10699 } 10700 10701 UPDATE_OB_PKT_COUNT(ire); 10702 ire->ire_last_used_time = lbolt; 10703 10704 switch (nexthdr) { 10705 case IPPROTO_TCP: 10706 if (DB_TYPE(mp) == M_DATA) { 10707 /* 10708 * M_DATA mblk, so init mblk (chain) for 10709 * no struio(). 
10710 */ 10711 mblk_t *mp1 = mp; 10712 10713 do { 10714 mp1->b_datap->db_struioflag = 0; 10715 } while ((mp1 = mp1->b_cont) != NULL); 10716 } 10717 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10718 TCP_PORTS_OFFSET); 10719 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10720 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10721 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10722 hdr_length, mctl_present, ire->ire_zoneid); 10723 return; 10724 10725 case IPPROTO_UDP: 10726 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10727 UDP_PORTS_OFFSET); 10728 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10729 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10730 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10731 return; 10732 10733 case IPPROTO_SCTP: 10734 { 10735 uint_t ipif_seqid = ire->ire_ipif->ipif_seqid; 10736 10737 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10738 ip_fanout_sctp(mp, ill, (ipha_t *)ip6h, ports, 10739 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10740 mctl_present, IP6_NO_IPPOLICY, ipif_seqid, 10741 ire->ire_zoneid); 10742 return; 10743 } 10744 case IPPROTO_ICMPV6: { 10745 icmp6_t *icmp6; 10746 10747 /* check for full IPv6+ICMPv6 header */ 10748 if ((mp->b_wptr - mp->b_rptr) < 10749 (hdr_length + ICMP6_MINLEN)) { 10750 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10751 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10752 " failed\n")); 10753 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10754 freemsg(first_mp); 10755 return; 10756 } 10757 ip6h = (ip6_t *)mp->b_rptr; 10758 } 10759 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10760 10761 /* Update output mib stats */ 10762 icmp_update_out_mib_v6(ill, icmp6); 10763 10764 /* Check variable for testing applications */ 10765 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10766 freemsg(first_mp); 10767 return; 10768 } 10769 /* 10770 * Assume that there is always at least one conn for 10771 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10772 * where there is no conn. 
10773 */ 10774 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10775 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 10776 /* 10777 * In the multicast case, applications may have 10778 * joined the group from different zones, so we 10779 * need to deliver the packet to each of them. 10780 * Loop through the multicast memberships 10781 * structures (ilm) on the receive ill and send 10782 * a copy of the packet up each matching one. 10783 * However, we don't do this for multicasts sent 10784 * on the loopback interface (PHYI_LOOPBACK flag 10785 * set) as they must stay in the sender's zone. 10786 */ 10787 ILM_WALKER_HOLD(ill); 10788 for (ilm = ill->ill_ilm; ilm != NULL; 10789 ilm = ilm->ilm_next) { 10790 if (ilm->ilm_flags & ILM_DELETED) 10791 continue; 10792 if (!IN6_ARE_ADDR_EQUAL( 10793 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10794 continue; 10795 if ((fanout_flags & 10796 IP_FF_NO_MCAST_LOOP) && 10797 ilm->ilm_zoneid == ire->ire_zoneid) 10798 continue; 10799 if (!ipif_lookup_zoneid(ill, 10800 ilm->ilm_zoneid, IPIF_UP, NULL)) 10801 continue; 10802 10803 first_mp1 = ip_copymsg(first_mp); 10804 if (first_mp1 == NULL) 10805 continue; 10806 icmp_inbound_v6(q, first_mp1, ill, 10807 hdr_length, mctl_present, 10808 IP6_NO_IPPOLICY, ilm->ilm_zoneid, 10809 NULL); 10810 } 10811 ILM_WALKER_RELE(ill); 10812 } else { 10813 first_mp1 = ip_copymsg(first_mp); 10814 if (first_mp1 != NULL) 10815 icmp_inbound_v6(q, first_mp1, ill, 10816 hdr_length, mctl_present, 10817 IP6_NO_IPPOLICY, ire->ire_zoneid, 10818 NULL); 10819 } 10820 } 10821 /* FALLTHRU */ 10822 default: { 10823 /* 10824 * Handle protocols with which IPv6 is less intimate. 10825 */ 10826 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10827 10828 /* 10829 * Enable sending ICMP for "Unknown" nexthdr 10830 * case. i.e. where we did not FALLTHRU from 10831 * IPPROTO_ICMPV6 processing case above. 
10832 */ 10833 if (nexthdr != IPPROTO_ICMPV6) 10834 fanout_flags |= IP_FF_SEND_ICMP; 10835 /* 10836 * Note: There can be more than one stream bound 10837 * to a particular protocol. When this is the case, 10838 * each one gets a copy of any incoming packets. 10839 */ 10840 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10841 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10842 mctl_present, ire->ire_zoneid); 10843 return; 10844 } 10845 } 10846 } 10847 10848 /* 10849 * Send packet using IRE. 10850 * Checksumming is controlled by cksum_request: 10851 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10852 * 1 => Skip TCP/UDP/SCTP checksum 10853 * Otherwise => checksum_request contains insert offset for checksum 10854 * 10855 * Assumes that the following set of headers appear in the first 10856 * mblk: 10857 * ip6_t 10858 * Any extension headers 10859 * TCP/UDP/SCTP header (if present) 10860 * The routine can handle an ICMPv6 header that is not in the first mblk. 10861 * 10862 * NOTE : This function does not ire_refrele the ire passed in as the 10863 * argument unlike ip_wput_ire where the REFRELE is done. 10864 * Refer to ip_wput_ire for more on this. 10865 */ 10866 static void 10867 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10868 int cksum_request, conn_t *connp, int caller, int attach_index, int flags, 10869 zoneid_t zoneid) 10870 { 10871 ip6_t *ip6h; 10872 uint8_t nexthdr; 10873 uint16_t hdr_length; 10874 uint_t reachable = 0x0; 10875 ill_t *ill; 10876 mib2_ipIfStatsEntry_t *mibptr; 10877 mblk_t *first_mp; 10878 boolean_t mctl_present; 10879 ipsec_out_t *io; 10880 boolean_t conn_dontroute; /* conn value for multicast */ 10881 boolean_t conn_multicast_loop; /* conn value for multicast */ 10882 boolean_t multicast_forward; /* Should we forward ? 
*/ 10883 int max_frag; 10884 ip_stack_t *ipst = ire->ire_ipst; 10885 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10886 10887 ill = ire_to_ill(ire); 10888 first_mp = mp; 10889 multicast_forward = B_FALSE; 10890 10891 if (mp->b_datap->db_type != M_CTL) { 10892 ip6h = (ip6_t *)first_mp->b_rptr; 10893 } else { 10894 io = (ipsec_out_t *)first_mp->b_rptr; 10895 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10896 /* 10897 * Grab the zone id now because the M_CTL can be discarded by 10898 * ip_wput_ire_parse_ipsec_out() below. 10899 */ 10900 ASSERT(zoneid == io->ipsec_out_zoneid); 10901 ASSERT(zoneid != ALL_ZONES); 10902 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10903 /* 10904 * For the multicast case, ipsec_out carries conn_dontroute and 10905 * conn_multicast_loop as conn may not be available here. We 10906 * need this for multicast loopback and forwarding which is done 10907 * later in the code. 10908 */ 10909 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10910 conn_dontroute = io->ipsec_out_dontroute; 10911 conn_multicast_loop = io->ipsec_out_multicast_loop; 10912 /* 10913 * If conn_dontroute is not set or conn_multicast_loop 10914 * is set, we need to do forwarding/loopback. For 10915 * datagrams from ip_wput_multicast, conn_dontroute is 10916 * set to B_TRUE and conn_multicast_loop is set to 10917 * B_FALSE so that we neither do forwarding nor 10918 * loopback. 10919 */ 10920 if (!conn_dontroute || conn_multicast_loop) 10921 multicast_forward = B_TRUE; 10922 } 10923 } 10924 10925 /* 10926 * If the sender didn't supply the hop limit and there is a default 10927 * unicast hop limit associated with the output interface, we use 10928 * that if the packet is unicast. Interface specific unicast hop 10929 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10930 */ 10931 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10932 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10933 ip6h->ip6_hops = ill->ill_max_hops; 10934 } 10935 10936 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10937 ire->ire_zoneid != ALL_ZONES) { 10938 /* 10939 * When a zone sends a packet to another zone, we try to deliver 10940 * the packet under the same conditions as if the destination 10941 * was a real node on the network. To do so, we look for a 10942 * matching route in the forwarding table. 10943 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10944 * ip_newroute_v6() does. 10945 * Note that IRE_LOCAL are special, since they are used 10946 * when the zoneid doesn't match in some cases. This means that 10947 * we need to handle ipha_src differently since ire_src_addr 10948 * belongs to the receiving zone instead of the sending zone. 10949 * When ip_restrict_interzone_loopback is set, then 10950 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10951 * for loopback between zones when the logical "Ethernet" would 10952 * have looped them back. 
10953 */ 10954 ire_t *src_ire; 10955 10956 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10957 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10958 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10959 if (src_ire != NULL && 10960 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10961 (!ipst->ips_ip_restrict_interzone_loopback || 10962 ire_local_same_ill_group(ire, src_ire))) { 10963 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10964 !unspec_src) { 10965 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10966 } 10967 ire_refrele(src_ire); 10968 } else { 10969 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10970 if (src_ire != NULL) { 10971 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10972 ire_refrele(src_ire); 10973 freemsg(first_mp); 10974 return; 10975 } 10976 ire_refrele(src_ire); 10977 } 10978 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10979 /* Failed */ 10980 freemsg(first_mp); 10981 return; 10982 } 10983 icmp_unreachable_v6(q, first_mp, 10984 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10985 zoneid, ipst); 10986 return; 10987 } 10988 } 10989 10990 if (mp->b_datap->db_type == M_CTL || 10991 ipss->ipsec_outbound_v6_policy_present) { 10992 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10993 connp, unspec_src, zoneid); 10994 if (mp == NULL) { 10995 return; 10996 } 10997 } 10998 10999 first_mp = mp; 11000 if (mp->b_datap->db_type == M_CTL) { 11001 io = (ipsec_out_t *)mp->b_rptr; 11002 ASSERT(io->ipsec_out_type == IPSEC_OUT); 11003 mp = mp->b_cont; 11004 mctl_present = B_TRUE; 11005 } else { 11006 mctl_present = B_FALSE; 11007 } 11008 11009 ip6h = (ip6_t *)mp->b_rptr; 11010 nexthdr = ip6h->ip6_nxt; 11011 mibptr = ill->ill_ip_mib; 11012 11013 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 11014 ipif_t *ipif; 11015 11016 /* 11017 * Select the source address using ipif_select_source_v6. 
11018 */ 11019 if (attach_index != 0) { 11020 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 11021 RESTRICT_TO_ILL, IPV6_PREFER_SRC_DEFAULT, zoneid); 11022 } else { 11023 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, 11024 RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid); 11025 } 11026 if (ipif == NULL) { 11027 if (ip_debug > 2) { 11028 /* ip1dbg */ 11029 pr_addr_dbg("ip_wput_ire_v6: no src for " 11030 "dst %s\n, ", AF_INET6, &ip6h->ip6_dst); 11031 printf("ip_wput_ire_v6: interface name %s\n", 11032 ill->ill_name); 11033 } 11034 freemsg(first_mp); 11035 return; 11036 } 11037 ip6h->ip6_src = ipif->ipif_v6src_addr; 11038 ipif_refrele(ipif); 11039 } 11040 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11041 if ((connp != NULL && connp->conn_multicast_loop) || 11042 !(ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 11043 ilm_t *ilm; 11044 11045 ILM_WALKER_HOLD(ill); 11046 ilm = ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, ALL_ZONES); 11047 ILM_WALKER_RELE(ill); 11048 if (ilm != NULL) { 11049 mblk_t *nmp; 11050 int fanout_flags = 0; 11051 11052 if (connp != NULL && 11053 !connp->conn_multicast_loop) { 11054 fanout_flags |= IP_FF_NO_MCAST_LOOP; 11055 } 11056 ip1dbg(("ip_wput_ire_v6: " 11057 "Loopback multicast\n")); 11058 nmp = ip_copymsg(first_mp); 11059 if (nmp != NULL) { 11060 ip6_t *nip6h; 11061 mblk_t *mp_ip6h; 11062 11063 if (mctl_present) { 11064 nip6h = (ip6_t *) 11065 nmp->b_cont->b_rptr; 11066 mp_ip6h = nmp->b_cont; 11067 } else { 11068 nip6h = (ip6_t *)nmp->b_rptr; 11069 mp_ip6h = nmp; 11070 } 11071 11072 DTRACE_PROBE4( 11073 ip6__loopback__out__start, 11074 ill_t *, NULL, 11075 ill_t *, ill, 11076 ip6_t *, nip6h, 11077 mblk_t *, nmp); 11078 11079 FW_HOOKS6( 11080 ipst->ips_ip6_loopback_out_event, 11081 ipst->ips_ipv6firewall_loopback_out, 11082 NULL, ill, nip6h, nmp, mp_ip6h, 11083 ipst); 11084 11085 DTRACE_PROBE1( 11086 ip6__loopback__out__end, 11087 mblk_t *, nmp); 11088 11089 if (nmp != NULL) { 11090 /* 11091 * Deliver locally and to 11092 * every local 
zone, except 11093 * the sending zone when 11094 * IPV6_MULTICAST_LOOP is 11095 * disabled. 11096 */ 11097 ip_wput_local_v6(RD(q), ill, 11098 nip6h, nmp, 11099 ire, fanout_flags); 11100 } 11101 } else { 11102 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11103 ip1dbg(("ip_wput_ire_v6: " 11104 "copymsg failed\n")); 11105 } 11106 } 11107 } 11108 if (ip6h->ip6_hops == 0 || 11109 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 11110 (ill->ill_phyint->phyint_flags & PHYI_LOOPBACK)) { 11111 /* 11112 * Local multicast or just loopback on loopback 11113 * interface. 11114 */ 11115 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 11116 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 11117 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11118 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 11119 freemsg(first_mp); 11120 return; 11121 } 11122 } 11123 11124 if (ire->ire_stq != NULL) { 11125 uint32_t sum; 11126 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 11127 ill_phyint->phyint_ifindex; 11128 queue_t *dev_q = ire->ire_stq->q_next; 11129 11130 /* 11131 * non-NULL send-to queue - packet is to be sent 11132 * out an interface. 11133 */ 11134 11135 /* Driver is flow-controlling? */ 11136 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 11137 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 11138 /* 11139 * Queue packet if we have an conn to give back 11140 * pressure. We can't queue packets intended for 11141 * hardware acceleration since we've tossed that 11142 * state already. If the packet is being fed back 11143 * from ire_send_v6, we don't know the position in 11144 * the queue to enqueue the packet and we discard 11145 * the packet. 
11146 */ 11147 if (ipst->ips_ip_output_queue && connp != NULL && 11148 !mctl_present && caller != IRE_SEND) { 11149 if (caller == IP_WSRV) { 11150 connp->conn_did_putbq = 1; 11151 (void) putbq(connp->conn_wq, mp); 11152 conn_drain_insert(connp); 11153 /* 11154 * caller == IP_WSRV implies we are 11155 * the service thread, and the 11156 * queue is already noenabled. 11157 * The check for canput and 11158 * the putbq is not atomic. 11159 * So we need to check again. 11160 */ 11161 if (canput(dev_q)) 11162 connp->conn_did_putbq = 0; 11163 } else { 11164 (void) putq(connp->conn_wq, mp); 11165 } 11166 return; 11167 } 11168 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11169 freemsg(first_mp); 11170 return; 11171 } 11172 11173 /* 11174 * Look for reachability confirmations from the transport. 11175 */ 11176 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 11177 reachable |= IPV6_REACHABILITY_CONFIRMATION; 11178 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 11179 if (mctl_present) 11180 io->ipsec_out_reachable = B_TRUE; 11181 } 11182 /* Fastpath */ 11183 switch (nexthdr) { 11184 case IPPROTO_TCP: 11185 case IPPROTO_UDP: 11186 case IPPROTO_ICMPV6: 11187 case IPPROTO_SCTP: 11188 hdr_length = IPV6_HDR_LEN; 11189 break; 11190 default: { 11191 uint8_t *nexthdrp; 11192 11193 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 11194 &hdr_length, &nexthdrp)) { 11195 /* Malformed packet */ 11196 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11197 freemsg(first_mp); 11198 return; 11199 } 11200 nexthdr = *nexthdrp; 11201 break; 11202 } 11203 } 11204 11205 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 11206 uint16_t *up; 11207 uint16_t *insp; 11208 11209 /* 11210 * The packet header is processed once for all, even 11211 * in the multirouting case. We disable hardware 11212 * checksum if the packet is multirouted, as it will be 11213 * replicated via several interfaces, and not all of 11214 * them may have this capability. 
11215 */ 11216 if (cksum_request == 1 && 11217 !(ire->ire_flags & RTF_MULTIRT)) { 11218 /* Skip the transport checksum */ 11219 goto cksum_done; 11220 } 11221 /* 11222 * Do user-configured raw checksum. 11223 * Compute checksum and insert at offset "cksum_request" 11224 */ 11225 11226 /* check for enough headers for checksum */ 11227 cksum_request += hdr_length; /* offset from rptr */ 11228 if ((mp->b_wptr - mp->b_rptr) < 11229 (cksum_request + sizeof (int16_t))) { 11230 if (!pullupmsg(mp, 11231 cksum_request + sizeof (int16_t))) { 11232 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11233 " failed\n")); 11234 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11235 freemsg(first_mp); 11236 return; 11237 } 11238 ip6h = (ip6_t *)mp->b_rptr; 11239 } 11240 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 11241 ASSERT(((uintptr_t)insp & 0x1) == 0); 11242 up = (uint16_t *)&ip6h->ip6_src; 11243 /* 11244 * icmp has placed length and routing 11245 * header adjustment in *insp. 11246 */ 11247 sum = htons(nexthdr) + 11248 up[0] + up[1] + up[2] + up[3] + 11249 up[4] + up[5] + up[6] + up[7] + 11250 up[8] + up[9] + up[10] + up[11] + 11251 up[12] + up[13] + up[14] + up[15]; 11252 sum = (sum & 0xffff) + (sum >> 16); 11253 *insp = IP_CSUM(mp, hdr_length, sum); 11254 if (*insp == 0) 11255 *insp = 0xFFFF; 11256 } else if (nexthdr == IPPROTO_TCP) { 11257 uint16_t *up; 11258 11259 /* 11260 * Check for full IPv6 header + enough TCP header 11261 * to get at the checksum field. 
11262 */ 11263 if ((mp->b_wptr - mp->b_rptr) < 11264 (hdr_length + TCP_CHECKSUM_OFFSET + 11265 TCP_CHECKSUM_SIZE)) { 11266 if (!pullupmsg(mp, hdr_length + 11267 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 11268 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 11269 " failed\n")); 11270 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11271 freemsg(first_mp); 11272 return; 11273 } 11274 ip6h = (ip6_t *)mp->b_rptr; 11275 } 11276 11277 up = (uint16_t *)&ip6h->ip6_src; 11278 /* 11279 * Note: The TCP module has stored the length value 11280 * into the tcp checksum field, so we don't 11281 * need to explicitly sum it in here. 11282 */ 11283 sum = up[0] + up[1] + up[2] + up[3] + 11284 up[4] + up[5] + up[6] + up[7] + 11285 up[8] + up[9] + up[10] + up[11] + 11286 up[12] + up[13] + up[14] + up[15]; 11287 11288 /* Fold the initial sum */ 11289 sum = (sum & 0xffff) + (sum >> 16); 11290 11291 up = (uint16_t *)(((uchar_t *)ip6h) + 11292 hdr_length + TCP_CHECKSUM_OFFSET); 11293 11294 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 11295 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11296 ire->ire_max_frag, mctl_present, sum); 11297 11298 /* Software checksum? 
*/ 11299 if (DB_CKSUMFLAGS(mp) == 0) { 11300 IP6_STAT(ipst, ip6_out_sw_cksum); 11301 IP6_STAT_UPDATE(ipst, 11302 ip6_tcp_out_sw_cksum_bytes, 11303 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11304 hdr_length); 11305 } 11306 } else if (nexthdr == IPPROTO_UDP) { 11307 uint16_t *up; 11308 11309 /* 11310 * check for full IPv6 header + enough UDP header 11311 * to get at the UDP checksum field 11312 */ 11313 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 11314 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11315 if (!pullupmsg(mp, hdr_length + 11316 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 11317 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 11318 " failed\n")); 11319 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11320 freemsg(first_mp); 11321 return; 11322 } 11323 ip6h = (ip6_t *)mp->b_rptr; 11324 } 11325 up = (uint16_t *)&ip6h->ip6_src; 11326 /* 11327 * Note: The UDP module has stored the length value 11328 * into the udp checksum field, so we don't 11329 * need to explicitly sum it in here. 11330 */ 11331 sum = up[0] + up[1] + up[2] + up[3] + 11332 up[4] + up[5] + up[6] + up[7] + 11333 up[8] + up[9] + up[10] + up[11] + 11334 up[12] + up[13] + up[14] + up[15]; 11335 11336 /* Fold the initial sum */ 11337 sum = (sum & 0xffff) + (sum >> 16); 11338 11339 up = (uint16_t *)(((uchar_t *)ip6h) + 11340 hdr_length + UDP_CHECKSUM_OFFSET); 11341 11342 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11343 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11344 ire->ire_max_frag, mctl_present, sum); 11345 11346 /* Software checksum? 
*/ 11347 if (DB_CKSUMFLAGS(mp) == 0) { 11348 IP6_STAT(ipst, ip6_out_sw_cksum); 11349 IP6_STAT_UPDATE(ipst, 11350 ip6_udp_out_sw_cksum_bytes, 11351 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11352 hdr_length); 11353 } 11354 } else if (nexthdr == IPPROTO_ICMPV6) { 11355 uint16_t *up; 11356 icmp6_t *icmp6; 11357 11358 /* check for full IPv6+ICMPv6 header */ 11359 if ((mp->b_wptr - mp->b_rptr) < 11360 (hdr_length + ICMP6_MINLEN)) { 11361 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11362 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11363 " failed\n")); 11364 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11365 freemsg(first_mp); 11366 return; 11367 } 11368 ip6h = (ip6_t *)mp->b_rptr; 11369 } 11370 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11371 up = (uint16_t *)&ip6h->ip6_src; 11372 /* 11373 * icmp has placed length and routing 11374 * header adjustment in icmp6_cksum. 11375 */ 11376 sum = htons(IPPROTO_ICMPV6) + 11377 up[0] + up[1] + up[2] + up[3] + 11378 up[4] + up[5] + up[6] + up[7] + 11379 up[8] + up[9] + up[10] + up[11] + 11380 up[12] + up[13] + up[14] + up[15]; 11381 sum = (sum & 0xffff) + (sum >> 16); 11382 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11383 if (icmp6->icmp6_cksum == 0) 11384 icmp6->icmp6_cksum = 0xFFFF; 11385 11386 /* Update output mib stats */ 11387 icmp_update_out_mib_v6(ill, icmp6); 11388 } else if (nexthdr == IPPROTO_SCTP) { 11389 sctp_hdr_t *sctph; 11390 11391 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11392 if (!pullupmsg(mp, hdr_length + 11393 sizeof (*sctph))) { 11394 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11395 " failed\n")); 11396 BUMP_MIB(ill->ill_ip_mib, 11397 ipIfStatsOutDiscards); 11398 freemsg(mp); 11399 return; 11400 } 11401 ip6h = (ip6_t *)mp->b_rptr; 11402 } 11403 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11404 sctph->sh_chksum = 0; 11405 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11406 } 11407 11408 cksum_done: 11409 /* 11410 * We force the insertion of a fragment header using the 11411 * IPH_FRAG_HDR 
flag in two cases: 11412 * - after reception of an ICMPv6 "packet too big" message 11413 * with a MTU < 1280 (cf. RFC 2460 section 5) 11414 * - for multirouted IPv6 packets, so that the receiver can 11415 * discard duplicates according to their fragment identifier 11416 * 11417 * Two flags modifed from the API can modify this behavior. 11418 * The first is IPV6_USE_MIN_MTU. With this API the user 11419 * can specify how to manage PMTUD for unicast and multicast. 11420 * 11421 * IPV6_DONTFRAG disallows fragmentation. 11422 */ 11423 max_frag = ire->ire_max_frag; 11424 switch (IP6I_USE_MIN_MTU_API(flags)) { 11425 case IPV6_USE_MIN_MTU_DEFAULT: 11426 case IPV6_USE_MIN_MTU_UNICAST: 11427 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11428 max_frag = IPV6_MIN_MTU; 11429 } 11430 break; 11431 11432 case IPV6_USE_MIN_MTU_NEVER: 11433 max_frag = IPV6_MIN_MTU; 11434 break; 11435 } 11436 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11437 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11438 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11439 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11440 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11441 return; 11442 } 11443 11444 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11445 (mp->b_cont ? 
msgdsize(mp) : 11446 mp->b_wptr - (uchar_t *)ip6h)) { 11447 ip0dbg(("Packet length mismatch: %d, %ld\n", 11448 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11449 msgdsize(mp))); 11450 freemsg(first_mp); 11451 return; 11452 } 11453 /* Do IPSEC processing first */ 11454 if (mctl_present) { 11455 if (attach_index != 0) 11456 ipsec_out_attach_if(io, attach_index); 11457 ipsec_out_process(q, first_mp, ire, ill_index); 11458 return; 11459 } 11460 ASSERT(mp->b_prev == NULL); 11461 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11462 ntohs(ip6h->ip6_plen) + 11463 IPV6_HDR_LEN, max_frag)); 11464 ASSERT(mp == first_mp); 11465 /* Initiate IPPF processing */ 11466 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11467 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11468 if (mp == NULL) { 11469 return; 11470 } 11471 } 11472 ip_wput_frag_v6(mp, ire, reachable, connp, 11473 caller, max_frag); 11474 return; 11475 } 11476 /* Do IPSEC processing first */ 11477 if (mctl_present) { 11478 int extra_len = ipsec_out_extra_length(first_mp); 11479 11480 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11481 max_frag) { 11482 /* 11483 * IPsec headers will push the packet over the 11484 * MTU limit. Issue an ICMPv6 Packet Too Big 11485 * message for this packet if the upper-layer 11486 * that issued this packet will be able to 11487 * react to the icmp_pkt2big_v6() that we'll 11488 * generate. 11489 */ 11490 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11491 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11492 return; 11493 } 11494 if (attach_index != 0) 11495 ipsec_out_attach_if(io, attach_index); 11496 ipsec_out_process(q, first_mp, ire, ill_index); 11497 return; 11498 } 11499 /* 11500 * XXX multicast: add ip_mforward_v6() here. 11501 * Check conn_dontroute 11502 */ 11503 #ifdef lint 11504 /* 11505 * XXX The only purpose of this statement is to avoid lint 11506 * errors. See the above "XXX multicast". When that gets 11507 * fixed, remove this whole #ifdef lint section. 
11508 */ 11509 ip3dbg(("multicast forward is %s.\n", 11510 (multicast_forward ? "TRUE" : "FALSE"))); 11511 #endif 11512 11513 UPDATE_OB_PKT_COUNT(ire); 11514 ire->ire_last_used_time = lbolt; 11515 ASSERT(mp == first_mp); 11516 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11517 } else { 11518 DTRACE_PROBE4(ip6__loopback__out__start, 11519 ill_t *, NULL, ill_t *, ill, 11520 ip6_t *, ip6h, mblk_t *, first_mp); 11521 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11522 ipst->ips_ipv6firewall_loopback_out, 11523 NULL, ill, ip6h, first_mp, mp, ipst); 11524 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11525 if (first_mp != NULL) 11526 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0); 11527 } 11528 } 11529 11530 /* 11531 * Outbound IPv6 fragmentation routine using MDT. 11532 */ 11533 static void 11534 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11535 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11536 { 11537 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11538 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11539 mblk_t *hdr_mp, *md_mp = NULL; 11540 int i1; 11541 multidata_t *mmd; 11542 unsigned char *hdr_ptr, *pld_ptr; 11543 ip_pdescinfo_t pdi; 11544 uint32_t ident; 11545 size_t len; 11546 uint16_t offset; 11547 queue_t *stq = ire->ire_stq; 11548 ill_t *ill = (ill_t *)stq->q_ptr; 11549 ip_stack_t *ipst = ill->ill_ipst; 11550 11551 ASSERT(DB_TYPE(mp) == M_DATA); 11552 ASSERT(MBLKL(mp) > unfragmentable_len); 11553 11554 /* 11555 * Move read ptr past unfragmentable portion, we don't want this part 11556 * of the data in our fragments. 11557 */ 11558 mp->b_rptr += unfragmentable_len; 11559 11560 /* Calculate how many packets we will send out */ 11561 i1 = (mp->b_cont == NULL) ? MBLKL(mp) : msgsize(mp); 11562 pkts = (i1 + max_chunk - 1) / max_chunk; 11563 ASSERT(pkts > 1); 11564 11565 /* Allocate a message block which will hold all the IP Headers. 
*/ 11566 wroff = ipst->ips_ip_wroff_extra; 11567 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11568 11569 i1 = pkts * hdr_chunk_len; 11570 /* 11571 * Create the header buffer, Multidata and destination address 11572 * and SAP attribute that should be associated with it. 11573 */ 11574 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11575 ((hdr_mp->b_wptr += i1), 11576 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11577 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11578 freemsg(mp); 11579 if (md_mp == NULL) { 11580 freemsg(hdr_mp); 11581 } else { 11582 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11583 freemsg(md_mp); 11584 } 11585 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11586 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11587 return; 11588 } 11589 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11590 11591 /* 11592 * Add a payload buffer to the Multidata; this operation must not 11593 * fail, or otherwise our logic in this routine is broken. There 11594 * is no memory allocation done by the routine, so any returned 11595 * failure simply tells us that we've done something wrong. 11596 * 11597 * A failure tells us that either we're adding the same payload 11598 * buffer more than once, or we're trying to add more buffers than 11599 * allowed. None of the above cases should happen, and we panic 11600 * because either there's horrible heap corruption, and/or 11601 * programming mistake. 11602 */ 11603 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11604 goto pbuf_panic; 11605 } 11606 11607 hdr_ptr = hdr_mp->b_rptr; 11608 pld_ptr = mp->b_rptr; 11609 11610 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11611 11612 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11613 11614 /* 11615 * len is the total length of the fragmentable data in this 11616 * datagram. For each fragment sent, we will decrement len 11617 * by the amount of fragmentable data sent in that fragment 11618 * until len reaches zero. 
11619 */ 11620 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11621 11622 offset = 0; 11623 prev_nexthdr_offset += wroff; 11624 11625 while (len != 0) { 11626 size_t mlen; 11627 ip6_t *fip6h; 11628 ip6_frag_t *fraghdr; 11629 int error; 11630 11631 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11632 mlen = MIN(len, max_chunk); 11633 len -= mlen; 11634 11635 fip6h = (ip6_t *)(hdr_ptr + wroff); 11636 ASSERT(OK_32PTR(fip6h)); 11637 bcopy(ip6h, fip6h, unfragmentable_len); 11638 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11639 11640 fip6h->ip6_plen = htons((uint16_t)(mlen + 11641 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11642 11643 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11644 unfragmentable_len); 11645 fraghdr->ip6f_nxt = nexthdr; 11646 fraghdr->ip6f_reserved = 0; 11647 fraghdr->ip6f_offlg = htons(offset) | 11648 ((len != 0) ? IP6F_MORE_FRAG : 0); 11649 fraghdr->ip6f_ident = ident; 11650 11651 /* 11652 * Record offset and size of header and data of the next packet 11653 * in the multidata message. 11654 */ 11655 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11656 unfragmentable_len + sizeof (ip6_frag_t), 0); 11657 PDESC_PLD_INIT(&pdi); 11658 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11659 ASSERT(i1 > 0); 11660 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11661 if (i1 == mlen) { 11662 pld_ptr += mlen; 11663 } else { 11664 i1 = mlen - i1; 11665 mp = mp->b_cont; 11666 ASSERT(mp != NULL); 11667 ASSERT(MBLKL(mp) >= i1); 11668 /* 11669 * Attach the next payload message block to the 11670 * multidata message. 
11671 */ 11672 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11673 goto pbuf_panic; 11674 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11675 pld_ptr = mp->b_rptr + i1; 11676 } 11677 11678 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11679 KM_NOSLEEP)) == NULL) { 11680 /* 11681 * Any failure other than ENOMEM indicates that we 11682 * have passed in invalid pdesc info or parameters 11683 * to mmd_addpdesc, which must not happen. 11684 * 11685 * EINVAL is a result of failure on boundary checks 11686 * against the pdesc info contents. It should not 11687 * happen, and we panic because either there's 11688 * horrible heap corruption, and/or programming 11689 * mistake. 11690 */ 11691 if (error != ENOMEM) { 11692 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11693 "pdesc logic error detected for " 11694 "mmd %p pinfo %p (%d)\n", 11695 (void *)mmd, (void *)&pdi, error); 11696 /* NOTREACHED */ 11697 } 11698 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11699 /* Free unattached payload message blocks as well */ 11700 md_mp->b_cont = mp->b_cont; 11701 goto free_mmd; 11702 } 11703 11704 /* Advance fragment offset. */ 11705 offset += mlen; 11706 11707 /* Advance to location for next header in the buffer. */ 11708 hdr_ptr += hdr_chunk_len; 11709 11710 /* Did we reach the next payload message block? */ 11711 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11712 mp = mp->b_cont; 11713 /* 11714 * Attach the next message block with payload 11715 * data to the multidata message. 
11716 */ 11717 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11718 goto pbuf_panic; 11719 pld_ptr = mp->b_rptr; 11720 } 11721 } 11722 11723 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11724 ASSERT(mp->b_wptr == pld_ptr); 11725 11726 /* Update IP statistics */ 11727 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11728 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11729 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11730 /* 11731 * The ipv6 header len is accounted for in unfragmentable_len so 11732 * when calculating the fragmentation overhead just add the frag 11733 * header len. 11734 */ 11735 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11736 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11737 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11738 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11739 11740 ire->ire_ob_pkt_count += pkts; 11741 if (ire->ire_ipif != NULL) 11742 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11743 11744 ire->ire_last_used_time = lbolt; 11745 /* Send it down */ 11746 putnext(stq, md_mp); 11747 return; 11748 11749 pbuf_panic: 11750 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11751 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11752 pbuf_idx); 11753 /* NOTREACHED */ 11754 } 11755 11756 /* 11757 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11758 * We have not optimized this in terms of number of mblks 11759 * allocated. For instance, for each fragment sent we always allocate a 11760 * mblk to hold the IPv6 header and fragment header. 11761 * 11762 * Assumes that all the extension headers are contained in the first mblk. 11763 * 11764 * The fragment header is inserted after an hop-by-hop options header 11765 * and after [an optional destinations header followed by] a routing header. 11766 * 11767 * NOTE : This function does not ire_refrele the ire passed in as 11768 * the argument. 
11769 */ 11770 void 11771 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11772 int caller, int max_frag) 11773 { 11774 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11775 ip6_t *fip6h; 11776 mblk_t *hmp; 11777 mblk_t *hmp0; 11778 mblk_t *dmp; 11779 ip6_frag_t *fraghdr; 11780 size_t unfragmentable_len; 11781 size_t len; 11782 size_t mlen; 11783 size_t max_chunk; 11784 uint32_t ident; 11785 uint16_t off_flags; 11786 uint16_t offset = 0; 11787 ill_t *ill; 11788 uint8_t nexthdr; 11789 uint_t prev_nexthdr_offset; 11790 uint8_t *ptr; 11791 ip_stack_t *ipst = ire->ire_ipst; 11792 11793 ASSERT(ire->ire_type == IRE_CACHE); 11794 ill = (ill_t *)ire->ire_stq->q_ptr; 11795 11796 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11797 11798 /* 11799 * Determine the length of the unfragmentable portion of this 11800 * datagram. This consists of the IPv6 header, a potential 11801 * hop-by-hop options header, a potential pre-routing-header 11802 * destination options header, and a potential routing header. 
11803 */ 11804 nexthdr = ip6h->ip6_nxt; 11805 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11806 ptr = (uint8_t *)&ip6h[1]; 11807 11808 if (nexthdr == IPPROTO_HOPOPTS) { 11809 ip6_hbh_t *hbh_hdr; 11810 uint_t hdr_len; 11811 11812 hbh_hdr = (ip6_hbh_t *)ptr; 11813 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11814 nexthdr = hbh_hdr->ip6h_nxt; 11815 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11816 - (uint8_t *)ip6h; 11817 ptr += hdr_len; 11818 } 11819 if (nexthdr == IPPROTO_DSTOPTS) { 11820 ip6_dest_t *dest_hdr; 11821 uint_t hdr_len; 11822 11823 dest_hdr = (ip6_dest_t *)ptr; 11824 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11825 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11826 nexthdr = dest_hdr->ip6d_nxt; 11827 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11828 - (uint8_t *)ip6h; 11829 ptr += hdr_len; 11830 } 11831 } 11832 if (nexthdr == IPPROTO_ROUTING) { 11833 ip6_rthdr_t *rthdr; 11834 uint_t hdr_len; 11835 11836 rthdr = (ip6_rthdr_t *)ptr; 11837 nexthdr = rthdr->ip6r_nxt; 11838 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11839 - (uint8_t *)ip6h; 11840 hdr_len = 8 * (rthdr->ip6r_len + 1); 11841 ptr += hdr_len; 11842 } 11843 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11844 11845 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11846 sizeof (ip6_frag_t)) & ~7; 11847 11848 /* Check if we can use MDT to send out the frags. */ 11849 ASSERT(!IRE_IS_LOCAL(ire)); 11850 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11851 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11852 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11853 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11854 nexthdr, prev_nexthdr_offset); 11855 return; 11856 } 11857 11858 /* 11859 * Allocate an mblk with enough room for the link-layer 11860 * header, the unfragmentable part of the datagram, and the 11861 * fragment header. 
This (or a copy) will be used as the 11862 * first mblk for each fragment we send. 11863 */ 11864 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + 11865 ipst->ips_ip_wroff_extra, BPRI_HI); 11866 if (hmp == NULL) { 11867 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11868 freemsg(mp); 11869 return; 11870 } 11871 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11872 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11873 11874 fip6h = (ip6_t *)hmp->b_rptr; 11875 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11876 11877 bcopy(ip6h, fip6h, unfragmentable_len); 11878 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11879 11880 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11881 11882 fraghdr->ip6f_nxt = nexthdr; 11883 fraghdr->ip6f_reserved = 0; 11884 fraghdr->ip6f_offlg = 0; 11885 fraghdr->ip6f_ident = htonl(ident); 11886 11887 /* 11888 * len is the total length of the fragmentable data in this 11889 * datagram. For each fragment sent, we will decrement len 11890 * by the amount of fragmentable data sent in that fragment 11891 * until len reaches zero. 11892 */ 11893 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11894 11895 /* 11896 * Move read ptr past unfragmentable portion, we don't want this part 11897 * of the data in our fragments. 
11898 */ 11899 mp->b_rptr += unfragmentable_len; 11900 11901 while (len != 0) { 11902 mlen = MIN(len, max_chunk); 11903 len -= mlen; 11904 if (len != 0) { 11905 /* Not last */ 11906 hmp0 = copyb(hmp); 11907 if (hmp0 == NULL) { 11908 freeb(hmp); 11909 freemsg(mp); 11910 BUMP_MIB(ill->ill_ip_mib, 11911 ipIfStatsOutFragFails); 11912 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11913 return; 11914 } 11915 off_flags = IP6F_MORE_FRAG; 11916 } else { 11917 /* Last fragment */ 11918 hmp0 = hmp; 11919 hmp = NULL; 11920 off_flags = 0; 11921 } 11922 fip6h = (ip6_t *)(hmp0->b_rptr); 11923 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11924 11925 fip6h->ip6_plen = htons((uint16_t)(mlen + 11926 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11927 /* 11928 * Note: Optimization alert. 11929 * In IPv6 (and IPv4) protocol header, Fragment Offset 11930 * ("offset") is 13 bits wide and in 8-octet units. 11931 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11932 * it occupies the most significant 13 bits. 11933 * (least significant 13 bits in IPv4). 11934 * We do not do any shifts here. Not shifting is same effect 11935 * as taking offset value in octet units, dividing by 8 and 11936 * then shifting 3 bits left to line it up in place in proper 11937 * place protocol header. 
11938 */ 11939 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11940 11941 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11942 /* mp has already been freed by ip_carve_mp() */ 11943 if (hmp != NULL) 11944 freeb(hmp); 11945 freeb(hmp0); 11946 ip1dbg(("ip_carve_mp: failed\n")); 11947 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11948 return; 11949 } 11950 hmp0->b_cont = dmp; 11951 /* Get the priority marking, if any */ 11952 hmp0->b_band = dmp->b_band; 11953 UPDATE_OB_PKT_COUNT(ire); 11954 ire->ire_last_used_time = lbolt; 11955 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11956 caller, NULL); 11957 reachable = 0; /* No need to redo state machine in loop */ 11958 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11959 offset += mlen; 11960 } 11961 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11962 } 11963 11964 /* 11965 * Determine if the ill and multicast aspects of that packets 11966 * "matches" the conn. 11967 */ 11968 boolean_t 11969 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11970 zoneid_t zoneid) 11971 { 11972 ill_t *in_ill; 11973 boolean_t wantpacket = B_TRUE; 11974 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11975 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11976 11977 /* 11978 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11979 * unicast and multicast reception to conn_incoming_ill. 11980 * conn_wantpacket_v6 is called both for unicast and 11981 * multicast. 11982 * 11983 * 1) The unicast copy of the packet can come anywhere in 11984 * the ill group if it is part of the group. Thus, we 11985 * need to check to see whether the ill group matches 11986 * if in_ill is part of a group. 11987 * 11988 * 2) ip_rput does not suppress duplicate multicast packets. 
11989 * If there are two interfaces in a ill group and we have 11990 * 2 applications (conns) joined a multicast group G on 11991 * both the interfaces, ilm_lookup_ill filter in ip_rput 11992 * will give us two packets because we join G on both the 11993 * interfaces rather than nominating just one interface 11994 * for receiving multicast like broadcast above. So, 11995 * we have to call ilg_lookup_ill to filter out duplicate 11996 * copies, if ill is part of a group, to supress duplicates. 11997 */ 11998 in_ill = connp->conn_incoming_ill; 11999 if (in_ill != NULL) { 12000 mutex_enter(&connp->conn_lock); 12001 in_ill = connp->conn_incoming_ill; 12002 mutex_enter(&ill->ill_lock); 12003 /* 12004 * No IPMP, and the packet did not arrive on conn_incoming_ill 12005 * OR, IPMP in use and the packet arrived on an IPMP group 12006 * different from the conn_incoming_ill's IPMP group. 12007 * Reject the packet. 12008 */ 12009 if ((in_ill->ill_group == NULL && in_ill != ill) || 12010 (in_ill->ill_group != NULL && 12011 in_ill->ill_group != ill->ill_group)) { 12012 wantpacket = B_FALSE; 12013 } 12014 mutex_exit(&ill->ill_lock); 12015 mutex_exit(&connp->conn_lock); 12016 if (!wantpacket) 12017 return (B_FALSE); 12018 } 12019 12020 if (connp->conn_multi_router) 12021 return (B_TRUE); 12022 12023 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 12024 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 12025 /* 12026 * Unicast case: we match the conn only if it's in the specified 12027 * zone. 12028 */ 12029 return (IPCL_ZONE_MATCH(connp, zoneid)); 12030 } 12031 12032 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 12033 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 12034 /* 12035 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 12036 * disabled, therefore we don't dispatch the multicast packet to 12037 * the sending zone. 
12038 */ 12039 return (B_FALSE); 12040 } 12041 12042 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) && 12043 connp->conn_zoneid != zoneid && zoneid != ALL_ZONES) { 12044 /* 12045 * Multicast packet on the loopback interface: we only match 12046 * conns who joined the group in the specified zone. 12047 */ 12048 return (B_FALSE); 12049 } 12050 12051 mutex_enter(&connp->conn_lock); 12052 wantpacket = 12053 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 12054 mutex_exit(&connp->conn_lock); 12055 12056 return (wantpacket); 12057 } 12058 12059 12060 /* 12061 * Transmit a packet and update any NUD state based on the flags 12062 * XXX need to "recover" any ip6i_t when doing putq! 12063 * 12064 * NOTE : This function does not ire_refrele the ire passed in as the 12065 * argument. 12066 */ 12067 void 12068 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 12069 int caller, ipsec_out_t *io) 12070 { 12071 mblk_t *mp1; 12072 nce_t *nce = ire->ire_nce; 12073 ill_t *ill; 12074 ill_t *out_ill; 12075 uint64_t delta; 12076 ip6_t *ip6h; 12077 queue_t *stq = ire->ire_stq; 12078 ire_t *ire1 = NULL; 12079 ire_t *save_ire = ire; 12080 boolean_t multirt_send = B_FALSE; 12081 mblk_t *next_mp = NULL; 12082 ip_stack_t *ipst = ire->ire_ipst; 12083 12084 ip6h = (ip6_t *)mp->b_rptr; 12085 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 12086 ASSERT(ire->ire_ipversion == IPV6_VERSION); 12087 ASSERT(nce != NULL); 12088 ASSERT(mp->b_datap->db_type == M_DATA); 12089 ASSERT(stq != NULL); 12090 12091 ill = ire_to_ill(ire); 12092 if (!ill) { 12093 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 12094 freemsg(mp); 12095 return; 12096 } 12097 12098 /* 12099 * If a packet is to be sent out an interface that is a 6to4 12100 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 12101 * destination, must be checked to have a 6to4 prefix 12102 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 12103 * address configured on the sending interface. 
Otherwise, 12104 * the packet was delivered to this interface in error and the 12105 * packet must be dropped. 12106 */ 12107 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 12108 ipif_t *ipif = ill->ill_ipif; 12109 12110 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 12111 &ip6h->ip6_dst)) { 12112 if (ip_debug > 2) { 12113 /* ip1dbg */ 12114 pr_addr_dbg("ip_xmit_v6: attempting to " 12115 "send 6to4 addressed IPv6 " 12116 "destination (%s) out the wrong " 12117 "interface.\n", AF_INET6, 12118 &ip6h->ip6_dst); 12119 } 12120 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12121 freemsg(mp); 12122 return; 12123 } 12124 } 12125 12126 /* Flow-control check has been done in ip_wput_ire_v6 */ 12127 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 12128 caller == IP_WSRV || canput(stq->q_next)) { 12129 uint32_t ill_index; 12130 12131 /* 12132 * In most cases, the emission loop below is entered only 12133 * once. Only in the case where the ire holds the 12134 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 12135 * flagged ires in the bucket, and send the packet 12136 * through all crossed RTF_MULTIRT routes. 12137 */ 12138 if (ire->ire_flags & RTF_MULTIRT) { 12139 /* 12140 * Multirouting case. The bucket where ire is stored 12141 * probably holds other RTF_MULTIRT flagged ires 12142 * to the destination. In this call to ip_xmit_v6, 12143 * we attempt to send the packet through all 12144 * those ires. Thus, we first ensure that ire is the 12145 * first RTF_MULTIRT ire in the bucket, 12146 * before walking the ire list. 12147 */ 12148 ire_t *first_ire; 12149 irb_t *irb = ire->ire_bucket; 12150 ASSERT(irb != NULL); 12151 multirt_send = B_TRUE; 12152 12153 /* Make sure we do not omit any multiroute ire. 
*/ 12154 IRB_REFHOLD(irb); 12155 for (first_ire = irb->irb_ire; 12156 first_ire != NULL; 12157 first_ire = first_ire->ire_next) { 12158 if ((first_ire->ire_flags & RTF_MULTIRT) && 12159 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 12160 &ire->ire_addr_v6)) && 12161 !(first_ire->ire_marks & 12162 (IRE_MARK_CONDEMNED | IRE_MARK_HIDDEN))) 12163 break; 12164 } 12165 12166 if ((first_ire != NULL) && (first_ire != ire)) { 12167 IRE_REFHOLD(first_ire); 12168 /* ire will be released by the caller */ 12169 ire = first_ire; 12170 nce = ire->ire_nce; 12171 stq = ire->ire_stq; 12172 ill = ire_to_ill(ire); 12173 } 12174 IRB_REFRELE(irb); 12175 } else if (connp != NULL && IPCL_IS_TCP(connp) && 12176 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 12177 ILL_MDT_USABLE(ill)) { 12178 /* 12179 * This tcp connection was marked as MDT-capable, but 12180 * it has been turned off due changes in the interface. 12181 * Now that the interface support is back, turn it on 12182 * by notifying tcp. We don't directly modify tcp_mdt, 12183 * since we leave all the details to the tcp code that 12184 * knows better. 12185 */ 12186 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 12187 12188 if (mdimp == NULL) { 12189 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 12190 "connp %p (ENOMEM)\n", (void *)connp)); 12191 } else { 12192 CONN_INC_REF(connp); 12193 squeue_fill(connp->conn_sqp, mdimp, tcp_input, 12194 connp, SQTAG_TCP_INPUT_MCTL); 12195 } 12196 } 12197 12198 do { 12199 mblk_t *mp_ip6h; 12200 12201 if (multirt_send) { 12202 irb_t *irb; 12203 /* 12204 * We are in a multiple send case, need to get 12205 * the next ire and make a duplicate of the 12206 * packet. ire1 holds here the next ire to 12207 * process in the bucket. If multirouting is 12208 * expected, any non-RTF_MULTIRT ire that has 12209 * the right destination address is ignored. 
12210 */ 12211 irb = ire->ire_bucket; 12212 ASSERT(irb != NULL); 12213 12214 IRB_REFHOLD(irb); 12215 for (ire1 = ire->ire_next; 12216 ire1 != NULL; 12217 ire1 = ire1->ire_next) { 12218 if (!(ire1->ire_flags & RTF_MULTIRT)) 12219 continue; 12220 if (!IN6_ARE_ADDR_EQUAL( 12221 &ire1->ire_addr_v6, 12222 &ire->ire_addr_v6)) 12223 continue; 12224 if (ire1->ire_marks & 12225 (IRE_MARK_CONDEMNED| 12226 IRE_MARK_HIDDEN)) 12227 continue; 12228 12229 /* Got one */ 12230 if (ire1 != save_ire) { 12231 IRE_REFHOLD(ire1); 12232 } 12233 break; 12234 } 12235 IRB_REFRELE(irb); 12236 12237 if (ire1 != NULL) { 12238 next_mp = copyb(mp); 12239 if ((next_mp == NULL) || 12240 ((mp->b_cont != NULL) && 12241 ((next_mp->b_cont = 12242 dupmsg(mp->b_cont)) == 12243 NULL))) { 12244 freemsg(next_mp); 12245 next_mp = NULL; 12246 ire_refrele(ire1); 12247 ire1 = NULL; 12248 } 12249 } 12250 12251 /* Last multiroute ire; don't loop anymore. */ 12252 if (ire1 == NULL) { 12253 multirt_send = B_FALSE; 12254 } 12255 } 12256 12257 ill_index = 12258 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 12259 12260 /* Initiate IPPF processing */ 12261 if (IP6_OUT_IPP(flags, ipst)) { 12262 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 12263 if (mp == NULL) { 12264 BUMP_MIB(ill->ill_ip_mib, 12265 ipIfStatsOutDiscards); 12266 if (next_mp != NULL) 12267 freemsg(next_mp); 12268 if (ire != save_ire) { 12269 ire_refrele(ire); 12270 } 12271 return; 12272 } 12273 ip6h = (ip6_t *)mp->b_rptr; 12274 } 12275 mp_ip6h = mp; 12276 12277 /* 12278 * Check for fastpath, we need to hold nce_lock to 12279 * prevent fastpath update from chaining nce_fp_mp. 
12280 */ 12281 12282 ASSERT(nce->nce_ipversion != IPV4_VERSION); 12283 mutex_enter(&nce->nce_lock); 12284 if ((mp1 = nce->nce_fp_mp) != NULL) { 12285 uint32_t hlen; 12286 uchar_t *rptr; 12287 12288 hlen = MBLKL(mp1); 12289 rptr = mp->b_rptr - hlen; 12290 /* 12291 * make sure there is room for the fastpath 12292 * datalink header 12293 */ 12294 if (rptr < mp->b_datap->db_base) { 12295 mp1 = copyb(mp1); 12296 mutex_exit(&nce->nce_lock); 12297 if (mp1 == NULL) { 12298 BUMP_MIB(ill->ill_ip_mib, 12299 ipIfStatsOutDiscards); 12300 freemsg(mp); 12301 if (next_mp != NULL) 12302 freemsg(next_mp); 12303 if (ire != save_ire) { 12304 ire_refrele(ire); 12305 } 12306 return; 12307 } 12308 mp1->b_cont = mp; 12309 12310 /* Get the priority marking, if any */ 12311 mp1->b_band = mp->b_band; 12312 mp = mp1; 12313 } else { 12314 mp->b_rptr = rptr; 12315 /* 12316 * fastpath - pre-pend datalink 12317 * header 12318 */ 12319 bcopy(mp1->b_rptr, rptr, hlen); 12320 mutex_exit(&nce->nce_lock); 12321 } 12322 } else { 12323 /* 12324 * Get the DL_UNITDATA_REQ. 12325 */ 12326 mp1 = nce->nce_res_mp; 12327 if (mp1 == NULL) { 12328 mutex_exit(&nce->nce_lock); 12329 ip1dbg(("ip_xmit_v6: No resolution " 12330 "block ire = %p\n", (void *)ire)); 12331 freemsg(mp); 12332 if (next_mp != NULL) 12333 freemsg(next_mp); 12334 if (ire != save_ire) { 12335 ire_refrele(ire); 12336 } 12337 return; 12338 } 12339 /* 12340 * Prepend the DL_UNITDATA_REQ. 
12341 */ 12342 mp1 = copyb(mp1); 12343 mutex_exit(&nce->nce_lock); 12344 if (mp1 == NULL) { 12345 BUMP_MIB(ill->ill_ip_mib, 12346 ipIfStatsOutDiscards); 12347 freemsg(mp); 12348 if (next_mp != NULL) 12349 freemsg(next_mp); 12350 if (ire != save_ire) { 12351 ire_refrele(ire); 12352 } 12353 return; 12354 } 12355 mp1->b_cont = mp; 12356 12357 /* Get the priority marking, if any */ 12358 mp1->b_band = mp->b_band; 12359 mp = mp1; 12360 } 12361 12362 out_ill = (ill_t *)stq->q_ptr; 12363 12364 DTRACE_PROBE4(ip6__physical__out__start, 12365 ill_t *, NULL, ill_t *, out_ill, 12366 ip6_t *, ip6h, mblk_t *, mp); 12367 12368 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12369 ipst->ips_ipv6firewall_physical_out, 12370 NULL, out_ill, ip6h, mp, mp_ip6h, ipst); 12371 12372 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12373 12374 if (mp == NULL) { 12375 if (multirt_send) { 12376 ASSERT(ire1 != NULL); 12377 if (ire != save_ire) { 12378 ire_refrele(ire); 12379 } 12380 /* 12381 * Proceed with the next RTF_MULTIRT 12382 * ire, also set up the send-to queue 12383 * accordingly. 12384 */ 12385 ire = ire1; 12386 ire1 = NULL; 12387 stq = ire->ire_stq; 12388 nce = ire->ire_nce; 12389 ill = ire_to_ill(ire); 12390 mp = next_mp; 12391 next_mp = NULL; 12392 continue; 12393 } else { 12394 ASSERT(next_mp == NULL); 12395 ASSERT(ire1 == NULL); 12396 break; 12397 } 12398 } 12399 12400 /* 12401 * Update ire and MIB counters; for save_ire, this has 12402 * been done by the caller. 12403 */ 12404 if (ire != save_ire) { 12405 UPDATE_OB_PKT_COUNT(ire); 12406 ire->ire_last_used_time = lbolt; 12407 12408 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12409 BUMP_MIB(ill->ill_ip_mib, 12410 ipIfStatsHCOutMcastPkts); 12411 UPDATE_MIB(ill->ill_ip_mib, 12412 ipIfStatsHCOutMcastOctets, 12413 ntohs(ip6h->ip6_plen) + 12414 IPV6_HDR_LEN); 12415 } 12416 } 12417 12418 /* 12419 * Send it down. XXX Do we want to flow control AH/ESP 12420 * packets that carry TCP payloads? 
We don't flow 12421 * control TCP packets, but we should also not 12422 * flow-control TCP packets that have been protected. 12423 * We don't have an easy way to find out if an AH/ESP 12424 * packet was originally TCP or not currently. 12425 */ 12426 if (io == NULL) { 12427 BUMP_MIB(ill->ill_ip_mib, 12428 ipIfStatsHCOutTransmits); 12429 UPDATE_MIB(ill->ill_ip_mib, 12430 ipIfStatsHCOutOctets, 12431 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12432 putnext(stq, mp); 12433 } else { 12434 /* 12435 * Safety Pup says: make sure this is 12436 * going to the right interface! 12437 */ 12438 if (io->ipsec_out_capab_ill_index != 12439 ill_index) { 12440 /* IPsec kstats: bump lose counter */ 12441 freemsg(mp1); 12442 } else { 12443 BUMP_MIB(ill->ill_ip_mib, 12444 ipIfStatsHCOutTransmits); 12445 UPDATE_MIB(ill->ill_ip_mib, 12446 ipIfStatsHCOutOctets, 12447 ntohs(ip6h->ip6_plen) + 12448 IPV6_HDR_LEN); 12449 ipsec_hw_putnext(stq, mp); 12450 } 12451 } 12452 12453 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12454 if (ire != save_ire) { 12455 ire_refrele(ire); 12456 } 12457 if (multirt_send) { 12458 ASSERT(ire1 != NULL); 12459 /* 12460 * Proceed with the next RTF_MULTIRT 12461 * ire, also set up the send-to queue 12462 * accordingly. 12463 */ 12464 ire = ire1; 12465 ire1 = NULL; 12466 stq = ire->ire_stq; 12467 nce = ire->ire_nce; 12468 ill = ire_to_ill(ire); 12469 mp = next_mp; 12470 next_mp = NULL; 12471 continue; 12472 } 12473 ASSERT(next_mp == NULL); 12474 ASSERT(ire1 == NULL); 12475 return; 12476 } 12477 12478 ASSERT(nce->nce_state != ND_INCOMPLETE); 12479 12480 /* 12481 * Check for upper layer advice 12482 */ 12483 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12484 /* 12485 * It should be o.k. to check the state without 12486 * a lock here, at most we lose an advice. 
12487 */ 12488 nce->nce_last = TICK_TO_MSEC(lbolt64); 12489 if (nce->nce_state != ND_REACHABLE) { 12490 12491 mutex_enter(&nce->nce_lock); 12492 nce->nce_state = ND_REACHABLE; 12493 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12494 mutex_exit(&nce->nce_lock); 12495 (void) untimeout(nce->nce_timeout_id); 12496 if (ip_debug > 2) { 12497 /* ip1dbg */ 12498 pr_addr_dbg("ip_xmit_v6: state" 12499 " for %s changed to" 12500 " REACHABLE\n", AF_INET6, 12501 &ire->ire_addr_v6); 12502 } 12503 } 12504 if (ire != save_ire) { 12505 ire_refrele(ire); 12506 } 12507 if (multirt_send) { 12508 ASSERT(ire1 != NULL); 12509 /* 12510 * Proceed with the next RTF_MULTIRT 12511 * ire, also set up the send-to queue 12512 * accordingly. 12513 */ 12514 ire = ire1; 12515 ire1 = NULL; 12516 stq = ire->ire_stq; 12517 nce = ire->ire_nce; 12518 ill = ire_to_ill(ire); 12519 mp = next_mp; 12520 next_mp = NULL; 12521 continue; 12522 } 12523 ASSERT(next_mp == NULL); 12524 ASSERT(ire1 == NULL); 12525 return; 12526 } 12527 12528 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12529 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12530 " ill_reachable_time = %d \n", delta, 12531 ill->ill_reachable_time)); 12532 if (delta > (uint64_t)ill->ill_reachable_time) { 12533 nce = ire->ire_nce; 12534 mutex_enter(&nce->nce_lock); 12535 switch (nce->nce_state) { 12536 case ND_REACHABLE: 12537 case ND_STALE: 12538 /* 12539 * ND_REACHABLE is identical to 12540 * ND_STALE in this specific case. If 12541 * reachable time has expired for this 12542 * neighbor (delta is greater than 12543 * reachable time), conceptually, the 12544 * neighbor cache is no longer in 12545 * REACHABLE state, but already in 12546 * STALE state. So the correct 12547 * transition here is to ND_DELAY. 
12548 */ 12549 nce->nce_state = ND_DELAY; 12550 mutex_exit(&nce->nce_lock); 12551 NDP_RESTART_TIMER(nce, 12552 ipst->ips_delay_first_probe_time); 12553 if (ip_debug > 3) { 12554 /* ip2dbg */ 12555 pr_addr_dbg("ip_xmit_v6: state" 12556 " for %s changed to" 12557 " DELAY\n", AF_INET6, 12558 &ire->ire_addr_v6); 12559 } 12560 break; 12561 case ND_DELAY: 12562 case ND_PROBE: 12563 mutex_exit(&nce->nce_lock); 12564 /* Timers have already started */ 12565 break; 12566 case ND_UNREACHABLE: 12567 /* 12568 * ndp timer has detected that this nce 12569 * is unreachable and initiated deleting 12570 * this nce and all its associated IREs. 12571 * This is a race where we found the 12572 * ire before it was deleted and have 12573 * just sent out a packet using this 12574 * unreachable nce. 12575 */ 12576 mutex_exit(&nce->nce_lock); 12577 break; 12578 default: 12579 ASSERT(0); 12580 } 12581 } 12582 12583 if (multirt_send) { 12584 ASSERT(ire1 != NULL); 12585 /* 12586 * Proceed with the next RTF_MULTIRT ire, 12587 * Also set up the send-to queue accordingly. 12588 */ 12589 if (ire != save_ire) { 12590 ire_refrele(ire); 12591 } 12592 ire = ire1; 12593 ire1 = NULL; 12594 stq = ire->ire_stq; 12595 nce = ire->ire_nce; 12596 ill = ire_to_ill(ire); 12597 mp = next_mp; 12598 next_mp = NULL; 12599 } 12600 } while (multirt_send); 12601 /* 12602 * In the multirouting case, release the last ire used for 12603 * emission. save_ire will be released by the caller. 12604 */ 12605 if (ire != save_ire) { 12606 ire_refrele(ire); 12607 } 12608 } else { 12609 /* 12610 * Queue packet if we have an conn to give back pressure. 12611 * We can't queue packets intended for hardware acceleration 12612 * since we've tossed that state already. If the packet is 12613 * being fed back from ire_send_v6, we don't know the 12614 * position in the queue to enqueue the packet and we discard 12615 * the packet. 
12616 */ 12617 if (ipst->ips_ip_output_queue && (connp != NULL) && 12618 (io == NULL) && (caller != IRE_SEND)) { 12619 if (caller == IP_WSRV) { 12620 connp->conn_did_putbq = 1; 12621 (void) putbq(connp->conn_wq, mp); 12622 conn_drain_insert(connp); 12623 /* 12624 * caller == IP_WSRV implies we are 12625 * the service thread, and the 12626 * queue is already noenabled. 12627 * The check for canput and 12628 * the putbq is not atomic. 12629 * So we need to check again. 12630 */ 12631 if (canput(stq->q_next)) 12632 connp->conn_did_putbq = 0; 12633 } else { 12634 (void) putq(connp->conn_wq, mp); 12635 } 12636 return; 12637 } 12638 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12639 freemsg(mp); 12640 return; 12641 } 12642 } 12643 12644 /* 12645 * pr_addr_dbg function provides the needed buffer space to call 12646 * inet_ntop() function's 3rd argument. This function should be 12647 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12648 * stack buffer space in it's own stack frame. This function uses 12649 * a buffer from it's own stack and prints the information. 12650 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12651 * 12652 * Note: This function can call inet_ntop() once. 12653 */ 12654 void 12655 pr_addr_dbg(char *fmt1, int af, const void *addr) 12656 { 12657 char buf[INET6_ADDRSTRLEN]; 12658 12659 if (fmt1 == NULL) { 12660 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12661 return; 12662 } 12663 12664 /* 12665 * This does not compare debug level and just prints 12666 * out. Thus it is the responsibility of the caller 12667 * to check the appropriate debug-level before calling 12668 * this function. 
12669 */ 12670 if (ip_debug > 0) { 12671 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12672 } 12673 12674 12675 } 12676 12677 12678 /* 12679 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12680 * if needed and extension headers) that will be needed based on the 12681 * ip6_pkt_t structure passed by the caller. 12682 * 12683 * The returned length does not include the length of the upper level 12684 * protocol (ULP) header. 12685 */ 12686 int 12687 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12688 { 12689 int len; 12690 12691 len = IPV6_HDR_LEN; 12692 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12693 len += sizeof (ip6i_t); 12694 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12695 ASSERT(ipp->ipp_hopoptslen != 0); 12696 len += ipp->ipp_hopoptslen; 12697 } 12698 if (ipp->ipp_fields & IPPF_RTHDR) { 12699 ASSERT(ipp->ipp_rthdrlen != 0); 12700 len += ipp->ipp_rthdrlen; 12701 } 12702 /* 12703 * En-route destination options 12704 * Only do them if there's a routing header as well 12705 */ 12706 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12707 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12708 ASSERT(ipp->ipp_rtdstoptslen != 0); 12709 len += ipp->ipp_rtdstoptslen; 12710 } 12711 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12712 ASSERT(ipp->ipp_dstoptslen != 0); 12713 len += ipp->ipp_dstoptslen; 12714 } 12715 return (len); 12716 } 12717 12718 /* 12719 * All-purpose routine to build a header chain of an IPv6 header 12720 * followed by any required extension headers and a proto header, 12721 * preceeded (where necessary) by an ip6i_t private header. 12722 * 12723 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12724 * will be filled in appropriately. 12725 * Thus the caller must fill in the rest of the IPv6 header, such as 12726 * traffic class/flowid, source address (if not set here), hoplimit (if not 12727 * set here) and destination address. 12728 * 12729 * The extension headers and ip6i_t header will all be fully filled in. 
12730 */ 12731 void 12732 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12733 ip6_pkt_t *ipp, uint8_t protocol) 12734 { 12735 uint8_t *nxthdr_ptr; 12736 uint8_t *cp; 12737 ip6i_t *ip6i; 12738 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12739 12740 /* 12741 * If sending private ip6i_t header down (checksum info, nexthop, 12742 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12743 * then fill it in. (The checksum info will be filled in by icmp). 12744 */ 12745 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12746 ip6i = (ip6i_t *)ip6h; 12747 ip6h = (ip6_t *)&ip6i[1]; 12748 12749 ip6i->ip6i_flags = 0; 12750 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12751 if (ipp->ipp_fields & IPPF_IFINDEX || 12752 ipp->ipp_fields & IPPF_SCOPE_ID) { 12753 ASSERT(ipp->ipp_ifindex != 0); 12754 ip6i->ip6i_flags |= IP6I_IFINDEX; 12755 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12756 } 12757 if (ipp->ipp_fields & IPPF_ADDR) { 12758 /* 12759 * Enable per-packet source address verification if 12760 * IPV6_PKTINFO specified the source address. 12761 * ip6_src is set in the transport's _wput function. 12762 */ 12763 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12764 &ipp->ipp_addr)); 12765 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12766 } 12767 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12768 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12769 /* 12770 * We need to set this flag so that IP doesn't 12771 * rewrite the IPv6 header's hoplimit with the 12772 * current default value. 
12773 */ 12774 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12775 } 12776 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12777 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12778 &ipp->ipp_nexthop)); 12779 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12780 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12781 } 12782 /* 12783 * tell IP this is an ip6i_t private header 12784 */ 12785 ip6i->ip6i_nxt = IPPROTO_RAW; 12786 } 12787 /* Initialize IPv6 header */ 12788 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12789 if (ipp->ipp_fields & IPPF_TCLASS) { 12790 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12791 (ipp->ipp_tclass << 20); 12792 } 12793 if (ipp->ipp_fields & IPPF_ADDR) 12794 ip6h->ip6_src = ipp->ipp_addr; 12795 12796 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12797 cp = (uint8_t *)&ip6h[1]; 12798 /* 12799 * Here's where we have to start stringing together 12800 * any extension headers in the right order: 12801 * Hop-by-hop, destination, routing, and final destination opts. 12802 */ 12803 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12804 /* Hop-by-hop options */ 12805 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12806 12807 *nxthdr_ptr = IPPROTO_HOPOPTS; 12808 nxthdr_ptr = &hbh->ip6h_nxt; 12809 12810 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12811 cp += ipp->ipp_hopoptslen; 12812 } 12813 /* 12814 * En-route destination options 12815 * Only do them if there's a routing header as well 12816 */ 12817 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12818 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12819 ip6_dest_t *dst = (ip6_dest_t *)cp; 12820 12821 *nxthdr_ptr = IPPROTO_DSTOPTS; 12822 nxthdr_ptr = &dst->ip6d_nxt; 12823 12824 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12825 cp += ipp->ipp_rtdstoptslen; 12826 } 12827 /* 12828 * Routing header next 12829 */ 12830 if (ipp->ipp_fields & IPPF_RTHDR) { 12831 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12832 12833 *nxthdr_ptr = IPPROTO_ROUTING; 12834 nxthdr_ptr = &rt->ip6r_nxt; 12835 12836 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12837 cp += ipp->ipp_rthdrlen; 12838 } 
12839 /* 12840 * Do ultimate destination options 12841 */ 12842 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12843 ip6_dest_t *dest = (ip6_dest_t *)cp; 12844 12845 *nxthdr_ptr = IPPROTO_DSTOPTS; 12846 nxthdr_ptr = &dest->ip6d_nxt; 12847 12848 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12849 cp += ipp->ipp_dstoptslen; 12850 } 12851 /* 12852 * Now set the last header pointer to the proto passed in 12853 */ 12854 *nxthdr_ptr = protocol; 12855 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12856 } 12857 12858 /* 12859 * Return a pointer to the routing header extension header 12860 * in the IPv6 header(s) chain passed in. 12861 * If none found, return NULL 12862 * Assumes that all extension headers are in same mblk as the v6 header 12863 */ 12864 ip6_rthdr_t * 12865 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12866 { 12867 ip6_dest_t *desthdr; 12868 ip6_frag_t *fraghdr; 12869 uint_t hdrlen; 12870 uint8_t nexthdr; 12871 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12872 12873 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12874 return ((ip6_rthdr_t *)ptr); 12875 12876 /* 12877 * The routing header will precede all extension headers 12878 * other than the hop-by-hop and destination options 12879 * extension headers, so if we see anything other than those, 12880 * we're done and didn't find it. 12881 * We could see a destination options header alone but no 12882 * routing header, in which case we'll return NULL as soon as 12883 * we see anything after that. 12884 * Hop-by-hop and destination option headers are identical, 12885 * so we can use either one we want as a template. 12886 */ 12887 nexthdr = ip6h->ip6_nxt; 12888 while (ptr < endptr) { 12889 /* Is there enough left for len + nexthdr? 
*/ 12890 if (ptr + MIN_EHDR_LEN > endptr) 12891 return (NULL); 12892 12893 switch (nexthdr) { 12894 case IPPROTO_HOPOPTS: 12895 case IPPROTO_DSTOPTS: 12896 /* Assumes the headers are identical for hbh and dst */ 12897 desthdr = (ip6_dest_t *)ptr; 12898 hdrlen = 8 * (desthdr->ip6d_len + 1); 12899 nexthdr = desthdr->ip6d_nxt; 12900 break; 12901 12902 case IPPROTO_ROUTING: 12903 return ((ip6_rthdr_t *)ptr); 12904 12905 case IPPROTO_FRAGMENT: 12906 fraghdr = (ip6_frag_t *)ptr; 12907 hdrlen = sizeof (ip6_frag_t); 12908 nexthdr = fraghdr->ip6f_nxt; 12909 break; 12910 12911 default: 12912 return (NULL); 12913 } 12914 ptr += hdrlen; 12915 } 12916 return (NULL); 12917 } 12918 12919 /* 12920 * Called for source-routed packets originating on this node. 12921 * Manipulates the original routing header by moving every entry up 12922 * one slot, placing the first entry in the v6 header's v6_dst field, 12923 * and placing the ultimate destination in the routing header's last 12924 * slot. 12925 * 12926 * Returns the checksum diference between the ultimate destination 12927 * (last hop in the routing header when the packet is sent) and 12928 * the first hop (ip6_dst when the packet is sent) 12929 */ 12930 /* ARGSUSED2 */ 12931 uint32_t 12932 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12933 { 12934 uint_t numaddr; 12935 uint_t i; 12936 in6_addr_t *addrptr; 12937 in6_addr_t tmp; 12938 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12939 uint32_t cksm; 12940 uint32_t addrsum = 0; 12941 uint16_t *ptr; 12942 12943 /* 12944 * Perform any processing needed for source routing. 12945 * We know that all extension headers will be in the same mblk 12946 * as the IPv6 header. 12947 */ 12948 12949 /* 12950 * If no segments left in header, or the header length field is zero, 12951 * don't move hop addresses around; 12952 * Checksum difference is zero. 
12953 */ 12954 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12955 return (0); 12956 12957 ptr = (uint16_t *)&ip6h->ip6_dst; 12958 cksm = 0; 12959 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12960 cksm += ptr[i]; 12961 } 12962 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12963 12964 /* 12965 * Here's where the fun begins - we have to 12966 * move all addresses up one spot, take the 12967 * first hop and make it our first ip6_dst, 12968 * and place the ultimate destination in the 12969 * newly-opened last slot. 12970 */ 12971 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12972 numaddr = rthdr->ip6r0_len / 2; 12973 tmp = *addrptr; 12974 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12975 *addrptr = addrptr[1]; 12976 } 12977 *addrptr = ip6h->ip6_dst; 12978 ip6h->ip6_dst = tmp; 12979 12980 /* 12981 * From the checksummed ultimate destination subtract the checksummed 12982 * current ip6_dst (the first hop address). Return that number. 12983 * (In the v4 case, the second part of this is done in each routine 12984 * that calls ip_massage_options(). We do it all in this one place 12985 * for v6). 12986 */ 12987 ptr = (uint16_t *)&ip6h->ip6_dst; 12988 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12989 addrsum += ptr[i]; 12990 } 12991 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12992 if ((int)cksm < 0) 12993 cksm--; 12994 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12995 12996 return (cksm); 12997 } 12998 12999 /* 13000 * Propagate a multicast group membership operation (join/leave) (*fn) on 13001 * all interfaces crossed by the related multirt routes. 13002 * The call is considered successful if the operation succeeds 13003 * on at least one interface. 13004 * The function is called if the destination address in the packet to send 13005 * is multirouted. 
13006 */ 13007 int 13008 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 13009 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 13010 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 13011 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 13012 { 13013 ire_t *ire_gw; 13014 irb_t *irb; 13015 int index, error = 0; 13016 opt_restart_t *or; 13017 ip_stack_t *ipst = ire->ire_ipst; 13018 13019 irb = ire->ire_bucket; 13020 ASSERT(irb != NULL); 13021 13022 ASSERT(DB_TYPE(first_mp) == M_CTL); 13023 or = (opt_restart_t *)first_mp->b_rptr; 13024 13025 IRB_REFHOLD(irb); 13026 for (; ire != NULL; ire = ire->ire_next) { 13027 if ((ire->ire_flags & RTF_MULTIRT) == 0) 13028 continue; 13029 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 13030 continue; 13031 13032 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 13033 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 13034 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 13035 /* No resolver exists for the gateway; skip this ire. */ 13036 if (ire_gw == NULL) 13037 continue; 13038 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 13039 /* 13040 * A resolver exists: we can get the interface on which we have 13041 * to apply the operation. 13042 */ 13043 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 13044 first_mp); 13045 if (error == 0) 13046 or->or_private = CGTP_MCAST_SUCCESS; 13047 13048 if (ip_debug > 0) { 13049 ulong_t off; 13050 char *ksym; 13051 13052 ksym = kobj_getsymname((uintptr_t)fn, &off); 13053 ip2dbg(("ip_multirt_apply_membership_v6: " 13054 "called %s, multirt group 0x%08x via itf 0x%08x, " 13055 "error %d [success %u]\n", 13056 ksym ? 
ksym : "?", 13057 ntohl(V4_PART_OF_V6((*v6grp))), 13058 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 13059 error, or->or_private)); 13060 } 13061 13062 ire_refrele(ire_gw); 13063 if (error == EINPROGRESS) { 13064 IRB_REFRELE(irb); 13065 return (error); 13066 } 13067 } 13068 IRB_REFRELE(irb); 13069 /* 13070 * Consider the call as successful if we succeeded on at least 13071 * one interface. Otherwise, return the last encountered error. 13072 */ 13073 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 13074 } 13075 13076 void 13077 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 13078 { 13079 kstat_t *ksp; 13080 13081 ip6_stat_t template = { 13082 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 13083 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 13084 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 13085 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 13086 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 13087 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 13088 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13089 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13090 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13091 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13092 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 13093 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 13094 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 13095 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 13096 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 13097 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 13098 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 13099 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 13100 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 13101 }; 13102 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 13103 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 13104 KSTAT_FLAG_VIRTUAL, stackid); 13105 13106 if (ksp == NULL) 13107 return (NULL); 13108 13109 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 13110 ksp->ks_data = (void *)ip6_statisticsp; 13111 ksp->ks_private = (void *)(uintptr_t)stackid; 13112 13113 kstat_install(ksp); 13114 return (ksp); 13115 } 13116 13117 void 13118 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 13119 { 13120 if (ksp != NULL) { 13121 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 13122 kstat_delete_netstack(ksp, stackid); 13123 } 13124 } 13125 13126 /* 13127 * The following two functions set and get the value for the 13128 * IPV6_SRC_PREFERENCES socket option. 13129 */ 13130 int 13131 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 13132 { 13133 /* 13134 * We only support preferences that are covered by 13135 * IPV6_PREFER_SRC_MASK. 13136 */ 13137 if (prefs & ~IPV6_PREFER_SRC_MASK) 13138 return (EINVAL); 13139 13140 /* 13141 * Look for conflicting preferences or default preferences. If 13142 * both bits of a related pair are clear, the application wants the 13143 * system's default value for that pair. Both bits in a pair can't 13144 * be set. 
13145 */ 13146 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 13147 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 13148 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 13149 IPV6_PREFER_SRC_MIPMASK) { 13150 return (EINVAL); 13151 } 13152 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 13153 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 13154 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 13155 IPV6_PREFER_SRC_TMPMASK) { 13156 return (EINVAL); 13157 } 13158 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 13159 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 13160 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 13161 IPV6_PREFER_SRC_CGAMASK) { 13162 return (EINVAL); 13163 } 13164 13165 connp->conn_src_preferences = prefs; 13166 return (0); 13167 } 13168 13169 size_t 13170 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 13171 { 13172 *val = connp->conn_src_preferences; 13173 return (sizeof (connp->conn_src_preferences)); 13174 } 13175 13176 int 13177 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti, mblk_t *mp) 13178 { 13179 ill_t *ill; 13180 ire_t *ire; 13181 int error; 13182 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13183 13184 /* 13185 * Verify the source address and ifindex. Privileged users can use 13186 * any source address. For ancillary data the source address is 13187 * checked in ip_wput_v6. 13188 */ 13189 if (pkti->ipi6_ifindex != 0) { 13190 ASSERT(connp != NULL); 13191 ill = ill_lookup_on_ifindex(pkti->ipi6_ifindex, B_TRUE, 13192 CONNP_TO_WQ(connp), mp, ip_restart_optmgmt, &error, ipst); 13193 if (ill == NULL) { 13194 /* 13195 * We just want to know if the interface exists, we 13196 * don't really care about the ill pointer itself. 
13197 */ 13198 if (error != EINPROGRESS) 13199 return (error); 13200 error = 0; /* Ensure we don't use it below */ 13201 } else { 13202 ill_refrele(ill); 13203 } 13204 } 13205 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 13206 secpolicy_net_rawaccess(cr) != 0) { 13207 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 13208 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 13209 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 13210 if (ire != NULL) 13211 ire_refrele(ire); 13212 else 13213 return (ENXIO); 13214 } 13215 return (0); 13216 } 13217 13218 /* 13219 * Get the size of the IP options (including the IP headers size) 13220 * without including the AH header's size. If till_ah is B_FALSE, 13221 * and if AH header is present, dest options beyond AH header will 13222 * also be included in the returned size. 13223 */ 13224 int 13225 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 13226 { 13227 ip6_t *ip6h; 13228 uint8_t nexthdr; 13229 uint8_t *whereptr; 13230 ip6_hbh_t *hbhhdr; 13231 ip6_dest_t *dsthdr; 13232 ip6_rthdr_t *rthdr; 13233 int ehdrlen; 13234 int size; 13235 ah_t *ah; 13236 13237 ip6h = (ip6_t *)mp->b_rptr; 13238 size = IPV6_HDR_LEN; 13239 nexthdr = ip6h->ip6_nxt; 13240 whereptr = (uint8_t *)&ip6h[1]; 13241 for (;;) { 13242 /* Assume IP has already stripped it */ 13243 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 13244 switch (nexthdr) { 13245 case IPPROTO_HOPOPTS: 13246 hbhhdr = (ip6_hbh_t *)whereptr; 13247 nexthdr = hbhhdr->ip6h_nxt; 13248 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 13249 break; 13250 case IPPROTO_DSTOPTS: 13251 dsthdr = (ip6_dest_t *)whereptr; 13252 nexthdr = dsthdr->ip6d_nxt; 13253 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13254 break; 13255 case IPPROTO_ROUTING: 13256 rthdr = (ip6_rthdr_t *)whereptr; 13257 nexthdr = rthdr->ip6r_nxt; 13258 ehdrlen = 8 * (rthdr->ip6r_len + 1); 13259 break; 13260 default : 13261 if (till_ah) { 13262 ASSERT(nexthdr == IPPROTO_AH); 13263 return (size); 13264 } 13265 /* 13266 * If we don't have a 
AH header to traverse, 13267 * return now. This happens normally for 13268 * outbound datagrams where we have not inserted 13269 * the AH header. 13270 */ 13271 if (nexthdr != IPPROTO_AH) { 13272 return (size); 13273 } 13274 13275 /* 13276 * We don't include the AH header's size 13277 * to be symmetrical with other cases where 13278 * we either don't have a AH header (outbound) 13279 * or peek into the AH header yet (inbound and 13280 * not pulled up yet). 13281 */ 13282 ah = (ah_t *)whereptr; 13283 nexthdr = ah->ah_nexthdr; 13284 ehdrlen = (ah->ah_length << 2) + 8; 13285 13286 if (nexthdr == IPPROTO_DSTOPTS) { 13287 if (whereptr + ehdrlen >= mp->b_wptr) { 13288 /* 13289 * The destination options header 13290 * is not part of the first mblk. 13291 */ 13292 whereptr = mp->b_cont->b_rptr; 13293 } else { 13294 whereptr += ehdrlen; 13295 } 13296 13297 dsthdr = (ip6_dest_t *)whereptr; 13298 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 13299 size += ehdrlen; 13300 } 13301 return (size); 13302 } 13303 whereptr += ehdrlen; 13304 size += ehdrlen; 13305 } 13306 } 13307