1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 #include <inet/sadb.h> 92 #include <inet/ipsec_impl.h> 93 #include <inet/tun.h> 94 #include <inet/sctp_ip.h> 95 #include <sys/pattr.h> 96 #include <inet/ipclassifier.h> 97 #include <inet/ipsecah.h> 98 #include <inet/rawip_impl.h> 99 #include <inet/rts_impl.h> 100 #include <sys/squeue_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include 
<rpc/pmap_prot.h> 107 108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern int ip_squeue_flag; 112 113 /* 114 * Naming conventions: 115 * These rules should be judiciously applied 116 * if there is a need to identify something as IPv6 versus IPv4 117 * IPv6 functions will end with _v6 in the ip module. 118 * IPv6 functions will end with _ipv6 in the transport modules. 119 * IPv6 macros: 120 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 121 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 122 * And then there are ..V4_PART_OF_V6. 123 * The intent is that macros in the ip module end with _V6. 124 * IPv6 global variables will start with ipv6_ 125 * IPv6 structures will start with ipv6 126 * IPv6 defined constants should start with IPV6_ 127 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 128 */ 129 130 /* 131 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 132 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 133 * from IANA. This mechanism will remain in effect until an official 134 * number is obtained. 
135 */ 136 uchar_t ip6opt_ls; 137 138 const in6_addr_t ipv6_all_ones = 139 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 140 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 141 142 #ifdef _BIG_ENDIAN 143 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 144 #else /* _BIG_ENDIAN */ 145 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 146 #endif /* _BIG_ENDIAN */ 147 148 #ifdef _BIG_ENDIAN 149 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 150 #else /* _BIG_ENDIAN */ 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 152 #endif /* _BIG_ENDIAN */ 153 154 #ifdef _BIG_ENDIAN 155 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 156 #else /* _BIG_ENDIAN */ 157 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 158 #endif /* _BIG_ENDIAN */ 159 160 #ifdef _BIG_ENDIAN 161 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 162 #else /* _BIG_ENDIAN */ 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 164 #endif /* _BIG_ENDIAN */ 165 166 #ifdef _BIG_ENDIAN 167 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 168 #else /* _BIG_ENDIAN */ 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 170 #endif /* _BIG_ENDIAN */ 171 172 #ifdef _BIG_ENDIAN 173 const in6_addr_t ipv6_solicited_node_mcast = 174 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 175 #else /* _BIG_ENDIAN */ 176 const in6_addr_t ipv6_solicited_node_mcast = 177 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 178 #endif /* _BIG_ENDIAN */ 179 180 /* Leave room for ip_newroute to tack on the src and target addresses */ 181 #define OK_RESOLVER_MP_V6(mp) \ 182 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 183 184 #define IP6_MBLK_OK 0 185 #define IP6_MBLK_HDR_ERR 1 186 #define IP6_MBLK_LEN_ERR 2 187 188 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *, ill_t *, 189 
boolean_t, zoneid_t); 190 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 191 const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 192 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 193 static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *, 194 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 195 boolean_t, boolean_t, cred_t *); 196 static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *, 197 iulp_t *, ip_stack_t *); 198 static void ip_bind_post_handling_v6(conn_t *, mblk_t *, boolean_t, 199 boolean_t, ip_stack_t *); 200 static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t, 201 const in6_addr_t *, uint16_t, boolean_t); 202 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 203 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 204 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 205 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 206 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 207 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 208 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 209 uint8_t *, uint_t, uint8_t, ip_stack_t *); 210 static mblk_t *ip_rput_frag_v6(ill_t *, ill_t *, mblk_t *, ip6_t *, 211 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 212 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 213 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 214 conn_t *, int, int, zoneid_t); 215 static boolean_t ipif_lookup_testaddr_v6(ill_t *, const in6_addr_t *, 216 ipif_t **); 217 218 /* 219 * A template for an IPv6 AR_ENTRY_QUERY 220 */ 221 static areq_t ipv6_areq_template = { 222 AR_ENTRY_QUERY, /* cmd */ 223 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 224 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 225 IP6_DL_SAP, /* protocol, from arps perspective */ 226 sizeof (areq_t), /* target addr offset */ 227 IPV6_ADDR_LEN, /* target addr_length 
*/ 228 0, /* flags */ 229 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 230 IPV6_ADDR_LEN, /* sender addr length */ 231 6, /* xmit_count */ 232 1000, /* (re)xmit_interval in milliseconds */ 233 4 /* max # of requests to buffer */ 234 /* anything else filled in by the code */ 235 }; 236 237 /* 238 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 239 * The message has already been checksummed and if needed, 240 * a copy has been made to be sent any interested ICMP client (conn) 241 * Note that this is different than icmp_inbound() which does the fanout 242 * to conn's as well as local processing of the ICMP packets. 243 * 244 * All error messages are passed to the matching transport stream. 245 * 246 * Zones notes: 247 * The packet is only processed in the context of the specified zone: typically 248 * only this zone will reply to an echo request. This means that the caller must 249 * call icmp_inbound_v6() for each relevant zone. 250 */ 251 static void 252 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 253 uint_t hdr_length, boolean_t mctl_present, uint_t flags, zoneid_t zoneid, 254 mblk_t *dl_mp) 255 { 256 icmp6_t *icmp6; 257 ip6_t *ip6h; 258 boolean_t interested; 259 in6_addr_t origsrc; 260 mblk_t *first_mp; 261 ipsec_in_t *ii; 262 ip_stack_t *ipst = ill->ill_ipst; 263 264 ASSERT(ill != NULL); 265 first_mp = mp; 266 if (mctl_present) { 267 mp = first_mp->b_cont; 268 ASSERT(mp != NULL); 269 270 ii = (ipsec_in_t *)first_mp->b_rptr; 271 ASSERT(ii->ipsec_in_type == IPSEC_IN); 272 } 273 274 ip6h = (ip6_t *)mp->b_rptr; 275 276 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 277 278 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 279 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 280 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 281 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 282 freemsg(first_mp); 283 return; 284 } 285 ip6h = (ip6_t *)mp->b_rptr; 286 } 287 if (ipst->ips_icmp_accept_clear_messages 
== 0) { 288 first_mp = ipsec_check_global_policy(first_mp, NULL, 289 NULL, ip6h, mctl_present, ipst->ips_netstack); 290 if (first_mp == NULL) 291 return; 292 } 293 294 /* 295 * On a labeled system, we have to check whether the zone itself is 296 * permitted to receive raw traffic. 297 */ 298 if (is_system_labeled()) { 299 if (zoneid == ALL_ZONES) 300 zoneid = tsol_packet_to_zoneid(mp); 301 if (!tsol_can_accept_raw(mp, B_FALSE)) { 302 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 303 zoneid)); 304 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 305 freemsg(first_mp); 306 return; 307 } 308 } 309 310 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 311 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 312 icmp6->icmp6_code)); 313 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 314 315 /* Initiate IPPF processing here */ 316 if (IP6_IN_IPP(flags, ipst)) { 317 318 /* 319 * If the ifindex changes due to SIOCSLIFINDEX 320 * packet may return to IP on the wrong ill. 
321 */ 322 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 323 if (mp == NULL) { 324 if (mctl_present) { 325 freeb(first_mp); 326 } 327 return; 328 } 329 } 330 331 switch (icmp6->icmp6_type) { 332 case ICMP6_DST_UNREACH: 333 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 334 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 335 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 336 break; 337 338 case ICMP6_TIME_EXCEEDED: 339 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 340 break; 341 342 case ICMP6_PARAM_PROB: 343 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 344 break; 345 346 case ICMP6_PACKET_TOO_BIG: 347 icmp_inbound_too_big_v6(q, first_mp, ill, inill, mctl_present, 348 zoneid); 349 return; 350 case ICMP6_ECHO_REQUEST: 351 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 352 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 353 !ipst->ips_ipv6_resp_echo_mcast) 354 break; 355 356 /* 357 * We must have exclusive use of the mblk to convert it to 358 * a response. 359 * If not, we copy it. 360 */ 361 if (mp->b_datap->db_ref > 1) { 362 mblk_t *mp1; 363 364 mp1 = copymsg(mp); 365 freemsg(mp); 366 if (mp1 == NULL) { 367 BUMP_MIB(ill->ill_icmp6_mib, 368 ipv6IfIcmpInErrors); 369 if (mctl_present) 370 freeb(first_mp); 371 return; 372 } 373 mp = mp1; 374 ip6h = (ip6_t *)mp->b_rptr; 375 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 376 if (mctl_present) 377 first_mp->b_cont = mp; 378 else 379 first_mp = mp; 380 } 381 382 /* 383 * Turn the echo into an echo reply. 384 * Remove any extension headers (do not reverse a source route) 385 * and clear the flow id (keep traffic class for now). 
386 */ 387 if (hdr_length != IPV6_HDR_LEN) { 388 int i; 389 390 for (i = 0; i < IPV6_HDR_LEN; i++) 391 mp->b_rptr[hdr_length - i - 1] = 392 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 393 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 394 ip6h = (ip6_t *)mp->b_rptr; 395 ip6h->ip6_nxt = IPPROTO_ICMPV6; 396 hdr_length = IPV6_HDR_LEN; 397 } 398 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 399 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 400 401 ip6h->ip6_plen = 402 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 403 origsrc = ip6h->ip6_src; 404 /* 405 * Reverse the source and destination addresses. 406 * If the return address is a multicast, zero out the source 407 * (ip_wput_v6 will set an address). 408 */ 409 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 410 ip6h->ip6_src = ipv6_all_zeros; 411 ip6h->ip6_dst = origsrc; 412 } else { 413 ip6h->ip6_src = ip6h->ip6_dst; 414 ip6h->ip6_dst = origsrc; 415 } 416 417 /* set the hop limit */ 418 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 419 420 /* 421 * Prepare for checksum by putting icmp length in the icmp 422 * checksum field. The checksum is calculated in ip_wput_v6. 423 */ 424 icmp6->icmp6_cksum = ip6h->ip6_plen; 425 426 if (!mctl_present) { 427 /* 428 * This packet should go out the same way as it 429 * came in i.e in clear. To make sure that global 430 * policy will not be applied to this in ip_wput, 431 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
432 */ 433 ASSERT(first_mp == mp); 434 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 435 if (first_mp == NULL) { 436 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 437 freemsg(mp); 438 return; 439 } 440 ii = (ipsec_in_t *)first_mp->b_rptr; 441 442 /* This is not a secure packet */ 443 ii->ipsec_in_secure = B_FALSE; 444 first_mp->b_cont = mp; 445 } 446 ii->ipsec_in_zoneid = zoneid; 447 ASSERT(zoneid != ALL_ZONES); 448 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 449 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 450 return; 451 } 452 put(WR(q), first_mp); 453 return; 454 455 case ICMP6_ECHO_REPLY: 456 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 457 break; 458 459 case ND_ROUTER_SOLICIT: 460 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 461 break; 462 463 case ND_ROUTER_ADVERT: 464 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 465 break; 466 467 case ND_NEIGHBOR_SOLICIT: 468 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 469 if (mctl_present) 470 freeb(first_mp); 471 /* XXX may wish to pass first_mp up to ndp_input someday. */ 472 ndp_input(inill, mp, dl_mp); 473 return; 474 475 case ND_NEIGHBOR_ADVERT: 476 BUMP_MIB(ill->ill_icmp6_mib, 477 ipv6IfIcmpInNeighborAdvertisements); 478 if (mctl_present) 479 freeb(first_mp); 480 /* XXX may wish to pass first_mp up to ndp_input someday. */ 481 ndp_input(inill, mp, dl_mp); 482 return; 483 484 case ND_REDIRECT: { 485 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 486 487 if (ipst->ips_ipv6_ignore_redirect) 488 break; 489 490 /* 491 * As there is no upper client to deliver, we don't 492 * need the first_mp any more. 493 */ 494 if (mctl_present) 495 freeb(first_mp); 496 if (!pullupmsg(mp, -1)) { 497 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 498 break; 499 } 500 icmp_redirect_v6(q, mp, ill); 501 return; 502 } 503 504 /* 505 * The next three icmp messages will be handled by MLD. 
506 * Pass all valid MLD packets up to any process(es) 507 * listening on a raw ICMP socket. MLD messages are 508 * freed by mld_input function. 509 */ 510 case MLD_LISTENER_QUERY: 511 case MLD_LISTENER_REPORT: 512 case MLD_LISTENER_REDUCTION: 513 if (mctl_present) 514 freeb(first_mp); 515 mld_input(q, mp, ill); 516 return; 517 default: 518 break; 519 } 520 if (interested) { 521 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 522 inill, mctl_present, zoneid); 523 } else { 524 freemsg(first_mp); 525 } 526 } 527 528 /* 529 * Process received IPv6 ICMP Packet too big. 530 * After updating any IRE it does the fanout to any matching transport streams. 531 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 532 */ 533 /* ARGSUSED */ 534 static void 535 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 536 boolean_t mctl_present, zoneid_t zoneid) 537 { 538 ip6_t *ip6h; 539 ip6_t *inner_ip6h; 540 icmp6_t *icmp6; 541 uint16_t hdr_length; 542 uint32_t mtu; 543 ire_t *ire, *first_ire; 544 mblk_t *first_mp; 545 ip_stack_t *ipst = ill->ill_ipst; 546 547 first_mp = mp; 548 if (mctl_present) 549 mp = first_mp->b_cont; 550 /* 551 * We must have exclusive use of the mblk to update the MTU 552 * in the packet. 553 * If not, we copy it. 554 * 555 * If there's an M_CTL present, we know that allocated first_mp 556 * earlier in this function, so we know first_mp has refcnt of one. 
557 */ 558 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 559 if (mp->b_datap->db_ref > 1) { 560 mblk_t *mp1; 561 562 mp1 = copymsg(mp); 563 freemsg(mp); 564 if (mp1 == NULL) { 565 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 566 if (mctl_present) 567 freeb(first_mp); 568 return; 569 } 570 mp = mp1; 571 if (mctl_present) 572 first_mp->b_cont = mp; 573 else 574 first_mp = mp; 575 } 576 ip6h = (ip6_t *)mp->b_rptr; 577 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 578 hdr_length = ip_hdr_length_v6(mp, ip6h); 579 else 580 hdr_length = IPV6_HDR_LEN; 581 582 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 583 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 584 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 585 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 586 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 587 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 588 freemsg(first_mp); 589 return; 590 } 591 ip6h = (ip6_t *)mp->b_rptr; 592 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 593 inner_ip6h = (ip6_t *)&icmp6[1]; 594 } 595 596 /* 597 * For link local destinations matching simply on IRE type is not 598 * sufficient. Same link local addresses for different ILL's is 599 * possible. 
600 */ 601 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 602 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 603 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 604 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 605 606 if (first_ire == NULL) { 607 if (ip_debug > 2) { 608 /* ip1dbg */ 609 pr_addr_dbg("icmp_inbound_too_big_v6:" 610 "no ire for dst %s\n", AF_INET6, 611 &inner_ip6h->ip6_dst); 612 } 613 freemsg(first_mp); 614 return; 615 } 616 617 mtu = ntohl(icmp6->icmp6_mtu); 618 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 619 for (ire = first_ire; ire != NULL && 620 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 621 ire = ire->ire_next) { 622 mutex_enter(&ire->ire_lock); 623 if (mtu < IPV6_MIN_MTU) { 624 ip1dbg(("Received mtu less than IPv6 " 625 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 626 mtu = IPV6_MIN_MTU; 627 /* 628 * If an mtu less than IPv6 min mtu is received, 629 * we must include a fragment header in 630 * subsequent packets. 631 */ 632 ire->ire_frag_flag |= IPH_FRAG_HDR; 633 } 634 ip1dbg(("Received mtu from router: %d\n", mtu)); 635 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 636 if (ire->ire_max_frag == mtu) { 637 /* Decreased it */ 638 ire->ire_marks |= IRE_MARK_PMTU; 639 } 640 /* Record the new max frag size for the ULP. */ 641 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 642 /* 643 * If we need a fragment header in every packet 644 * (above case or multirouting), make sure the 645 * ULP takes it into account when computing the 646 * payload size. 
647 */ 648 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 649 sizeof (ip6_frag_t)); 650 } else { 651 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 652 } 653 mutex_exit(&ire->ire_lock); 654 } 655 rw_exit(&first_ire->ire_bucket->irb_lock); 656 ire_refrele(first_ire); 657 } else { 658 irb_t *irb = NULL; 659 /* 660 * for non-link local destinations we match only on the IRE type 661 */ 662 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 663 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 664 ipst); 665 if (ire == NULL) { 666 if (ip_debug > 2) { 667 /* ip1dbg */ 668 pr_addr_dbg("icmp_inbound_too_big_v6:" 669 "no ire for dst %s\n", 670 AF_INET6, &inner_ip6h->ip6_dst); 671 } 672 freemsg(first_mp); 673 return; 674 } 675 irb = ire->ire_bucket; 676 ire_refrele(ire); 677 rw_enter(&irb->irb_lock, RW_READER); 678 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 679 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 680 &inner_ip6h->ip6_dst)) { 681 mtu = ntohl(icmp6->icmp6_mtu); 682 mutex_enter(&ire->ire_lock); 683 if (mtu < IPV6_MIN_MTU) { 684 ip1dbg(("Received mtu less than IPv6" 685 "min mtu %d: %d\n", 686 IPV6_MIN_MTU, mtu)); 687 mtu = IPV6_MIN_MTU; 688 /* 689 * If an mtu less than IPv6 min mtu is 690 * received, we must include a fragment 691 * header in subsequent packets. 692 */ 693 ire->ire_frag_flag |= IPH_FRAG_HDR; 694 } 695 696 ip1dbg(("Received mtu from router: %d\n", mtu)); 697 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 698 if (ire->ire_max_frag == mtu) { 699 /* Decreased it */ 700 ire->ire_marks |= IRE_MARK_PMTU; 701 } 702 /* Record the new max frag size for the ULP. */ 703 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 704 /* 705 * If we need a fragment header in 706 * every packet (above case or 707 * multirouting), make sure the ULP 708 * takes it into account when computing 709 * the payload size. 
710 */ 711 icmp6->icmp6_mtu = 712 htonl(ire->ire_max_frag - 713 sizeof (ip6_frag_t)); 714 } else { 715 icmp6->icmp6_mtu = 716 htonl(ire->ire_max_frag); 717 } 718 mutex_exit(&ire->ire_lock); 719 } 720 } 721 rw_exit(&irb->irb_lock); 722 } 723 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, inill, 724 mctl_present, zoneid); 725 } 726 727 /* 728 * Fanout received ICMPv6 error packets to the transports. 729 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 730 */ 731 void 732 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 733 icmp6_t *icmp6, ill_t *ill, ill_t *inill, boolean_t mctl_present, 734 zoneid_t zoneid) 735 { 736 uint16_t *up; /* Pointer to ports in ULP header */ 737 uint32_t ports; /* reversed ports for fanout */ 738 ip6_t rip6h; /* With reversed addresses */ 739 uint16_t hdr_length; 740 uint8_t *nexthdrp; 741 uint8_t nexthdr; 742 mblk_t *first_mp; 743 ipsec_in_t *ii; 744 tcpha_t *tcpha; 745 conn_t *connp; 746 ip_stack_t *ipst = ill->ill_ipst; 747 748 first_mp = mp; 749 if (mctl_present) { 750 mp = first_mp->b_cont; 751 ASSERT(mp != NULL); 752 753 ii = (ipsec_in_t *)first_mp->b_rptr; 754 ASSERT(ii->ipsec_in_type == IPSEC_IN); 755 } else { 756 ii = NULL; 757 } 758 759 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 760 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 761 762 /* 763 * Need to pullup everything in order to use 764 * ip_hdr_length_nexthdr_v6() 765 */ 766 if (mp->b_cont != NULL) { 767 if (!pullupmsg(mp, -1)) { 768 ip1dbg(("icmp_inbound_error_fanout_v6: " 769 "pullupmsg failed\n")); 770 goto drop_pkt; 771 } 772 ip6h = (ip6_t *)mp->b_rptr; 773 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 774 } 775 776 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 777 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 778 goto drop_pkt; 779 780 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 781 goto drop_pkt; 782 nexthdr = *nexthdrp; 783 784 /* Set message type, must be done 
after pullups */ 785 mp->b_datap->db_type = M_CTL; 786 787 /* Try to pass the ICMP message to clients who need it */ 788 switch (nexthdr) { 789 case IPPROTO_UDP: { 790 /* 791 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 792 * UDP header to get the port information. 793 */ 794 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 795 mp->b_wptr) { 796 break; 797 } 798 /* 799 * Attempt to find a client stream based on port. 800 * Note that we do a reverse lookup since the header is 801 * in the form we sent it out. 802 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 803 * and we only set the src and dst addresses and nexthdr. 804 */ 805 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 806 rip6h.ip6_src = ip6h->ip6_dst; 807 rip6h.ip6_dst = ip6h->ip6_src; 808 rip6h.ip6_nxt = nexthdr; 809 ((uint16_t *)&ports)[0] = up[1]; 810 ((uint16_t *)&ports)[1] = up[0]; 811 812 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, inill, 813 IP6_NO_IPPOLICY, mctl_present, zoneid); 814 return; 815 } 816 case IPPROTO_TCP: { 817 /* 818 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 819 * the TCP header to get the port information. 820 */ 821 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 822 mp->b_wptr) { 823 break; 824 } 825 826 /* 827 * Attempt to find a client stream based on port. 828 * Note that we do a reverse lookup since the header is 829 * in the form we sent it out. 830 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 831 * we only set the src and dst addresses and nexthdr. 
832 */ 833 834 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 835 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 836 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 837 if (connp == NULL) { 838 goto drop_pkt; 839 } 840 841 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp, 842 SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR); 843 return; 844 845 } 846 case IPPROTO_SCTP: 847 /* 848 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 849 * the SCTP header to get the port information. 850 */ 851 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 852 mp->b_wptr) { 853 break; 854 } 855 856 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 857 ((uint16_t *)&ports)[0] = up[1]; 858 ((uint16_t *)&ports)[1] = up[0]; 859 ip_fanout_sctp(first_mp, inill, (ipha_t *)ip6h, ports, 0, 860 mctl_present, IP6_NO_IPPOLICY, zoneid); 861 return; 862 case IPPROTO_ESP: 863 case IPPROTO_AH: { 864 int ipsec_rc; 865 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 866 867 /* 868 * We need a IPSEC_IN in the front to fanout to AH/ESP. 869 * We will re-use the IPSEC_IN if it is already present as 870 * AH/ESP will not affect any fields in the IPSEC_IN for 871 * ICMP errors. If there is no IPSEC_IN, allocate a new 872 * one and attach it in the front. 873 */ 874 if (ii != NULL) { 875 /* 876 * ip_fanout_proto_again converts the ICMP errors 877 * that come back from AH/ESP to M_DATA so that 878 * if it is non-AH/ESP and we do a pullupmsg in 879 * this function, it would work. Convert it back 880 * to M_CTL before we send up as this is a ICMP 881 * error. This could have been generated locally or 882 * by some router. Validate the inner IPSEC 883 * headers. 884 * 885 * NOTE : ill_index is used by ip_fanout_proto_again 886 * to locate the ill. 
887 */ 888 ASSERT(ill != NULL); 889 ii->ipsec_in_ill_index = 890 ill->ill_phyint->phyint_ifindex; 891 ii->ipsec_in_rill_index = 892 inill->ill_phyint->phyint_ifindex; 893 first_mp->b_cont->b_datap->db_type = M_CTL; 894 } else { 895 /* 896 * IPSEC_IN is not present. We attach a ipsec_in 897 * message and send up to IPSEC for validating 898 * and removing the IPSEC headers. Clear 899 * ipsec_in_secure so that when we return 900 * from IPSEC, we don't mistakenly think that this 901 * is a secure packet came from the network. 902 * 903 * NOTE : ill_index is used by ip_fanout_proto_again 904 * to locate the ill. 905 */ 906 ASSERT(first_mp == mp); 907 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 908 ASSERT(ill != NULL); 909 if (first_mp == NULL) { 910 freemsg(mp); 911 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 912 return; 913 } 914 ii = (ipsec_in_t *)first_mp->b_rptr; 915 916 /* This is not a secure packet */ 917 ii->ipsec_in_secure = B_FALSE; 918 first_mp->b_cont = mp; 919 mp->b_datap->db_type = M_CTL; 920 ii->ipsec_in_ill_index = 921 ill->ill_phyint->phyint_ifindex; 922 ii->ipsec_in_rill_index = 923 inill->ill_phyint->phyint_ifindex; 924 } 925 926 if (!ipsec_loaded(ipss)) { 927 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 928 return; 929 } 930 931 if (nexthdr == IPPROTO_ESP) 932 ipsec_rc = ipsecesp_icmp_error(first_mp); 933 else 934 ipsec_rc = ipsecah_icmp_error(first_mp); 935 if (ipsec_rc == IPSEC_STATUS_FAILED) 936 return; 937 938 ip_fanout_proto_again(first_mp, ill, inill, NULL); 939 return; 940 } 941 case IPPROTO_ENCAP: 942 case IPPROTO_IPV6: 943 if ((uint8_t *)ip6h + hdr_length + 944 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 945 sizeof (ip6_t)) > mp->b_wptr) { 946 goto drop_pkt; 947 } 948 949 if (nexthdr == IPPROTO_ENCAP || 950 !IN6_ARE_ADDR_EQUAL( 951 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 952 &ip6h->ip6_src) || 953 !IN6_ARE_ADDR_EQUAL( 954 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 955 &ip6h->ip6_dst)) { 956 /* 957 * For tunnels that have used IPsec protection, 958 * we need to adjust the MTU to take into account 959 * the IPsec overhead. 960 */ 961 if (ii != NULL) 962 icmp6->icmp6_mtu = htonl( 963 ntohl(icmp6->icmp6_mtu) - 964 ipsec_in_extra_length(first_mp)); 965 } else { 966 /* 967 * Self-encapsulated case. As in the ipv4 case, 968 * we need to strip the 2nd IP header. Since mp 969 * is already pulled-up, we can simply bcopy 970 * the 3rd header + data over the 2nd header. 971 */ 972 uint16_t unused_len; 973 ip6_t *inner_ip6h = (ip6_t *) 974 ((uchar_t *)ip6h + hdr_length); 975 976 /* 977 * Make sure we don't do recursion more than once. 978 */ 979 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 980 &unused_len, &nexthdrp) || 981 *nexthdrp == IPPROTO_IPV6) { 982 goto drop_pkt; 983 } 984 985 /* 986 * We are about to modify the packet. Make a copy if 987 * someone else has a reference to it. 988 */ 989 if (DB_REF(mp) > 1) { 990 mblk_t *mp1; 991 uint16_t icmp6_offset; 992 993 mp1 = copymsg(mp); 994 if (mp1 == NULL) { 995 goto drop_pkt; 996 } 997 icmp6_offset = (uint16_t) 998 ((uchar_t *)icmp6 - mp->b_rptr); 999 freemsg(mp); 1000 mp = mp1; 1001 1002 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1003 ip6h = (ip6_t *)&icmp6[1]; 1004 inner_ip6h = (ip6_t *) 1005 ((uchar_t *)ip6h + hdr_length); 1006 1007 if (mctl_present) 1008 first_mp->b_cont = mp; 1009 else 1010 first_mp = mp; 1011 } 1012 1013 /* 1014 * Need to set db_type back to M_DATA before 1015 * refeeding mp into this function. 1016 */ 1017 DB_TYPE(mp) = M_DATA; 1018 1019 /* 1020 * Copy the 3rd header + remaining data on top 1021 * of the 2nd header. 
1022 */ 1023 bcopy(inner_ip6h, ip6h, 1024 mp->b_wptr - (uchar_t *)inner_ip6h); 1025 1026 /* 1027 * Subtract length of the 2nd header. 1028 */ 1029 mp->b_wptr -= hdr_length; 1030 1031 /* 1032 * Now recurse, and see what I _really_ should be 1033 * doing here. 1034 */ 1035 icmp_inbound_error_fanout_v6(q, first_mp, 1036 (ip6_t *)mp->b_rptr, icmp6, ill, inill, 1037 mctl_present, zoneid); 1038 return; 1039 } 1040 /* FALLTHRU */ 1041 default: 1042 /* 1043 * The rip6h header is only used for the lookup and we 1044 * only set the src and dst addresses and nexthdr. 1045 */ 1046 rip6h.ip6_src = ip6h->ip6_dst; 1047 rip6h.ip6_dst = ip6h->ip6_src; 1048 rip6h.ip6_nxt = nexthdr; 1049 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, inill, nexthdr, 0, 1050 IP6_NO_IPPOLICY, mctl_present, zoneid); 1051 return; 1052 } 1053 /* NOTREACHED */ 1054 drop_pkt: 1055 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1056 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1057 freemsg(first_mp); 1058 } 1059 1060 /* 1061 * Process received IPv6 ICMP Redirect messages. 
1062 */ 1063 /* ARGSUSED */ 1064 static void 1065 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1066 { 1067 ip6_t *ip6h; 1068 uint16_t hdr_length; 1069 nd_redirect_t *rd; 1070 ire_t *ire; 1071 ire_t *prev_ire; 1072 ire_t *redir_ire; 1073 in6_addr_t *src, *dst, *gateway; 1074 nd_opt_hdr_t *opt; 1075 nce_t *nce; 1076 int nce_flags = 0; 1077 int err = 0; 1078 boolean_t redirect_to_router = B_FALSE; 1079 int len; 1080 int optlen; 1081 iulp_t ulp_info = { 0 }; 1082 ill_t *prev_ire_ill; 1083 ipif_t *ipif; 1084 ip_stack_t *ipst = ill->ill_ipst; 1085 1086 ip6h = (ip6_t *)mp->b_rptr; 1087 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1088 hdr_length = ip_hdr_length_v6(mp, ip6h); 1089 else 1090 hdr_length = IPV6_HDR_LEN; 1091 1092 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1093 len = mp->b_wptr - mp->b_rptr - hdr_length; 1094 src = &ip6h->ip6_src; 1095 dst = &rd->nd_rd_dst; 1096 gateway = &rd->nd_rd_target; 1097 1098 /* Verify if it is a valid redirect */ 1099 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1100 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1101 (rd->nd_rd_code != 0) || 1102 (len < sizeof (nd_redirect_t)) || 1103 (IN6_IS_ADDR_V4MAPPED(dst)) || 1104 (IN6_IS_ADDR_MULTICAST(dst))) { 1105 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1106 freemsg(mp); 1107 return; 1108 } 1109 1110 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1111 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1112 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1113 freemsg(mp); 1114 return; 1115 } 1116 1117 if (len > sizeof (nd_redirect_t)) { 1118 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1119 len - sizeof (nd_redirect_t))) { 1120 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1121 freemsg(mp); 1122 return; 1123 } 1124 } 1125 1126 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1127 redirect_to_router = B_TRUE; 1128 nce_flags |= NCE_F_ISROUTER; 1129 } 1130 1131 /* ipif will be refreleased afterwards */ 1132 ipif = ipif_get_next_ipif(NULL, ill); 1133 if (ipif == NULL) { 1134 freemsg(mp); 1135 return; 1136 
} 1137 1138 /* 1139 * Verify that the IP source address of the redirect is 1140 * the same as the current first-hop router for the specified 1141 * ICMP destination address. 1142 * Also, Make sure we had a route for the dest in question and 1143 * that route was pointing to the old gateway (the source of the 1144 * redirect packet.) 1145 */ 1146 1147 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, ALL_ZONES, 1148 NULL, MATCH_IRE_GW | MATCH_IRE_ILL | MATCH_IRE_DEFAULT, ipst); 1149 1150 /* 1151 * Check that 1152 * the redirect was not from ourselves 1153 * old gateway is still directly reachable 1154 */ 1155 if (prev_ire == NULL || 1156 prev_ire->ire_type == IRE_LOCAL) { 1157 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1158 ipif_refrele(ipif); 1159 goto fail_redirect; 1160 } 1161 prev_ire_ill = ire_to_ill(prev_ire); 1162 ASSERT(prev_ire_ill != NULL); 1163 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1164 nce_flags |= NCE_F_NONUD; 1165 1166 /* 1167 * Should we use the old ULP info to create the new gateway? From 1168 * a user's perspective, we should inherit the info so that it 1169 * is a "smooth" transition. If we do not do that, then new 1170 * connections going thru the new gateway will have no route metrics, 1171 * which is counter-intuitive to user. From a network point of 1172 * view, this may or may not make sense even though the new gateway 1173 * is still directly connected to us so the route metrics should not 1174 * change much. 1175 * 1176 * But if the old ire_uinfo is not initialized, we do another 1177 * recursive lookup on the dest using the new gateway. There may 1178 * be a route to that. If so, use it to initialize the redirect 1179 * route. 1180 */ 1181 if (prev_ire->ire_uinfo.iulp_set) { 1182 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1183 } else if (redirect_to_router) { 1184 /* 1185 * Only do the following if the redirection is really to 1186 * a router. 
1187 */ 1188 ire_t *tmp_ire; 1189 ire_t *sire; 1190 1191 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1192 ALL_ZONES, 0, NULL, 1193 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1194 ipst); 1195 if (sire != NULL) { 1196 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1197 ASSERT(tmp_ire != NULL); 1198 ire_refrele(tmp_ire); 1199 ire_refrele(sire); 1200 } else if (tmp_ire != NULL) { 1201 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1202 sizeof (iulp_t)); 1203 ire_refrele(tmp_ire); 1204 } 1205 } 1206 1207 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1208 opt = (nd_opt_hdr_t *)&rd[1]; 1209 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1210 if (opt != NULL) { 1211 err = ndp_lookup_then_add_v6(ill, 1212 B_FALSE, /* don't match across illgrp */ 1213 (uchar_t *)&opt[1], /* Link layer address */ 1214 gateway, 1215 &ipv6_all_ones, /* prefix mask */ 1216 &ipv6_all_zeros, /* Mapping mask */ 1217 0, 1218 nce_flags, 1219 ND_STALE, 1220 &nce); 1221 switch (err) { 1222 case 0: 1223 NCE_REFRELE(nce); 1224 break; 1225 case EEXIST: 1226 /* 1227 * Check to see if link layer address has changed and 1228 * process the nce_state accordingly. 1229 */ 1230 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1231 NCE_REFRELE(nce); 1232 break; 1233 default: 1234 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1235 err)); 1236 ipif_refrele(ipif); 1237 goto fail_redirect; 1238 } 1239 } 1240 if (redirect_to_router) { 1241 /* icmp_redirect_ok_v6() must have already verified this */ 1242 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1243 1244 /* 1245 * Create a Route Association. This will allow us to remember 1246 * a router told us to use the particular gateway. 
1247 */ 1248 ire = ire_create_v6( 1249 dst, 1250 &ipv6_all_ones, /* mask */ 1251 &prev_ire->ire_src_addr_v6, /* source addr */ 1252 gateway, /* gateway addr */ 1253 &prev_ire->ire_max_frag, /* max frag */ 1254 NULL, /* no src nce */ 1255 NULL, /* no rfq */ 1256 NULL, /* no stq */ 1257 IRE_HOST, 1258 prev_ire->ire_ipif, 1259 NULL, 1260 0, 1261 0, 1262 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1263 &ulp_info, 1264 NULL, 1265 NULL, 1266 ipst); 1267 } else { 1268 queue_t *stq; 1269 1270 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1271 ? ipif->ipif_rq : ipif->ipif_wq; 1272 1273 /* 1274 * Just create an on link entry, i.e. interface route. 1275 */ 1276 ire = ire_create_v6( 1277 dst, /* gateway == dst */ 1278 &ipv6_all_ones, /* mask */ 1279 &prev_ire->ire_src_addr_v6, /* source addr */ 1280 &ipv6_all_zeros, /* gateway addr */ 1281 &prev_ire->ire_max_frag, /* max frag */ 1282 NULL, /* no src nce */ 1283 NULL, /* ire rfq */ 1284 stq, /* ire stq */ 1285 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1286 prev_ire->ire_ipif, 1287 &ipv6_all_ones, 1288 0, 1289 0, 1290 (RTF_DYNAMIC | RTF_HOST), 1291 &ulp_info, 1292 NULL, 1293 NULL, 1294 ipst); 1295 } 1296 1297 /* Release reference from earlier ipif_get_next_ipif() */ 1298 ipif_refrele(ipif); 1299 1300 if (ire == NULL) 1301 goto fail_redirect; 1302 1303 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1304 1305 /* tell routing sockets that we received a redirect */ 1306 ip_rts_change_v6(RTM_REDIRECT, 1307 &rd->nd_rd_dst, 1308 &rd->nd_rd_target, 1309 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1310 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1311 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1312 1313 /* 1314 * Delete any existing IRE_HOST type ires for this destination. 1315 * This together with the added IRE has the effect of 1316 * modifying an existing redirect. 
1317 */ 1318 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1319 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1320 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), ipst); 1321 1322 ire_refrele(ire); /* Held in ire_add_v6 */ 1323 1324 if (redir_ire != NULL) { 1325 if (redir_ire->ire_flags & RTF_DYNAMIC) 1326 ire_delete(redir_ire); 1327 ire_refrele(redir_ire); 1328 } 1329 } 1330 1331 if (prev_ire->ire_type == IRE_CACHE) 1332 ire_delete(prev_ire); 1333 ire_refrele(prev_ire); 1334 prev_ire = NULL; 1335 1336 fail_redirect: 1337 if (prev_ire != NULL) 1338 ire_refrele(prev_ire); 1339 freemsg(mp); 1340 } 1341 1342 static ill_t * 1343 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1344 { 1345 ill_t *ill; 1346 1347 ASSERT(WR(q) == q); 1348 1349 if (q->q_next != NULL) { 1350 ill = (ill_t *)q->q_ptr; 1351 if (ILL_CAN_LOOKUP(ill)) 1352 ill_refhold(ill); 1353 else 1354 ill = NULL; 1355 } else { 1356 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1357 NULL, NULL, NULL, NULL, NULL, ipst); 1358 } 1359 if (ill == NULL) 1360 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1361 return (ill); 1362 } 1363 1364 /* 1365 * Assigns an appropriate source address to the packet. 1366 * If origdst is one of our IP addresses that use it as the source. 1367 * If the queue is an ill queue then select a source from that ill. 1368 * Otherwise pick a source based on a route lookup back to the origsrc. 1369 * 1370 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1371 */ 1372 static in6_addr_t * 1373 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1374 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1375 { 1376 ill_t *ill; 1377 ire_t *ire; 1378 ipif_t *ipif; 1379 1380 ASSERT(!(wq->q_flag & QREADR)); 1381 if (wq->q_next != NULL) { 1382 ill = (ill_t *)wq->q_ptr; 1383 } else { 1384 ill = NULL; 1385 } 1386 1387 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1388 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1389 ipst); 1390 if (ire != NULL) { 1391 /* Destined to one of our addresses */ 1392 *src = *origdst; 1393 ire_refrele(ire); 1394 return (src); 1395 } 1396 if (ire != NULL) { 1397 ire_refrele(ire); 1398 ire = NULL; 1399 } 1400 if (ill == NULL) { 1401 /* What is the route back to the original source? */ 1402 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1403 NULL, NULL, zoneid, NULL, 1404 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1405 if (ire == NULL) { 1406 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1407 return (NULL); 1408 } 1409 ASSERT(ire->ire_ipif != NULL); 1410 ill = ire->ire_ipif->ipif_ill; 1411 ire_refrele(ire); 1412 } 1413 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1414 IPV6_PREFER_SRC_DEFAULT, zoneid); 1415 if (ipif != NULL) { 1416 *src = ipif->ipif_v6src_addr; 1417 ipif_refrele(ipif); 1418 return (src); 1419 } 1420 /* 1421 * Unusual case - can't find a usable source address to reach the 1422 * original source. Use what in the route to the source. 
1423 */ 1424 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1425 NULL, NULL, zoneid, NULL, 1426 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1427 if (ire == NULL) { 1428 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1429 return (NULL); 1430 } 1431 ASSERT(ire != NULL); 1432 *src = ire->ire_src_addr_v6; 1433 ire_refrele(ire); 1434 return (src); 1435 } 1436 1437 /* 1438 * Build and ship an IPv6 ICMP message using the packet data in mp, 1439 * and the ICMP header pointed to by "stuff". (May be called as 1440 * writer.) 1441 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1442 * verify that an icmp error packet can be sent. 1443 * 1444 * If q is an ill write side queue (which is the case when packets 1445 * arrive from ip_rput) then ip_wput code will ensure that packets to 1446 * link-local destinations are sent out that ill. 1447 * 1448 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1449 * source address (see above function). 1450 */ 1451 static void 1452 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1453 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1454 ip_stack_t *ipst) 1455 { 1456 ip6_t *ip6h; 1457 in6_addr_t v6dst; 1458 size_t len_needed; 1459 size_t msg_len; 1460 mblk_t *mp1; 1461 icmp6_t *icmp6; 1462 ill_t *ill; 1463 in6_addr_t v6src; 1464 mblk_t *ipsec_mp; 1465 ipsec_out_t *io; 1466 1467 ill = ip_queue_to_ill_v6(q, ipst); 1468 if (ill == NULL) { 1469 freemsg(mp); 1470 return; 1471 } 1472 1473 if (mctl_present) { 1474 /* 1475 * If it is : 1476 * 1477 * 1) a IPSEC_OUT, then this is caused by outbound 1478 * datagram originating on this host. IPSEC processing 1479 * may or may not have been done. Refer to comments above 1480 * icmp_inbound_error_fanout for details. 1481 * 1482 * 2) a IPSEC_IN if we are generating a icmp_message 1483 * for an incoming datagram destined for us i.e called 1484 * from ip_fanout_send_icmp. 
1485 */ 1486 ipsec_info_t *in; 1487 1488 ipsec_mp = mp; 1489 mp = ipsec_mp->b_cont; 1490 1491 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1492 ip6h = (ip6_t *)mp->b_rptr; 1493 1494 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1495 in->ipsec_info_type == IPSEC_IN); 1496 1497 if (in->ipsec_info_type == IPSEC_IN) { 1498 /* 1499 * Convert the IPSEC_IN to IPSEC_OUT. 1500 */ 1501 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1502 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1503 ill_refrele(ill); 1504 return; 1505 } 1506 } else { 1507 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1508 io = (ipsec_out_t *)in; 1509 /* 1510 * Clear out ipsec_out_proc_begin, so we do a fresh 1511 * ire lookup. 1512 */ 1513 io->ipsec_out_proc_begin = B_FALSE; 1514 } 1515 } else { 1516 /* 1517 * This is in clear. The icmp message we are building 1518 * here should go out in clear. 1519 */ 1520 ipsec_in_t *ii; 1521 ASSERT(mp->b_datap->db_type == M_DATA); 1522 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1523 if (ipsec_mp == NULL) { 1524 freemsg(mp); 1525 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1526 ill_refrele(ill); 1527 return; 1528 } 1529 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1530 1531 /* This is not a secure packet */ 1532 ii->ipsec_in_secure = B_FALSE; 1533 /* 1534 * For trusted extensions using a shared IP address we can 1535 * send using any zoneid. 1536 */ 1537 if (zoneid == ALL_ZONES) 1538 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1539 else 1540 ii->ipsec_in_zoneid = zoneid; 1541 ipsec_mp->b_cont = mp; 1542 ip6h = (ip6_t *)mp->b_rptr; 1543 /* 1544 * Convert the IPSEC_IN to IPSEC_OUT. 
1545 */ 1546 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1547 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1548 ill_refrele(ill); 1549 return; 1550 } 1551 } 1552 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1553 1554 if (v6src_ptr != NULL) { 1555 v6src = *v6src_ptr; 1556 } else { 1557 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1558 &v6src, zoneid, ipst) == NULL) { 1559 freemsg(ipsec_mp); 1560 ill_refrele(ill); 1561 return; 1562 } 1563 } 1564 v6dst = ip6h->ip6_src; 1565 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1566 msg_len = msgdsize(mp); 1567 if (msg_len > len_needed) { 1568 if (!adjmsg(mp, len_needed - msg_len)) { 1569 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1570 freemsg(ipsec_mp); 1571 ill_refrele(ill); 1572 return; 1573 } 1574 msg_len = len_needed; 1575 } 1576 mp1 = allocb_tmpl(IPV6_HDR_LEN + len, mp); 1577 if (mp1 == NULL) { 1578 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1579 freemsg(ipsec_mp); 1580 ill_refrele(ill); 1581 return; 1582 } 1583 ill_refrele(ill); 1584 mp1->b_cont = mp; 1585 mp = mp1; 1586 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1587 io->ipsec_out_type == IPSEC_OUT); 1588 ipsec_mp->b_cont = mp; 1589 1590 /* 1591 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1592 * node generates be accepted in peace by all on-host destinations. 1593 * If we do NOT assume that all on-host destinations trust 1594 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1595 * (Look for ipsec_out_icmp_loopback). 
1596 */ 1597 io->ipsec_out_icmp_loopback = B_TRUE; 1598 1599 ip6h = (ip6_t *)mp->b_rptr; 1600 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1601 1602 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1603 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1604 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1605 ip6h->ip6_dst = v6dst; 1606 ip6h->ip6_src = v6src; 1607 msg_len += IPV6_HDR_LEN + len; 1608 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1609 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1610 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1611 } 1612 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1613 icmp6 = (icmp6_t *)&ip6h[1]; 1614 bcopy(stuff, (char *)icmp6, len); 1615 /* 1616 * Prepare for checksum by putting icmp length in the icmp 1617 * checksum field. The checksum is calculated in ip_wput_v6. 1618 */ 1619 icmp6->icmp6_cksum = ip6h->ip6_plen; 1620 if (icmp6->icmp6_type == ND_REDIRECT) { 1621 ip6h->ip6_hops = IPV6_MAX_HOPS; 1622 } 1623 /* Send to V6 writeside put routine */ 1624 put(q, ipsec_mp); 1625 } 1626 1627 /* 1628 * Update the output mib when ICMPv6 packets are sent. 
1629 */ 1630 static void 1631 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1632 { 1633 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1634 1635 switch (icmp6->icmp6_type) { 1636 case ICMP6_DST_UNREACH: 1637 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1638 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1639 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1640 break; 1641 1642 case ICMP6_TIME_EXCEEDED: 1643 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1644 break; 1645 1646 case ICMP6_PARAM_PROB: 1647 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1648 break; 1649 1650 case ICMP6_PACKET_TOO_BIG: 1651 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1652 break; 1653 1654 case ICMP6_ECHO_REQUEST: 1655 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1656 break; 1657 1658 case ICMP6_ECHO_REPLY: 1659 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1660 break; 1661 1662 case ND_ROUTER_SOLICIT: 1663 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1664 break; 1665 1666 case ND_ROUTER_ADVERT: 1667 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1668 break; 1669 1670 case ND_NEIGHBOR_SOLICIT: 1671 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1672 break; 1673 1674 case ND_NEIGHBOR_ADVERT: 1675 BUMP_MIB(ill->ill_icmp6_mib, 1676 ipv6IfIcmpOutNeighborAdvertisements); 1677 break; 1678 1679 case ND_REDIRECT: 1680 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1681 break; 1682 1683 case MLD_LISTENER_QUERY: 1684 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1685 break; 1686 1687 case MLD_LISTENER_REPORT: 1688 case MLD_V2_LISTENER_REPORT: 1689 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1690 break; 1691 1692 case MLD_LISTENER_REDUCTION: 1693 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1694 break; 1695 } 1696 } 1697 1698 /* 1699 * Check if it is ok to send an ICMPv6 error packet in 1700 * response to the IP packet in 
mp. 1701 * Free the message and return null if no 1702 * ICMP error packet should be sent. 1703 */ 1704 static mblk_t * 1705 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1706 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1707 { 1708 ip6_t *ip6h; 1709 1710 if (!mp) 1711 return (NULL); 1712 1713 ip6h = (ip6_t *)mp->b_rptr; 1714 1715 /* Check if source address uniquely identifies the host */ 1716 1717 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1718 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1719 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1720 freemsg(mp); 1721 return (NULL); 1722 } 1723 1724 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1725 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1726 icmp6_t *icmp6; 1727 1728 if (mp->b_wptr - mp->b_rptr < len_needed) { 1729 if (!pullupmsg(mp, len_needed)) { 1730 ill_t *ill; 1731 1732 ill = ip_queue_to_ill_v6(q, ipst); 1733 if (ill == NULL) { 1734 BUMP_MIB(&ipst->ips_icmp6_mib, 1735 ipv6IfIcmpInErrors); 1736 } else { 1737 BUMP_MIB(ill->ill_icmp6_mib, 1738 ipv6IfIcmpInErrors); 1739 ill_refrele(ill); 1740 } 1741 freemsg(mp); 1742 return (NULL); 1743 } 1744 ip6h = (ip6_t *)mp->b_rptr; 1745 } 1746 icmp6 = (icmp6_t *)&ip6h[1]; 1747 /* Explicitly do not generate errors in response to redirects */ 1748 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1749 icmp6->icmp6_type == ND_REDIRECT) { 1750 freemsg(mp); 1751 return (NULL); 1752 } 1753 } 1754 /* 1755 * Check that the destination is not multicast and that the packet 1756 * was not sent on link layer broadcast or multicast. (Exception 1757 * is Packet too big message as per the draft - when mcast_ok is set.) 1758 */ 1759 if (!mcast_ok && 1760 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1761 freemsg(mp); 1762 return (NULL); 1763 } 1764 if (icmp_err_rate_limit(ipst)) { 1765 /* 1766 * Only send ICMP error packets every so often. 1767 * This should be done on a per port/source basis, 1768 * but for now this will suffice. 
1769 */ 1770 freemsg(mp); 1771 return (NULL); 1772 } 1773 return (mp); 1774 } 1775 1776 /* 1777 * Generate an ICMPv6 redirect message. 1778 * Include target link layer address option if it exits. 1779 * Always include redirect header. 1780 */ 1781 static void 1782 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1783 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1784 { 1785 nd_redirect_t *rd; 1786 nd_opt_rd_hdr_t *rdh; 1787 uchar_t *buf; 1788 nce_t *nce = NULL; 1789 nd_opt_hdr_t *opt; 1790 int len; 1791 int ll_opt_len = 0; 1792 int max_redir_hdr_data_len; 1793 int pkt_len; 1794 in6_addr_t *srcp; 1795 ip_stack_t *ipst = ill->ill_ipst; 1796 1797 /* 1798 * We are called from ip_rput where we could 1799 * not have attached an IPSEC_IN. 1800 */ 1801 ASSERT(mp->b_datap->db_type == M_DATA); 1802 1803 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1804 if (mp == NULL) 1805 return; 1806 nce = ndp_lookup_v6(ill, B_TRUE, targetp, B_FALSE); 1807 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1808 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1809 ill->ill_phys_addr_length + 7)/8 * 8; 1810 } 1811 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1812 ASSERT(len % 4 == 0); 1813 buf = kmem_alloc(len, KM_NOSLEEP); 1814 if (buf == NULL) { 1815 if (nce != NULL) 1816 NCE_REFRELE(nce); 1817 freemsg(mp); 1818 return; 1819 } 1820 1821 rd = (nd_redirect_t *)buf; 1822 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1823 rd->nd_rd_code = 0; 1824 rd->nd_rd_reserved = 0; 1825 rd->nd_rd_target = *targetp; 1826 rd->nd_rd_dst = *dest; 1827 1828 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1829 if (nce != NULL && ll_opt_len != 0) { 1830 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1831 opt->nd_opt_len = ll_opt_len/8; 1832 bcopy((char *)nce->nce_res_mp->b_rptr + 1833 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1834 ill->ill_phys_addr_length); 1835 } 1836 if (nce != NULL) 1837 NCE_REFRELE(nce); 1838 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + 
ll_opt_len); 1839 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1840 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1841 max_redir_hdr_data_len = 1842 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1843 pkt_len = msgdsize(mp); 1844 /* Make sure mp is 8 byte aligned */ 1845 if (pkt_len > max_redir_hdr_data_len) { 1846 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1847 sizeof (nd_opt_rd_hdr_t))/8; 1848 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1849 } else { 1850 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1851 (void) adjmsg(mp, -(pkt_len % 8)); 1852 } 1853 rdh->nd_opt_rh_reserved1 = 0; 1854 rdh->nd_opt_rh_reserved2 = 0; 1855 /* ipif_v6src_addr contains the link-local source address */ 1856 srcp = &ill->ill_ipif->ipif_v6src_addr; 1857 1858 /* Redirects sent by router, and router is global zone */ 1859 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1860 kmem_free(buf, len); 1861 } 1862 1863 1864 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1865 void 1866 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1867 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1868 ip_stack_t *ipst) 1869 { 1870 icmp6_t icmp6; 1871 boolean_t mctl_present; 1872 mblk_t *first_mp; 1873 1874 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1875 1876 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1877 if (mp == NULL) { 1878 if (mctl_present) 1879 freeb(first_mp); 1880 return; 1881 } 1882 bzero(&icmp6, sizeof (icmp6_t)); 1883 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1884 icmp6.icmp6_code = code; 1885 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1886 zoneid, ipst); 1887 } 1888 1889 /* 1890 * Generate an ICMP unreachable message. 
1891 */ 1892 void 1893 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1894 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1895 ip_stack_t *ipst) 1896 { 1897 icmp6_t icmp6; 1898 boolean_t mctl_present; 1899 mblk_t *first_mp; 1900 1901 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1902 1903 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1904 if (mp == NULL) { 1905 if (mctl_present) 1906 freeb(first_mp); 1907 return; 1908 } 1909 bzero(&icmp6, sizeof (icmp6_t)); 1910 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1911 icmp6.icmp6_code = code; 1912 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1913 zoneid, ipst); 1914 } 1915 1916 /* 1917 * Generate an ICMP pkt too big message. 1918 */ 1919 static void 1920 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1921 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1922 { 1923 icmp6_t icmp6; 1924 mblk_t *first_mp; 1925 boolean_t mctl_present; 1926 1927 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1928 1929 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1930 if (mp == NULL) { 1931 if (mctl_present) 1932 freeb(first_mp); 1933 return; 1934 } 1935 bzero(&icmp6, sizeof (icmp6_t)); 1936 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1937 icmp6.icmp6_code = 0; 1938 icmp6.icmp6_mtu = htonl(mtu); 1939 1940 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1941 zoneid, ipst); 1942 } 1943 1944 /* 1945 * Generate an ICMP parameter problem message. (May be called as writer.) 1946 * 'offset' is the offset from the beginning of the packet in error. 
1947 */ 1948 static void 1949 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 1950 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1951 ip_stack_t *ipst) 1952 { 1953 icmp6_t icmp6; 1954 boolean_t mctl_present; 1955 mblk_t *first_mp; 1956 1957 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1958 1959 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1960 if (mp == NULL) { 1961 if (mctl_present) 1962 freeb(first_mp); 1963 return; 1964 } 1965 bzero((char *)&icmp6, sizeof (icmp6_t)); 1966 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1967 icmp6.icmp6_code = code; 1968 icmp6.icmp6_pptr = htonl(offset); 1969 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1970 zoneid, ipst); 1971 } 1972 1973 /* 1974 * This code will need to take into account the possibility of binding 1975 * to a link local address on a multi-homed host, in which case the 1976 * outgoing interface (from the conn) will need to be used when getting 1977 * an ire for the dst. Going through proper outgoing interface and 1978 * choosing the source address corresponding to the outgoing interface 1979 * is necessary when the destination address is a link-local address and 1980 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 1981 * This can happen when active connection is setup; thus ipp pointer 1982 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 1983 * pointer is passed as ipp pointer. 
1984 */ 1985 mblk_t * 1986 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 1987 { 1988 ssize_t len; 1989 int protocol; 1990 struct T_bind_req *tbr; 1991 sin6_t *sin6; 1992 ipa6_conn_t *ac6; 1993 in6_addr_t *v6srcp; 1994 in6_addr_t *v6dstp; 1995 uint16_t lport; 1996 uint16_t fport; 1997 uchar_t *ucp; 1998 int error = 0; 1999 boolean_t local_bind; 2000 ipa6_conn_x_t *acx6; 2001 boolean_t verify_dst; 2002 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2003 cred_t *cr; 2004 2005 /* 2006 * All Solaris components should pass a db_credp 2007 * for this TPI message, hence we ASSERT. 2008 * But in case there is some other M_PROTO that looks 2009 * like a TPI message sent by some other kernel 2010 * component, we check and return an error. 2011 */ 2012 cr = msg_getcred(mp, NULL); 2013 ASSERT(cr != NULL); 2014 if (cr == NULL) { 2015 error = EINVAL; 2016 goto bad_addr; 2017 } 2018 2019 ASSERT(connp->conn_af_isv6); 2020 len = mp->b_wptr - mp->b_rptr; 2021 if (len < (sizeof (*tbr) + 1)) { 2022 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2023 "ip_bind_v6: bogus msg, len %ld", len); 2024 goto bad_addr; 2025 } 2026 /* Back up and extract the protocol identifier. */ 2027 mp->b_wptr--; 2028 tbr = (struct T_bind_req *)mp->b_rptr; 2029 /* Reset the message type in preparation for shipping it back. */ 2030 mp->b_datap->db_type = M_PCPROTO; 2031 2032 protocol = *mp->b_wptr & 0xFF; 2033 connp->conn_ulp = (uint8_t)protocol; 2034 2035 /* 2036 * Check for a zero length address. This is from a protocol that 2037 * wants to register to receive all packets of its type. 2038 */ 2039 if (tbr->ADDR_length == 0) { 2040 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2041 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2042 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2043 NULL) { 2044 /* 2045 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2046 * Do not allow others to bind to these. 
2047 */ 2048 goto bad_addr; 2049 } 2050 2051 /* 2052 * 2053 * The udp module never sends down a zero-length address, 2054 * and allowing this on a labeled system will break MLP 2055 * functionality. 2056 */ 2057 if (is_system_labeled() && protocol == IPPROTO_UDP) 2058 goto bad_addr; 2059 2060 /* Allow ipsec plumbing */ 2061 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2062 protocol != IPPROTO_ESP) 2063 goto bad_addr; 2064 2065 connp->conn_srcv6 = ipv6_all_zeros; 2066 ipcl_proto_insert_v6(connp, protocol); 2067 2068 tbr->PRIM_type = T_BIND_ACK; 2069 return (mp); 2070 } 2071 2072 /* Extract the address pointer from the message. */ 2073 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2074 tbr->ADDR_length); 2075 if (ucp == NULL) { 2076 ip1dbg(("ip_bind_v6: no address\n")); 2077 goto bad_addr; 2078 } 2079 if (!OK_32PTR(ucp)) { 2080 ip1dbg(("ip_bind_v6: unaligned address\n")); 2081 goto bad_addr; 2082 } 2083 2084 switch (tbr->ADDR_length) { 2085 default: 2086 ip1dbg(("ip_bind_v6: bad address length %d\n", 2087 (int)tbr->ADDR_length)); 2088 goto bad_addr; 2089 2090 case IPV6_ADDR_LEN: 2091 /* Verification of local address only */ 2092 v6srcp = (in6_addr_t *)ucp; 2093 lport = 0; 2094 local_bind = B_TRUE; 2095 break; 2096 2097 case sizeof (sin6_t): 2098 sin6 = (sin6_t *)ucp; 2099 v6srcp = &sin6->sin6_addr; 2100 lport = sin6->sin6_port; 2101 local_bind = B_TRUE; 2102 break; 2103 2104 case sizeof (ipa6_conn_t): 2105 /* 2106 * Verify that both the source and destination addresses 2107 * are valid. 2108 */ 2109 ac6 = (ipa6_conn_t *)ucp; 2110 v6srcp = &ac6->ac6_laddr; 2111 v6dstp = &ac6->ac6_faddr; 2112 fport = ac6->ac6_fport; 2113 /* For raw socket, the local port is not set. */ 2114 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2115 connp->conn_lport; 2116 local_bind = B_FALSE; 2117 /* Always verify destination reachability. 
*/ 2118 verify_dst = B_TRUE; 2119 break; 2120 2121 case sizeof (ipa6_conn_x_t): 2122 /* 2123 * Verify that the source address is valid. 2124 */ 2125 acx6 = (ipa6_conn_x_t *)ucp; 2126 ac6 = &acx6->ac6x_conn; 2127 v6srcp = &ac6->ac6_laddr; 2128 v6dstp = &ac6->ac6_faddr; 2129 fport = ac6->ac6_fport; 2130 lport = ac6->ac6_lport; 2131 local_bind = B_FALSE; 2132 /* 2133 * Client that passed ipa6_conn_x_t to us specifies whether to 2134 * verify destination reachability. 2135 */ 2136 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2137 break; 2138 } 2139 if (local_bind) { 2140 error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol, 2141 v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN); 2142 } else { 2143 error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol, 2144 v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2145 } 2146 2147 if (error == 0) { 2148 /* Send it home. */ 2149 mp->b_datap->db_type = M_PCPROTO; 2150 tbr->PRIM_type = T_BIND_ACK; 2151 return (mp); 2152 } 2153 2154 bad_addr: 2155 ASSERT(error != EINPROGRESS); 2156 if (error > 0) 2157 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2158 else 2159 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2160 return (mp); 2161 } 2162 2163 static void 2164 ip_bind_post_handling_v6(conn_t *connp, mblk_t *mp, 2165 boolean_t version_changed, boolean_t ire_requested, ip_stack_t *ipst) 2166 { 2167 /* Update conn_send and pktversion if v4/v6 changed */ 2168 if (version_changed) { 2169 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2170 } 2171 2172 /* 2173 * Pass the IPSEC headers size in ire_ipsec_overhead. 2174 * We can't do this in ip_bind_insert_ire because the policy 2175 * may not have been inherited at that point in time and hence 2176 * conn_out_enforce_policy may not be set. 
2177 */ 2178 if (ire_requested && connp->conn_out_enforce_policy && 2179 mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE) { 2180 ire_t *ire = (ire_t *)mp->b_rptr; 2181 ASSERT(MBLKL(mp) >= sizeof (ire_t)); 2182 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2183 } 2184 } 2185 2186 /* 2187 * Here address is verified to be a valid local address. 2188 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2189 * address is also considered a valid local address. 2190 * In the case of a multicast address, however, the 2191 * upper protocol is expected to reset the src address 2192 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2193 * no packets are emitted with multicast address as 2194 * source address. 2195 * The addresses valid for bind are: 2196 * (1) - in6addr_any 2197 * (2) - IP address of an UP interface 2198 * (3) - IP address of a DOWN interface 2199 * (4) - a multicast address. In this case 2200 * the conn will only receive packets destined to 2201 * the specified multicast address. Note: the 2202 * application still has to issue an 2203 * IPV6_JOIN_GROUP socket option. 2204 * 2205 * In all the above cases, the bound address must be valid in the current zone. 2206 * When the address is loopback or multicast, there might be many matching IREs 2207 * so bind has to look up based on the zone. 2208 */ 2209 /* 2210 * Verify the local IP address. Does not change the conn_t except 2211 * conn_fully_bound and conn_policy_cached. 
2212 */ 2213 static int 2214 ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2215 const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert) 2216 { 2217 int error = 0; 2218 ire_t *src_ire = NULL; 2219 zoneid_t zoneid; 2220 mblk_t *mp = NULL; 2221 boolean_t ire_requested; 2222 boolean_t ipsec_policy_set; 2223 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2224 2225 if (mpp) 2226 mp = *mpp; 2227 2228 ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2229 ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET); 2230 2231 /* 2232 * If it was previously connected, conn_fully_bound would have 2233 * been set. 2234 */ 2235 connp->conn_fully_bound = B_FALSE; 2236 2237 zoneid = connp->conn_zoneid; 2238 2239 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2240 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2241 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2242 /* 2243 * If an address other than in6addr_any is requested, 2244 * we verify that it is a valid address for bind 2245 * Note: Following code is in if-else-if form for 2246 * readability compared to a condition check. 2247 */ 2248 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2249 /* LINTED - statement has no consequent */ 2250 if (IRE_IS_LOCAL(src_ire)) { 2251 /* 2252 * (2) Bind to address of local UP interface 2253 */ 2254 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2255 ipif_t *multi_ipif = NULL; 2256 ire_t *save_ire; 2257 /* 2258 * (4) bind to multicast address. 2259 * Fake out the IRE returned to upper 2260 * layer to be a broadcast IRE in 2261 * ip_bind_insert_ire_v6(). 2262 * Pass other information that matches 2263 * the ipif (e.g. the source address). 
2264 * conn_multicast_ill is only used for 2265 * IPv6 packets 2266 */ 2267 mutex_enter(&connp->conn_lock); 2268 if (connp->conn_multicast_ill != NULL) { 2269 (void) ipif_lookup_zoneid( 2270 connp->conn_multicast_ill, zoneid, 0, 2271 &multi_ipif); 2272 } else { 2273 /* 2274 * Look for default like 2275 * ip_wput_v6 2276 */ 2277 multi_ipif = ipif_lookup_group_v6( 2278 &ipv6_unspecified_group, zoneid, ipst); 2279 } 2280 mutex_exit(&connp->conn_lock); 2281 save_ire = src_ire; 2282 src_ire = NULL; 2283 if (multi_ipif == NULL || !ire_requested || 2284 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2285 src_ire = save_ire; 2286 error = EADDRNOTAVAIL; 2287 } else { 2288 ASSERT(src_ire != NULL); 2289 if (save_ire != NULL) 2290 ire_refrele(save_ire); 2291 } 2292 if (multi_ipif != NULL) 2293 ipif_refrele(multi_ipif); 2294 } else { 2295 if (!ip_addr_exists_v6(v6src, zoneid, ipst)) { 2296 /* 2297 * Not a valid address for bind 2298 */ 2299 error = EADDRNOTAVAIL; 2300 } 2301 } 2302 2303 if (error != 0) { 2304 /* Red Alert! Attempting to be a bogon! */ 2305 if (ip_debug > 2) { 2306 /* ip1dbg */ 2307 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2308 " address %s\n", AF_INET6, v6src); 2309 } 2310 goto bad_addr; 2311 } 2312 } 2313 2314 /* 2315 * Allow setting new policies. For example, disconnects come 2316 * down as ipa_t bind. As we would have set conn_policy_cached 2317 * to B_TRUE before, we should set it to B_FALSE, so that policy 2318 * can change after the disconnect. 2319 */ 2320 connp->conn_policy_cached = B_FALSE; 2321 2322 /* If not fanout_insert this was just an address verification */ 2323 if (fanout_insert) { 2324 /* 2325 * The addresses have been verified. Time to insert in 2326 * the correct fanout list. 
2327 */ 2328 connp->conn_srcv6 = *v6src; 2329 connp->conn_remv6 = ipv6_all_zeros; 2330 connp->conn_lport = lport; 2331 connp->conn_fport = 0; 2332 error = ipcl_bind_insert_v6(connp, protocol, v6src, lport); 2333 } 2334 if (error == 0) { 2335 if (ire_requested) { 2336 if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL, 2337 ipst)) { 2338 error = -1; 2339 goto bad_addr; 2340 } 2341 mp = *mpp; 2342 } else if (ipsec_policy_set) { 2343 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2344 error = -1; 2345 goto bad_addr; 2346 } 2347 } 2348 } 2349 bad_addr: 2350 if (error != 0) { 2351 if (connp->conn_anon_port) { 2352 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2353 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2354 B_FALSE); 2355 } 2356 connp->conn_mlp_type = mlptSingle; 2357 } 2358 2359 if (src_ire != NULL) 2360 ire_refrele(src_ire); 2361 2362 if (ipsec_policy_set) { 2363 ASSERT(mp != NULL); 2364 freeb(mp); 2365 /* 2366 * As of now assume that nothing else accompanies 2367 * IPSEC_POLICY_SET. 2368 */ 2369 *mpp = NULL; 2370 } 2371 2372 return (error); 2373 } 2374 int 2375 ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2376 const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert) 2377 { 2378 int error; 2379 boolean_t ire_requested; 2380 mblk_t *mp = NULL; 2381 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2382 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2383 2384 /* 2385 * Note that we allow connect to broadcast and multicast 2386 * address when ire_requested is set. Thus the ULP 2387 * has to check for IRE_BROADCAST and multicast. 
2388 */ 2389 if (mpp) 2390 mp = *mpp; 2391 ire_requested = (mp && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2392 2393 ASSERT(connp->conn_af_isv6); 2394 connp->conn_ulp = protocol; 2395 2396 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2397 /* Bind to IPv4 address */ 2398 ipaddr_t v4src; 2399 2400 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2401 2402 error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport, 2403 fanout_insert); 2404 if (error != 0) 2405 goto bad_addr; 2406 connp->conn_pkt_isv6 = B_FALSE; 2407 } else { 2408 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2409 error = 0; 2410 goto bad_addr; 2411 } 2412 error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp, 2413 lport, fanout_insert); 2414 if (error != 0) 2415 goto bad_addr; 2416 connp->conn_pkt_isv6 = B_TRUE; 2417 } 2418 2419 ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, 2420 orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); 2421 return (0); 2422 2423 bad_addr: 2424 if (error < 0) 2425 error = -TBADADDR; 2426 return (error); 2427 } 2428 2429 /* 2430 * Verify that both the source and destination addresses 2431 * are valid. If verify_dst, then destination address must also be reachable, 2432 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2433 * It takes ip6_pkt_t * as one of the arguments to determine correct 2434 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2435 * destination address. Note that parameter ipp is only useful for TCP connect 2436 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2437 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2438 * 2439 */ 2440 int 2441 ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2442 in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst, 2443 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2444 boolean_t verify_dst, cred_t *cr) 2445 { 2446 ire_t *src_ire; 2447 ire_t *dst_ire; 2448 int error = 0; 2449 ire_t *sire = NULL; 2450 ire_t *md_dst_ire = NULL; 2451 ill_t *md_ill = NULL; 2452 ill_t *dst_ill = NULL; 2453 ipif_t *src_ipif = NULL; 2454 zoneid_t zoneid; 2455 boolean_t ill_held = B_FALSE; 2456 mblk_t *mp = NULL; 2457 boolean_t ire_requested = B_FALSE; 2458 boolean_t ipsec_policy_set = B_FALSE; 2459 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2460 ts_label_t *tsl = NULL; 2461 cred_t *effective_cred = NULL; 2462 2463 if (mpp) 2464 mp = *mpp; 2465 2466 if (mp != NULL) { 2467 ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2468 ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET); 2469 } 2470 2471 src_ire = dst_ire = NULL; 2472 /* 2473 * If we never got a disconnect before, clear it now. 2474 */ 2475 connp->conn_fully_bound = B_FALSE; 2476 2477 zoneid = connp->conn_zoneid; 2478 2479 /* 2480 * Check whether Trusted Solaris policy allows communication with this 2481 * host, and pretend that the destination is unreachable if not. 2482 * 2483 * This is never a problem for TCP, since that transport is known to 2484 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2485 * handling. If the remote is unreachable, it will be detected at that 2486 * point, so there's no reason to check it here. 2487 * 2488 * Note that for sendto (and other datagram-oriented friends), this 2489 * check is done as part of the data path label computation instead. 2490 * The check here is just to make non-TCP connect() report the right 2491 * error. 
2492 */ 2493 if (is_system_labeled() && !IPCL_IS_TCP(connp)) { 2494 if ((error = tsol_check_dest(cr, v6dst, IPV6_VERSION, 2495 connp->conn_mac_exempt, &effective_cred)) != 0) { 2496 if (ip_debug > 2) { 2497 pr_addr_dbg( 2498 "ip_bind_connected: no label for dst %s\n", 2499 AF_INET6, v6dst); 2500 } 2501 goto bad_addr; 2502 } 2503 2504 /* 2505 * tsol_check_dest() may have created a new cred with 2506 * a modified security label. Use that cred if it exists 2507 * for ire lookups. 2508 */ 2509 if (effective_cred == NULL) { 2510 tsl = crgetlabel(cr); 2511 } else { 2512 tsl = crgetlabel(effective_cred); 2513 } 2514 } 2515 2516 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2517 ipif_t *ipif; 2518 2519 /* 2520 * Use an "emulated" IRE_BROADCAST to tell the transport it 2521 * is a multicast. 2522 * Pass other information that matches 2523 * the ipif (e.g. the source address). 2524 * 2525 * conn_multicast_ill is only used for IPv6 packets 2526 */ 2527 mutex_enter(&connp->conn_lock); 2528 if (connp->conn_multicast_ill != NULL) { 2529 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2530 zoneid, 0, &ipif); 2531 } else { 2532 /* Look for default like ip_wput_v6 */ 2533 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2534 } 2535 mutex_exit(&connp->conn_lock); 2536 if (ipif == NULL || ire_requested || 2537 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2538 if (ipif != NULL) 2539 ipif_refrele(ipif); 2540 if (ip_debug > 2) { 2541 /* ip1dbg */ 2542 pr_addr_dbg("ip_bind_connected_v6: bad " 2543 "connected multicast %s\n", AF_INET6, 2544 v6dst); 2545 } 2546 error = ENETUNREACH; 2547 goto bad_addr; 2548 } 2549 if (ipif != NULL) 2550 ipif_refrele(ipif); 2551 } else { 2552 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2553 NULL, &sire, zoneid, tsl, 2554 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2555 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2556 ipst); 2557 /* 2558 * We also prevent ire's with src address INADDR_ANY to 2559 * be used, which are created temporarily for 
2560 * sending out packets from endpoints that have 2561 * conn_unspec_src set. 2562 */ 2563 if (dst_ire == NULL || 2564 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2565 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2566 /* 2567 * When verifying destination reachability, we always 2568 * complain. 2569 * 2570 * When not verifying destination reachability but we 2571 * found an IRE, i.e. the destination is reachable, 2572 * then the other tests still apply and we complain. 2573 */ 2574 if (verify_dst || (dst_ire != NULL)) { 2575 if (ip_debug > 2) { 2576 /* ip1dbg */ 2577 pr_addr_dbg("ip_bind_connected_v6: bad" 2578 " connected dst %s\n", AF_INET6, 2579 v6dst); 2580 } 2581 if (dst_ire == NULL || 2582 !(dst_ire->ire_type & IRE_HOST)) { 2583 error = ENETUNREACH; 2584 } else { 2585 error = EHOSTUNREACH; 2586 } 2587 goto bad_addr; 2588 } 2589 } 2590 } 2591 2592 /* 2593 * If the app does a connect(), it means that it will most likely 2594 * send more than 1 packet to the destination. It makes sense 2595 * to clear the temporary flag. 2596 */ 2597 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2598 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2599 irb_t *irb = dst_ire->ire_bucket; 2600 2601 rw_enter(&irb->irb_lock, RW_WRITER); 2602 /* 2603 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2604 * the lock in order to guarantee irb_tmp_ire_cnt. 2605 */ 2606 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2607 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2608 irb->irb_tmp_ire_cnt--; 2609 } 2610 rw_exit(&irb->irb_lock); 2611 } 2612 2613 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2614 2615 /* 2616 * See if we should notify ULP about MDT; we do this whether or not 2617 * ire_requested is TRUE, in order to handle active connects; MDT 2618 * eligibility tests for passive connects are handled separately 2619 * through tcp_adapt_ire(). 
We do this before the source address 2620 * selection, because dst_ire may change after a call to 2621 * ipif_select_source_v6(). This is a best-effort check, as the 2622 * packet for this connection may not actually go through 2623 * dst_ire->ire_stq, and the exact IRE can only be known after 2624 * calling ip_newroute_v6(). This is why we further check on the 2625 * IRE during Multidata packet transmission in tcp_multisend(). 2626 */ 2627 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2628 dst_ire != NULL && 2629 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2630 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2631 ILL_MDT_CAPABLE(md_ill)) { 2632 md_dst_ire = dst_ire; 2633 IRE_REFHOLD(md_dst_ire); 2634 } 2635 2636 if (dst_ire != NULL && 2637 dst_ire->ire_type == IRE_LOCAL && 2638 dst_ire->ire_zoneid != zoneid && 2639 dst_ire->ire_zoneid != ALL_ZONES) { 2640 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2641 zoneid, 0, NULL, 2642 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2643 MATCH_IRE_RJ_BHOLE, ipst); 2644 if (src_ire == NULL) { 2645 error = EHOSTUNREACH; 2646 goto bad_addr; 2647 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2648 if (!(src_ire->ire_type & IRE_HOST)) 2649 error = ENETUNREACH; 2650 else 2651 error = EHOSTUNREACH; 2652 goto bad_addr; 2653 } 2654 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2655 src_ipif = src_ire->ire_ipif; 2656 ipif_refhold(src_ipif); 2657 *v6src = src_ipif->ipif_v6lcl_addr; 2658 } 2659 ire_refrele(src_ire); 2660 src_ire = NULL; 2661 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2662 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2663 *v6src = sire->ire_src_addr_v6; 2664 ire_refrele(dst_ire); 2665 dst_ire = sire; 2666 sire = NULL; 2667 } else if (dst_ire->ire_type == IRE_CACHE && 2668 (dst_ire->ire_flags & RTF_SETSRC)) { 2669 ASSERT(dst_ire->ire_zoneid == zoneid || 2670 dst_ire->ire_zoneid == ALL_ZONES); 2671 *v6src = dst_ire->ire_src_addr_v6; 
2672 } else { 2673 /* 2674 * Pick a source address so that a proper inbound load 2675 * spreading would happen. Use dst_ill specified by the 2676 * app. when socket option or scopeid is set. 2677 */ 2678 int err; 2679 2680 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2681 uint_t if_index; 2682 2683 /* 2684 * Scope id or IPV6_PKTINFO 2685 */ 2686 2687 if_index = ipp->ipp_ifindex; 2688 dst_ill = ill_lookup_on_ifindex( 2689 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2690 ipst); 2691 if (dst_ill == NULL) { 2692 ip1dbg(("ip_bind_connected_v6:" 2693 " bad ifindex %d\n", if_index)); 2694 error = EADDRNOTAVAIL; 2695 goto bad_addr; 2696 } 2697 ill_held = B_TRUE; 2698 } else if (connp->conn_outgoing_ill != NULL) { 2699 /* 2700 * For IPV6_BOUND_IF socket option, 2701 * conn_outgoing_ill should be set 2702 * already in TCP or UDP/ICMP. 2703 */ 2704 dst_ill = conn_get_held_ill(connp, 2705 &connp->conn_outgoing_ill, &err); 2706 if (err == ILL_LOOKUP_FAILED) { 2707 ip1dbg(("ip_bind_connected_v6:" 2708 "no ill for bound_if\n")); 2709 error = EADDRNOTAVAIL; 2710 goto bad_addr; 2711 } 2712 ill_held = B_TRUE; 2713 } else if (dst_ire->ire_stq != NULL) { 2714 /* No need to hold ill here */ 2715 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2716 } else { 2717 /* No need to hold ill here */ 2718 dst_ill = dst_ire->ire_ipif->ipif_ill; 2719 } 2720 if (ip6_asp_can_lookup(ipst)) { 2721 src_ipif = ipif_select_source_v6(dst_ill, 2722 v6dst, B_FALSE, connp->conn_src_preferences, 2723 zoneid); 2724 ip6_asp_table_refrele(ipst); 2725 if (src_ipif == NULL) { 2726 pr_addr_dbg("ip_bind_connected_v6: " 2727 "no usable source address for " 2728 "connection to %s\n", 2729 AF_INET6, v6dst); 2730 error = EADDRNOTAVAIL; 2731 goto bad_addr; 2732 } 2733 *v6src = src_ipif->ipif_v6lcl_addr; 2734 } else { 2735 error = EADDRNOTAVAIL; 2736 goto bad_addr; 2737 } 2738 } 2739 } 2740 2741 /* 2742 * We do ire_route_lookup_v6() here (and not an interface lookup) 2743 * as we assert that v6src should only come from an 2744 
* UP interface for hard binding. 2745 */ 2746 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2747 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2748 2749 /* src_ire must be a local|loopback */ 2750 if (!IRE_IS_LOCAL(src_ire)) { 2751 if (ip_debug > 2) { 2752 /* ip1dbg */ 2753 pr_addr_dbg("ip_bind_connected_v6: bad " 2754 "connected src %s\n", AF_INET6, v6src); 2755 } 2756 error = EADDRNOTAVAIL; 2757 goto bad_addr; 2758 } 2759 2760 /* 2761 * If the source address is a loopback address, the 2762 * destination had best be local or multicast. 2763 * The transports that can't handle multicast will reject 2764 * those addresses. 2765 */ 2766 if (src_ire->ire_type == IRE_LOOPBACK && 2767 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2768 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2769 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2770 error = -1; 2771 goto bad_addr; 2772 } 2773 /* 2774 * Allow setting new policies. For example, disconnects come 2775 * down as ipa_t bind. As we would have set conn_policy_cached 2776 * to B_TRUE before, we should set it to B_FALSE, so that policy 2777 * can change after the disconnect. 2778 */ 2779 connp->conn_policy_cached = B_FALSE; 2780 2781 /* 2782 * The addresses have been verified. Initialize the conn 2783 * before calling the policy as they expect the conns 2784 * initialized. 2785 */ 2786 connp->conn_srcv6 = *v6src; 2787 connp->conn_remv6 = *v6dst; 2788 connp->conn_lport = lport; 2789 connp->conn_fport = fport; 2790 2791 ASSERT(!(ipsec_policy_set && ire_requested)); 2792 if (ire_requested) { 2793 iulp_t *ulp_info = NULL; 2794 2795 /* 2796 * Note that sire will not be NULL if this is an off-link 2797 * connection and there is not cache for that dest yet. 2798 * 2799 * XXX Because of an existing bug, if there are multiple 2800 * default routes, the IRE returned now may not be the actual 2801 * default route used (default routes are chosen in a 2802 * round robin fashion). 
So if the metrics for different 2803 * default routes are different, we may return the wrong 2804 * metrics. This will not be a problem if the existing 2805 * bug is fixed. 2806 */ 2807 if (sire != NULL) 2808 ulp_info = &(sire->ire_uinfo); 2809 2810 if (!ip_bind_get_ire_v6(mpp, dst_ire, v6dst, ulp_info, 2811 ipst)) { 2812 error = -1; 2813 goto bad_addr; 2814 } 2815 } else if (ipsec_policy_set) { 2816 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2817 error = -1; 2818 goto bad_addr; 2819 } 2820 } 2821 2822 /* 2823 * Cache IPsec policy in this conn. If we have per-socket policy, 2824 * we'll cache that. If we don't, we'll inherit global policy. 2825 * 2826 * We can't insert until the conn reflects the policy. Note that 2827 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2828 * connections where we don't have a policy. This is to prevent 2829 * global policy lookups in the inbound path. 2830 * 2831 * If we insert before we set conn_policy_cached, 2832 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2833 * because global policy cound be non-empty. We normally call 2834 * ipsec_check_policy() for conn_policy_cached connections only if 2835 * conn_in_enforce_policy is set. But in this case, 2836 * conn_policy_cached can get set anytime since we made the 2837 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2838 * is called, which will make the above assumption false. Thus, we 2839 * need to insert after we set conn_policy_cached. 2840 */ 2841 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2842 goto bad_addr; 2843 2844 /* If not fanout_insert this was just an address verification */ 2845 if (fanout_insert) { 2846 /* 2847 * The addresses have been verified. Time to insert in 2848 * the correct fanout list. 2849 */ 2850 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2851 connp->conn_ports, 2852 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2853 } 2854 if (error == 0) { 2855 connp->conn_fully_bound = B_TRUE; 2856 /* 2857 * Our initial checks for MDT have passed; the IRE is not 2858 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2859 * be supporting MDT. Pass the IRE, IPC and ILL into 2860 * ip_mdinfo_return(), which performs further checks 2861 * against them and upon success, returns the MDT info 2862 * mblk which we will attach to the bind acknowledgment. 2863 */ 2864 if (md_dst_ire != NULL) { 2865 mblk_t *mdinfo_mp; 2866 2867 ASSERT(md_ill != NULL); 2868 ASSERT(md_ill->ill_mdt_capab != NULL); 2869 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2870 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) { 2871 if (mp == NULL) { 2872 *mpp = mdinfo_mp; 2873 } else { 2874 linkb(mp, mdinfo_mp); 2875 } 2876 } 2877 } 2878 } 2879 bad_addr: 2880 if (ipsec_policy_set) { 2881 ASSERT(mp != NULL); 2882 freeb(mp); 2883 /* 2884 * As of now assume that nothing else accompanies 2885 * IPSEC_POLICY_SET. 
2886 */ 2887 *mpp = NULL; 2888 } 2889 refrele_and_quit: 2890 if (src_ire != NULL) 2891 IRE_REFRELE(src_ire); 2892 if (dst_ire != NULL) 2893 IRE_REFRELE(dst_ire); 2894 if (sire != NULL) 2895 IRE_REFRELE(sire); 2896 if (src_ipif != NULL) 2897 ipif_refrele(src_ipif); 2898 if (md_dst_ire != NULL) 2899 IRE_REFRELE(md_dst_ire); 2900 if (ill_held && dst_ill != NULL) 2901 ill_refrele(dst_ill); 2902 if (effective_cred != NULL) 2903 crfree(effective_cred); 2904 return (error); 2905 } 2906 2907 /* ARGSUSED */ 2908 int 2909 ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2910 in6_addr_t *v6srcp, uint16_t lport, const in6_addr_t *v6dstp, 2911 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2912 boolean_t verify_dst, cred_t *cr) 2913 { 2914 int error = 0; 2915 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2916 boolean_t ire_requested; 2917 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2918 2919 /* 2920 * Note that we allow connect to broadcast and multicast 2921 * address when ire_requested is set. Thus the ULP 2922 * has to check for IRE_BROADCAST and multicast. 2923 */ 2924 ASSERT(mpp != NULL); 2925 ire_requested = (*mpp != NULL && DB_TYPE(*mpp) == IRE_DB_REQ_TYPE); 2926 2927 ASSERT(connp->conn_af_isv6); 2928 connp->conn_ulp = protocol; 2929 2930 /* For raw socket, the local port is not set. */ 2931 lport = lport != 0 ? lport : connp->conn_lport; 2932 2933 /* 2934 * Bind to local and remote address. Local might be 2935 * unspecified in which case it will be extracted from 2936 * ire_src_addr_v6 2937 */ 2938 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2939 /* Connect to IPv4 address */ 2940 ipaddr_t v4src; 2941 ipaddr_t v4dst; 2942 2943 /* Is the source unspecified or mapped? 
*/ 2944 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2945 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2946 ip1dbg(("ip_proto_bind_connected_v6: " 2947 "dst is mapped, but not the src\n")); 2948 goto bad_addr; 2949 } 2950 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2951 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2952 2953 /* Always verify destination reachability. */ 2954 error = ip_bind_connected_v4(connp, mpp, protocol, &v4src, 2955 lport, v4dst, fport, B_TRUE, B_TRUE, cr); 2956 if (error != 0) 2957 goto bad_addr; 2958 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2959 connp->conn_pkt_isv6 = B_FALSE; 2960 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2961 ip1dbg(("ip_proto_bind_connected_v6: " 2962 "src is mapped, but not the dst\n")); 2963 goto bad_addr; 2964 } else { 2965 error = ip_bind_connected_v6(connp, mpp, protocol, v6srcp, 2966 lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2967 if (error != 0) 2968 goto bad_addr; 2969 connp->conn_pkt_isv6 = B_TRUE; 2970 } 2971 2972 ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, 2973 orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); 2974 2975 /* Send it home. */ 2976 return (0); 2977 2978 bad_addr: 2979 if (error == 0) 2980 error = -TBADADDR; 2981 return (error); 2982 } 2983 2984 /* 2985 * Get the ire in *mpp. Returns false if it fails (due to lack of space). 2986 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 2987 */ 2988 /* ARGSUSED4 */ 2989 static boolean_t 2990 ip_bind_get_ire_v6(mblk_t **mpp, ire_t *ire, const in6_addr_t *dst, 2991 iulp_t *ulp_info, ip_stack_t *ipst) 2992 { 2993 mblk_t *mp = *mpp; 2994 ire_t *ret_ire; 2995 2996 ASSERT(mp != NULL); 2997 2998 if (ire != NULL) { 2999 /* 3000 * mp initialized above to IRE_DB_REQ_TYPE 3001 * appended mblk. Its <upper protocol>'s 3002 * job to make sure there is room. 
3003 */ 3004 if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t)) 3005 return (B_FALSE); 3006 3007 mp->b_datap->db_type = IRE_DB_TYPE; 3008 mp->b_wptr = mp->b_rptr + sizeof (ire_t); 3009 bcopy(ire, mp->b_rptr, sizeof (ire_t)); 3010 ret_ire = (ire_t *)mp->b_rptr; 3011 if (IN6_IS_ADDR_MULTICAST(dst) || 3012 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3013 ret_ire->ire_type = IRE_BROADCAST; 3014 ret_ire->ire_addr_v6 = *dst; 3015 } 3016 if (ulp_info != NULL) { 3017 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3018 sizeof (iulp_t)); 3019 } 3020 ret_ire->ire_mp = mp; 3021 } else { 3022 /* 3023 * No IRE was found. Remove IRE mblk. 3024 */ 3025 *mpp = mp->b_cont; 3026 freeb(mp); 3027 } 3028 return (B_TRUE); 3029 } 3030 3031 /* 3032 * Add an ip6i_t header to the front of the mblk. 3033 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3034 * Returns NULL if allocation fails (and frees original message). 3035 * Used in outgoing path when going through ip_newroute_*v6(). 3036 * Used in incoming path to pass ifindex to transports. 3037 */ 3038 mblk_t * 3039 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3040 { 3041 mblk_t *mp1; 3042 ip6i_t *ip6i; 3043 ip6_t *ip6h; 3044 3045 ip6h = (ip6_t *)mp->b_rptr; 3046 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3047 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3048 mp->b_datap->db_ref > 1) { 3049 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3050 if (mp1 == NULL) { 3051 freemsg(mp); 3052 return (NULL); 3053 } 3054 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3055 mp1->b_cont = mp; 3056 mp = mp1; 3057 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3058 } 3059 mp->b_rptr = (uchar_t *)ip6i; 3060 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3061 ip6i->ip6i_nxt = IPPROTO_RAW; 3062 if (ill != NULL) { 3063 ip6i->ip6i_flags = IP6I_IFINDEX; 3064 /* 3065 * If `ill' is in an IPMP group, make sure we use the IPMP 3066 * interface index so that e.g. 
IPV6_RECVPKTINFO will get the 3067 * IPMP interface index and not an underlying interface index. 3068 */ 3069 if (IS_UNDER_IPMP(ill)) 3070 ip6i->ip6i_ifindex = ipmp_ill_get_ipmp_ifindex(ill); 3071 else 3072 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3073 } else { 3074 ip6i->ip6i_flags = 0; 3075 } 3076 ip6i->ip6i_nexthop = *dst; 3077 return (mp); 3078 } 3079 3080 /* 3081 * Handle protocols with which IP is less intimate. There 3082 * can be more than one stream bound to a particular 3083 * protocol. When this is the case, normally each one gets a copy 3084 * of any incoming packets. 3085 * However, if the packet was tunneled and not multicast we only send to it 3086 * the first match. 3087 * 3088 * Zones notes: 3089 * Packets will be distributed to streams in all zones. This is really only 3090 * useful for ICMPv6 as only applications in the global zone can create raw 3091 * sockets for other protocols. 3092 */ 3093 static void 3094 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3095 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3096 boolean_t mctl_present, zoneid_t zoneid) 3097 { 3098 queue_t *rq; 3099 mblk_t *mp1, *first_mp1; 3100 in6_addr_t dst = ip6h->ip6_dst; 3101 in6_addr_t src = ip6h->ip6_src; 3102 boolean_t one_only; 3103 mblk_t *first_mp = mp; 3104 boolean_t secure, shared_addr; 3105 conn_t *connp, *first_connp, *next_connp; 3106 connf_t *connfp; 3107 ip_stack_t *ipst = inill->ill_ipst; 3108 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3109 3110 if (mctl_present) { 3111 mp = first_mp->b_cont; 3112 secure = ipsec_in_is_secure(first_mp); 3113 ASSERT(mp != NULL); 3114 } else { 3115 secure = B_FALSE; 3116 } 3117 3118 /* 3119 * If the packet was tunneled and not multicast we only send to it 3120 * the first match. 
3121 */ 3122 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3123 !IN6_IS_ADDR_MULTICAST(&dst)); 3124 3125 shared_addr = (zoneid == ALL_ZONES); 3126 if (shared_addr) { 3127 /* 3128 * We don't allow multilevel ports for raw IP, so no need to 3129 * check for that here. 3130 */ 3131 zoneid = tsol_packet_to_zoneid(mp); 3132 } 3133 3134 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3135 mutex_enter(&connfp->connf_lock); 3136 connp = connfp->connf_head; 3137 for (connp = connfp->connf_head; connp != NULL; 3138 connp = connp->conn_next) { 3139 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3140 zoneid) && 3141 (!is_system_labeled() || 3142 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3143 connp))) 3144 break; 3145 } 3146 3147 if (connp == NULL) { 3148 /* 3149 * No one bound to this port. Is 3150 * there a client that wants all 3151 * unclaimed datagrams? 3152 */ 3153 mutex_exit(&connfp->connf_lock); 3154 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3155 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3156 nexthdr_offset, mctl_present, zoneid, ipst)) { 3157 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3158 } 3159 3160 return; 3161 } 3162 3163 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL); 3164 3165 CONN_INC_REF(connp); 3166 first_connp = connp; 3167 3168 /* 3169 * XXX: Fix the multiple protocol listeners case. We should not 3170 * be walking the conn->next list here. 3171 */ 3172 if (one_only) { 3173 /* 3174 * Only send message to one tunnel driver by immediately 3175 * terminating the loop. 3176 */ 3177 connp = NULL; 3178 } else { 3179 connp = connp->conn_next; 3180 3181 } 3182 for (;;) { 3183 while (connp != NULL) { 3184 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3185 flags, zoneid) && 3186 (!is_system_labeled() || 3187 tsol_receive_local(mp, &dst, IPV6_VERSION, 3188 shared_addr, connp))) 3189 break; 3190 connp = connp->conn_next; 3191 } 3192 3193 /* 3194 * Just copy the data part alone. 
The mctl part is 3195 * needed just for verifying policy and it is never 3196 * sent up. 3197 */ 3198 if (connp == NULL || 3199 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3200 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3201 /* 3202 * No more intested clients or memory 3203 * allocation failed 3204 */ 3205 connp = first_connp; 3206 break; 3207 } 3208 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 3209 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3210 CONN_INC_REF(connp); 3211 mutex_exit(&connfp->connf_lock); 3212 rq = connp->conn_rq; 3213 /* 3214 * For link-local always add ifindex so that transport can set 3215 * sin6_scope_id. Avoid it for ICMP error fanout. 3216 */ 3217 if ((connp->conn_ip_recvpktinfo || 3218 IN6_IS_ADDR_LINKLOCAL(&src)) && 3219 (flags & IP_FF_IPINFO)) { 3220 /* Add header */ 3221 mp1 = ip_add_info_v6(mp1, inill, &dst); 3222 } 3223 if (mp1 == NULL) { 3224 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3225 } else if ( 3226 (IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3227 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3228 if (flags & IP_FF_RAWIP) { 3229 BUMP_MIB(ill->ill_ip_mib, 3230 rawipIfStatsInOverflows); 3231 } else { 3232 BUMP_MIB(ill->ill_icmp6_mib, 3233 ipv6IfIcmpInOverflows); 3234 } 3235 3236 freemsg(mp1); 3237 } else { 3238 /* 3239 * Don't enforce here if we're a tunnel - let "tun" do 3240 * it instead. 3241 */ 3242 if (!IPCL_IS_IPTUN(connp) && 3243 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3244 secure)) { 3245 first_mp1 = ipsec_check_inbound_policy( 3246 first_mp1, connp, NULL, ip6h, mctl_present); 3247 } 3248 if (first_mp1 != NULL) { 3249 if (mctl_present) 3250 freeb(first_mp1); 3251 BUMP_MIB(ill->ill_ip_mib, 3252 ipIfStatsHCInDelivers); 3253 (connp->conn_recv)(connp, mp1, NULL); 3254 } 3255 } 3256 mutex_enter(&connfp->connf_lock); 3257 /* Follow the next pointer before releasing the conn. 
*/ 3258 next_connp = connp->conn_next; 3259 CONN_DEC_REF(connp); 3260 connp = next_connp; 3261 } 3262 3263 /* Last one. Send it upstream. */ 3264 mutex_exit(&connfp->connf_lock); 3265 3266 /* Initiate IPPF processing */ 3267 if (IP6_IN_IPP(flags, ipst)) { 3268 uint_t ifindex; 3269 3270 mutex_enter(&ill->ill_lock); 3271 ifindex = ill->ill_phyint->phyint_ifindex; 3272 mutex_exit(&ill->ill_lock); 3273 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3274 if (mp == NULL) { 3275 CONN_DEC_REF(connp); 3276 if (mctl_present) 3277 freeb(first_mp); 3278 return; 3279 } 3280 } 3281 3282 /* 3283 * For link-local always add ifindex so that transport can set 3284 * sin6_scope_id. Avoid it for ICMP error fanout. 3285 */ 3286 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3287 (flags & IP_FF_IPINFO)) { 3288 /* Add header */ 3289 mp = ip_add_info_v6(mp, inill, &dst); 3290 if (mp == NULL) { 3291 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3292 CONN_DEC_REF(connp); 3293 if (mctl_present) 3294 freeb(first_mp); 3295 return; 3296 } else if (mctl_present) { 3297 first_mp->b_cont = mp; 3298 } else { 3299 first_mp = mp; 3300 } 3301 } 3302 3303 rq = connp->conn_rq; 3304 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3305 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3306 3307 if (flags & IP_FF_RAWIP) { 3308 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3309 } else { 3310 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3311 } 3312 3313 freemsg(first_mp); 3314 } else { 3315 if (IPCL_IS_IPTUN(connp)) { 3316 /* 3317 * Tunneled packet. We enforce policy in the tunnel 3318 * module itself. 3319 * 3320 * Send the WHOLE packet up (incl. IPSEC_IN) without 3321 * a policy check. 3322 */ 3323 putnext(rq, first_mp); 3324 CONN_DEC_REF(connp); 3325 return; 3326 } 3327 /* 3328 * Don't enforce here if we're a tunnel - let "tun" do 3329 * it instead. 
3330 */ 3331 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3332 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3333 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3334 NULL, ip6h, mctl_present); 3335 if (first_mp == NULL) { 3336 CONN_DEC_REF(connp); 3337 return; 3338 } 3339 } 3340 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3341 (connp->conn_recv)(connp, mp, NULL); 3342 if (mctl_present) 3343 freeb(first_mp); 3344 } 3345 CONN_DEC_REF(connp); 3346 } 3347 3348 /* 3349 * Send an ICMP error after patching up the packet appropriately. Returns 3350 * non-zero if the appropriate MIB should be bumped; zero otherwise. 3351 */ 3352 int 3353 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3354 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3355 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3356 { 3357 ip6_t *ip6h; 3358 mblk_t *first_mp; 3359 boolean_t secure; 3360 unsigned char db_type; 3361 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3362 3363 first_mp = mp; 3364 if (mctl_present) { 3365 mp = mp->b_cont; 3366 secure = ipsec_in_is_secure(first_mp); 3367 ASSERT(mp != NULL); 3368 } else { 3369 /* 3370 * If this is an ICMP error being reported - which goes 3371 * up as M_CTLs, we need to convert them to M_DATA till 3372 * we finish checking with global policy because 3373 * ipsec_check_global_policy() assumes M_DATA as clear 3374 * and M_CTL as secure. 3375 */ 3376 db_type = mp->b_datap->db_type; 3377 mp->b_datap->db_type = M_DATA; 3378 secure = B_FALSE; 3379 } 3380 /* 3381 * We are generating an icmp error for some inbound packet. 3382 * Called from all ip_fanout_(udp, tcp, proto) functions. 3383 * Before we generate an error, check with global policy 3384 * to see whether this is allowed to enter the system. As 3385 * there is no "conn", we are checking with global policy. 
3386 */ 3387 ip6h = (ip6_t *)mp->b_rptr; 3388 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3389 first_mp = ipsec_check_global_policy(first_mp, NULL, 3390 NULL, ip6h, mctl_present, ipst->ips_netstack); 3391 if (first_mp == NULL) 3392 return (0); 3393 } 3394 3395 if (!mctl_present) 3396 mp->b_datap->db_type = db_type; 3397 3398 if (flags & IP_FF_SEND_ICMP) { 3399 if (flags & IP_FF_HDR_COMPLETE) { 3400 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3401 freemsg(first_mp); 3402 return (1); 3403 } 3404 } 3405 switch (icmp_type) { 3406 case ICMP6_DST_UNREACH: 3407 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3408 B_FALSE, B_FALSE, zoneid, ipst); 3409 break; 3410 case ICMP6_PARAM_PROB: 3411 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3412 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3413 break; 3414 default: 3415 #ifdef DEBUG 3416 panic("ip_fanout_send_icmp_v6: wrong type"); 3417 /*NOTREACHED*/ 3418 #else 3419 freemsg(first_mp); 3420 break; 3421 #endif 3422 } 3423 } else { 3424 freemsg(first_mp); 3425 return (0); 3426 } 3427 3428 return (1); 3429 } 3430 3431 /* 3432 * Fanout for TCP packets 3433 * The caller puts <fport, lport> in the ports parameter. 3434 */ 3435 static void 3436 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3437 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3438 { 3439 mblk_t *first_mp; 3440 boolean_t secure; 3441 conn_t *connp; 3442 tcph_t *tcph; 3443 boolean_t syn_present = B_FALSE; 3444 ip_stack_t *ipst = inill->ill_ipst; 3445 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3446 3447 first_mp = mp; 3448 if (mctl_present) { 3449 mp = first_mp->b_cont; 3450 secure = ipsec_in_is_secure(first_mp); 3451 ASSERT(mp != NULL); 3452 } else { 3453 secure = B_FALSE; 3454 } 3455 3456 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3457 3458 if (connp == NULL || 3459 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3460 /* 3461 * No hard-bound match. 
Send Reset. 3462 */ 3463 dblk_t *dp = mp->b_datap; 3464 uint32_t ill_index; 3465 3466 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3467 3468 /* Initiate IPPf processing, if needed. */ 3469 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3470 (flags & IP6_NO_IPPOLICY)) { 3471 ill_index = ill->ill_phyint->phyint_ifindex; 3472 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3473 if (first_mp == NULL) { 3474 if (connp != NULL) 3475 CONN_DEC_REF(connp); 3476 return; 3477 } 3478 } 3479 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3480 if (connp != NULL) { 3481 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3482 ipst->ips_netstack->netstack_tcp, connp); 3483 CONN_DEC_REF(connp); 3484 } else { 3485 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3486 ipst->ips_netstack->netstack_tcp, NULL); 3487 } 3488 3489 return; 3490 } 3491 3492 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3493 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3494 if (IPCL_IS_TCP(connp)) { 3495 squeue_t *sqp; 3496 3497 /* 3498 * If the queue belongs to a conn, and fused tcp 3499 * loopback is enabled, assign the eager's squeue 3500 * to be that of the active connect's. 3501 */ 3502 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3503 CONN_Q(q) && IPCL_IS_TCP(Q_TO_CONN(q)) && 3504 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3505 !secure && 3506 !IP6_IN_IPP(flags, ipst)) { 3507 ASSERT(Q_TO_CONN(q)->conn_sqp != NULL); 3508 sqp = Q_TO_CONN(q)->conn_sqp; 3509 } else { 3510 sqp = IP_SQUEUE_GET(lbolt); 3511 } 3512 3513 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3514 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3515 3516 /* 3517 * db_cksumstuff is unused in the incoming 3518 * path; Thus store the ifindex here. It will 3519 * be cleared in tcp_conn_create_v6(). 
3520 */ 3521 DB_CKSUMSTUFF(mp) = 3522 (intptr_t)ill->ill_phyint->phyint_ifindex; 3523 syn_present = B_TRUE; 3524 } 3525 } 3526 3527 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3528 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3529 if ((flags & TH_RST) || (flags & TH_URG)) { 3530 CONN_DEC_REF(connp); 3531 freemsg(first_mp); 3532 return; 3533 } 3534 if (flags & TH_ACK) { 3535 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3536 ipst->ips_netstack->netstack_tcp, connp); 3537 CONN_DEC_REF(connp); 3538 return; 3539 } 3540 3541 CONN_DEC_REF(connp); 3542 freemsg(first_mp); 3543 return; 3544 } 3545 3546 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3547 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3548 NULL, ip6h, mctl_present); 3549 if (first_mp == NULL) { 3550 CONN_DEC_REF(connp); 3551 return; 3552 } 3553 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3554 ASSERT(syn_present); 3555 if (mctl_present) { 3556 ASSERT(first_mp != mp); 3557 first_mp->b_datap->db_struioflag |= 3558 STRUIO_POLICY; 3559 } else { 3560 ASSERT(first_mp == mp); 3561 mp->b_datap->db_struioflag &= 3562 ~STRUIO_EAGER; 3563 mp->b_datap->db_struioflag |= 3564 STRUIO_POLICY; 3565 } 3566 } else { 3567 /* 3568 * Discard first_mp early since we're dealing with a 3569 * fully-connected conn_t and tcp doesn't do policy in 3570 * this case. Also, if someone is bound to IPPROTO_TCP 3571 * over raw IP, they don't expect to see a M_CTL. 
3572 */ 3573 if (mctl_present) { 3574 freeb(first_mp); 3575 mctl_present = B_FALSE; 3576 } 3577 first_mp = mp; 3578 } 3579 } 3580 3581 /* Initiate IPPF processing */ 3582 if (IP6_IN_IPP(flags, ipst)) { 3583 uint_t ifindex; 3584 3585 mutex_enter(&ill->ill_lock); 3586 ifindex = ill->ill_phyint->phyint_ifindex; 3587 mutex_exit(&ill->ill_lock); 3588 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3589 if (mp == NULL) { 3590 CONN_DEC_REF(connp); 3591 if (mctl_present) { 3592 freeb(first_mp); 3593 } 3594 return; 3595 } else if (mctl_present) { 3596 /* 3597 * ip_add_info_v6 might return a new mp. 3598 */ 3599 ASSERT(first_mp != mp); 3600 first_mp->b_cont = mp; 3601 } else { 3602 first_mp = mp; 3603 } 3604 } 3605 3606 /* 3607 * For link-local always add ifindex so that TCP can bind to that 3608 * interface. Avoid it for ICMP error fanout. 3609 */ 3610 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3611 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3612 (flags & IP_FF_IPINFO))) { 3613 /* Add header */ 3614 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3615 if (mp == NULL) { 3616 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3617 CONN_DEC_REF(connp); 3618 if (mctl_present) 3619 freeb(first_mp); 3620 return; 3621 } else if (mctl_present) { 3622 ASSERT(first_mp != mp); 3623 first_mp->b_cont = mp; 3624 } else { 3625 first_mp = mp; 3626 } 3627 } 3628 3629 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3630 if (IPCL_IS_TCP(connp)) { 3631 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, connp->conn_recv, 3632 connp, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 3633 } else { 3634 /* SOCK_RAW, IPPROTO_TCP case */ 3635 (connp->conn_recv)(connp, first_mp, NULL); 3636 CONN_DEC_REF(connp); 3637 } 3638 } 3639 3640 /* 3641 * Fanout for UDP packets. 3642 * The caller puts <fport, lport> in the ports parameter. 3643 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3644 * 3645 * If SO_REUSEADDR is set all multicast and broadcast packets 3646 * will be delivered to all streams bound to the same port. 3647 * 3648 * Zones notes: 3649 * Multicast packets will be distributed to streams in all zones. 3650 */ 3651 static void 3652 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3653 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3654 zoneid_t zoneid) 3655 { 3656 uint32_t dstport, srcport; 3657 in6_addr_t dst; 3658 mblk_t *first_mp; 3659 boolean_t secure; 3660 conn_t *connp; 3661 connf_t *connfp; 3662 conn_t *first_conn; 3663 conn_t *next_conn; 3664 mblk_t *mp1, *first_mp1; 3665 in6_addr_t src; 3666 boolean_t shared_addr; 3667 ip_stack_t *ipst = inill->ill_ipst; 3668 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3669 3670 first_mp = mp; 3671 if (mctl_present) { 3672 mp = first_mp->b_cont; 3673 secure = ipsec_in_is_secure(first_mp); 3674 ASSERT(mp != NULL); 3675 } else { 3676 secure = B_FALSE; 3677 } 3678 3679 /* Extract ports in net byte order */ 3680 dstport = htons(ntohl(ports) & 0xFFFF); 3681 srcport = htons(ntohl(ports) >> 16); 3682 dst = ip6h->ip6_dst; 3683 src = ip6h->ip6_src; 3684 3685 shared_addr = (zoneid == ALL_ZONES); 3686 if (shared_addr) { 3687 /* 3688 * No need to handle exclusive-stack zones since ALL_ZONES 3689 * only applies to the shared stack. 3690 */ 3691 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3692 /* 3693 * If no shared MLP is found, tsol_mlp_findzone returns 3694 * ALL_ZONES. In that case, we assume it's SLP, and 3695 * search for the zone based on the packet label. 3696 * That will also return ALL_ZONES on failure, but 3697 * we never allow conn_zoneid to be set to ALL_ZONES. 3698 */ 3699 if (zoneid == ALL_ZONES) 3700 zoneid = tsol_packet_to_zoneid(mp); 3701 } 3702 3703 /* Attempt to find a client stream based on destination port. 
*/ 3704 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3705 mutex_enter(&connfp->connf_lock); 3706 connp = connfp->connf_head; 3707 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3708 /* 3709 * Not multicast. Send to the one (first) client we find. 3710 */ 3711 while (connp != NULL) { 3712 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3713 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3714 conn_wantpacket_v6(connp, ill, ip6h, 3715 flags, zoneid)) { 3716 break; 3717 } 3718 connp = connp->conn_next; 3719 } 3720 if (connp == NULL || connp->conn_upq == NULL) 3721 goto notfound; 3722 3723 if (is_system_labeled() && 3724 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3725 connp)) 3726 goto notfound; 3727 3728 /* Found a client */ 3729 CONN_INC_REF(connp); 3730 mutex_exit(&connfp->connf_lock); 3731 3732 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3733 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3734 freemsg(first_mp); 3735 CONN_DEC_REF(connp); 3736 return; 3737 } 3738 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3739 first_mp = ipsec_check_inbound_policy(first_mp, 3740 connp, NULL, ip6h, mctl_present); 3741 if (first_mp == NULL) { 3742 CONN_DEC_REF(connp); 3743 return; 3744 } 3745 } 3746 /* Initiate IPPF processing */ 3747 if (IP6_IN_IPP(flags, ipst)) { 3748 uint_t ifindex; 3749 3750 mutex_enter(&ill->ill_lock); 3751 ifindex = ill->ill_phyint->phyint_ifindex; 3752 mutex_exit(&ill->ill_lock); 3753 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3754 if (mp == NULL) { 3755 CONN_DEC_REF(connp); 3756 if (mctl_present) 3757 freeb(first_mp); 3758 return; 3759 } 3760 } 3761 /* 3762 * For link-local always add ifindex so that 3763 * transport can set sin6_scope_id. Avoid it for 3764 * ICMP error fanout. 
3765 */ 3766 if ((connp->conn_ip_recvpktinfo || 3767 IN6_IS_ADDR_LINKLOCAL(&src)) && 3768 (flags & IP_FF_IPINFO)) { 3769 /* Add header */ 3770 mp = ip_add_info_v6(mp, inill, &dst); 3771 if (mp == NULL) { 3772 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3773 CONN_DEC_REF(connp); 3774 if (mctl_present) 3775 freeb(first_mp); 3776 return; 3777 } else if (mctl_present) { 3778 first_mp->b_cont = mp; 3779 } else { 3780 first_mp = mp; 3781 } 3782 } 3783 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3784 3785 /* Send it upstream */ 3786 (connp->conn_recv)(connp, mp, NULL); 3787 3788 IP6_STAT(ipst, ip6_udp_fannorm); 3789 CONN_DEC_REF(connp); 3790 if (mctl_present) 3791 freeb(first_mp); 3792 return; 3793 } 3794 3795 while (connp != NULL) { 3796 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3797 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3798 (!is_system_labeled() || 3799 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3800 connp))) 3801 break; 3802 connp = connp->conn_next; 3803 } 3804 3805 if (connp == NULL || connp->conn_upq == NULL) 3806 goto notfound; 3807 3808 first_conn = connp; 3809 3810 CONN_INC_REF(connp); 3811 connp = connp->conn_next; 3812 for (;;) { 3813 while (connp != NULL) { 3814 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3815 src) && conn_wantpacket_v6(connp, ill, ip6h, 3816 flags, zoneid) && 3817 (!is_system_labeled() || 3818 tsol_receive_local(mp, &dst, IPV6_VERSION, 3819 shared_addr, connp))) 3820 break; 3821 connp = connp->conn_next; 3822 } 3823 /* 3824 * Just copy the data part alone. The mctl part is 3825 * needed just for verifying policy and it is never 3826 * sent up. 3827 */ 3828 if (connp == NULL || 3829 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3830 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3831 /* 3832 * No more interested clients or memory 3833 * allocation failed 3834 */ 3835 connp = first_conn; 3836 break; 3837 } 3838 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3839 CONN_INC_REF(connp); 3840 mutex_exit(&connfp->connf_lock); 3841 /* 3842 * For link-local always add ifindex so that transport 3843 * can set sin6_scope_id. Avoid it for ICMP error 3844 * fanout. 3845 */ 3846 if ((connp->conn_ip_recvpktinfo || 3847 IN6_IS_ADDR_LINKLOCAL(&src)) && 3848 (flags & IP_FF_IPINFO)) { 3849 /* Add header */ 3850 mp1 = ip_add_info_v6(mp1, inill, &dst); 3851 } 3852 /* mp1 could have changed */ 3853 if (mctl_present) 3854 first_mp1->b_cont = mp1; 3855 else 3856 first_mp1 = mp1; 3857 if (mp1 == NULL) { 3858 if (mctl_present) 3859 freeb(first_mp1); 3860 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3861 goto next_one; 3862 } 3863 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3864 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3865 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3866 freemsg(first_mp1); 3867 goto next_one; 3868 } 3869 3870 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3871 first_mp1 = ipsec_check_inbound_policy 3872 (first_mp1, connp, NULL, ip6h, 3873 mctl_present); 3874 } 3875 if (first_mp1 != NULL) { 3876 if (mctl_present) 3877 freeb(first_mp1); 3878 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3879 3880 /* Send it upstream */ 3881 (connp->conn_recv)(connp, mp1, NULL); 3882 } 3883 next_one: 3884 mutex_enter(&connfp->connf_lock); 3885 /* Follow the next pointer before releasing the conn. */ 3886 next_conn = connp->conn_next; 3887 IP6_STAT(ipst, ip6_udp_fanmb); 3888 CONN_DEC_REF(connp); 3889 connp = next_conn; 3890 } 3891 3892 /* Last one. Send it upstream. 
*/ 3893 mutex_exit(&connfp->connf_lock); 3894 3895 /* Initiate IPPF processing */ 3896 if (IP6_IN_IPP(flags, ipst)) { 3897 uint_t ifindex; 3898 3899 mutex_enter(&ill->ill_lock); 3900 ifindex = ill->ill_phyint->phyint_ifindex; 3901 mutex_exit(&ill->ill_lock); 3902 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3903 if (mp == NULL) { 3904 CONN_DEC_REF(connp); 3905 if (mctl_present) { 3906 freeb(first_mp); 3907 } 3908 return; 3909 } 3910 } 3911 3912 /* 3913 * For link-local always add ifindex so that transport can set 3914 * sin6_scope_id. Avoid it for ICMP error fanout. 3915 */ 3916 if ((connp->conn_ip_recvpktinfo || 3917 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3918 /* Add header */ 3919 mp = ip_add_info_v6(mp, inill, &dst); 3920 if (mp == NULL) { 3921 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3922 CONN_DEC_REF(connp); 3923 if (mctl_present) 3924 freeb(first_mp); 3925 return; 3926 } else if (mctl_present) { 3927 first_mp->b_cont = mp; 3928 } else { 3929 first_mp = mp; 3930 } 3931 } 3932 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3933 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3934 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3935 freemsg(mp); 3936 } else { 3937 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3938 first_mp = ipsec_check_inbound_policy(first_mp, 3939 connp, NULL, ip6h, mctl_present); 3940 if (first_mp == NULL) { 3941 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3942 CONN_DEC_REF(connp); 3943 return; 3944 } 3945 } 3946 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3947 3948 /* Send it upstream */ 3949 (connp->conn_recv)(connp, mp, NULL); 3950 } 3951 IP6_STAT(ipst, ip6_udp_fanmb); 3952 CONN_DEC_REF(connp); 3953 if (mctl_present) 3954 freeb(first_mp); 3955 return; 3956 3957 notfound: 3958 mutex_exit(&connfp->connf_lock); 3959 /* 3960 * No one bound to this port. Is 3961 * there a client that wants all 3962 * unclaimed datagrams? 
3963 */ 3964 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3965 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3966 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3967 zoneid); 3968 } else { 3969 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3970 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3971 mctl_present, zoneid, ipst)) { 3972 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3973 } 3974 } 3975 } 3976 3977 /* 3978 * int ip_find_hdr_v6() 3979 * 3980 * This routine is used by the upper layer protocols and the IP tunnel 3981 * module to: 3982 * - Set extension header pointers to appropriate locations 3983 * - Determine IPv6 header length and return it 3984 * - Return a pointer to the last nexthdr value 3985 * 3986 * The caller must initialize ipp_fields. 3987 * 3988 * NOTE: If multiple extension headers of the same type are present, 3989 * ip_find_hdr_v6() will set the respective extension header pointers 3990 * to the first one that it encounters in the IPv6 header. It also 3991 * skips fragment headers. This routine deals with malformed packets 3992 * of various sorts in which case the returned length is up to the 3993 * malformed part. 3994 */ 3995 int 3996 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3997 { 3998 uint_t length, ehdrlen; 3999 uint8_t nexthdr; 4000 uint8_t *whereptr, *endptr; 4001 ip6_dest_t *tmpdstopts; 4002 ip6_rthdr_t *tmprthdr; 4003 ip6_hbh_t *tmphopopts; 4004 ip6_frag_t *tmpfraghdr; 4005 4006 length = IPV6_HDR_LEN; 4007 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4008 endptr = mp->b_wptr; 4009 4010 nexthdr = ip6h->ip6_nxt; 4011 while (whereptr < endptr) { 4012 /* Is there enough left for len + nexthdr? 
*/ 4013 if (whereptr + MIN_EHDR_LEN > endptr) 4014 goto done; 4015 4016 switch (nexthdr) { 4017 case IPPROTO_HOPOPTS: 4018 tmphopopts = (ip6_hbh_t *)whereptr; 4019 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4020 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4021 goto done; 4022 nexthdr = tmphopopts->ip6h_nxt; 4023 /* return only 1st hbh */ 4024 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4025 ipp->ipp_fields |= IPPF_HOPOPTS; 4026 ipp->ipp_hopopts = tmphopopts; 4027 ipp->ipp_hopoptslen = ehdrlen; 4028 } 4029 break; 4030 case IPPROTO_DSTOPTS: 4031 tmpdstopts = (ip6_dest_t *)whereptr; 4032 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4033 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4034 goto done; 4035 nexthdr = tmpdstopts->ip6d_nxt; 4036 /* 4037 * ipp_dstopts is set to the destination header after a 4038 * routing header. 4039 * Assume it is a post-rthdr destination header 4040 * and adjust when we find an rthdr. 4041 */ 4042 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4043 ipp->ipp_fields |= IPPF_DSTOPTS; 4044 ipp->ipp_dstopts = tmpdstopts; 4045 ipp->ipp_dstoptslen = ehdrlen; 4046 } 4047 break; 4048 case IPPROTO_ROUTING: 4049 tmprthdr = (ip6_rthdr_t *)whereptr; 4050 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4051 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4052 goto done; 4053 nexthdr = tmprthdr->ip6r_nxt; 4054 /* return only 1st rthdr */ 4055 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4056 ipp->ipp_fields |= IPPF_RTHDR; 4057 ipp->ipp_rthdr = tmprthdr; 4058 ipp->ipp_rthdrlen = ehdrlen; 4059 } 4060 /* 4061 * Make any destination header we've seen be a 4062 * pre-rthdr destination header. 
4063 */ 4064 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4065 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4066 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4067 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4068 ipp->ipp_dstopts = NULL; 4069 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4070 ipp->ipp_dstoptslen = 0; 4071 } 4072 break; 4073 case IPPROTO_FRAGMENT: 4074 tmpfraghdr = (ip6_frag_t *)whereptr; 4075 ehdrlen = sizeof (ip6_frag_t); 4076 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4077 goto done; 4078 nexthdr = tmpfraghdr->ip6f_nxt; 4079 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4080 ipp->ipp_fields |= IPPF_FRAGHDR; 4081 ipp->ipp_fraghdr = tmpfraghdr; 4082 ipp->ipp_fraghdrlen = ehdrlen; 4083 } 4084 break; 4085 case IPPROTO_NONE: 4086 default: 4087 goto done; 4088 } 4089 length += ehdrlen; 4090 whereptr += ehdrlen; 4091 } 4092 done: 4093 if (nexthdrp != NULL) 4094 *nexthdrp = nexthdr; 4095 return (length); 4096 } 4097 4098 int 4099 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4100 { 4101 ire_t *ire; 4102 4103 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4104 ire = ire_lookup_local_v6(zoneid, ipst); 4105 if (ire == NULL) { 4106 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4107 return (1); 4108 } 4109 ip6h->ip6_src = ire->ire_addr_v6; 4110 ire_refrele(ire); 4111 } 4112 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4113 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4114 return (0); 4115 } 4116 4117 /* 4118 * Try to determine where and what are the IPv6 header length and 4119 * pointer to nexthdr value for the upper layer protocol (or an 4120 * unknown next hdr). 4121 * 4122 * Parameters returns a pointer to the nexthdr value; 4123 * Must handle malformed packets of various sorts. 4124 * Function returns failure for malformed cases. 
4125 */ 4126 boolean_t 4127 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4128 uint8_t **nexthdrpp) 4129 { 4130 uint16_t length; 4131 uint_t ehdrlen; 4132 uint8_t *nexthdrp; 4133 uint8_t *whereptr; 4134 uint8_t *endptr; 4135 ip6_dest_t *desthdr; 4136 ip6_rthdr_t *rthdr; 4137 ip6_frag_t *fraghdr; 4138 4139 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4140 length = IPV6_HDR_LEN; 4141 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4142 endptr = mp->b_wptr; 4143 4144 nexthdrp = &ip6h->ip6_nxt; 4145 while (whereptr < endptr) { 4146 /* Is there enough left for len + nexthdr? */ 4147 if (whereptr + MIN_EHDR_LEN > endptr) 4148 break; 4149 4150 switch (*nexthdrp) { 4151 case IPPROTO_HOPOPTS: 4152 case IPPROTO_DSTOPTS: 4153 /* Assumes the headers are identical for hbh and dst */ 4154 desthdr = (ip6_dest_t *)whereptr; 4155 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4156 if ((uchar_t *)desthdr + ehdrlen > endptr) 4157 return (B_FALSE); 4158 nexthdrp = &desthdr->ip6d_nxt; 4159 break; 4160 case IPPROTO_ROUTING: 4161 rthdr = (ip6_rthdr_t *)whereptr; 4162 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4163 if ((uchar_t *)rthdr + ehdrlen > endptr) 4164 return (B_FALSE); 4165 nexthdrp = &rthdr->ip6r_nxt; 4166 break; 4167 case IPPROTO_FRAGMENT: 4168 fraghdr = (ip6_frag_t *)whereptr; 4169 ehdrlen = sizeof (ip6_frag_t); 4170 if ((uchar_t *)&fraghdr[1] > endptr) 4171 return (B_FALSE); 4172 nexthdrp = &fraghdr->ip6f_nxt; 4173 break; 4174 case IPPROTO_NONE: 4175 /* No next header means we're finished */ 4176 default: 4177 *hdr_length_ptr = length; 4178 *nexthdrpp = nexthdrp; 4179 return (B_TRUE); 4180 } 4181 length += ehdrlen; 4182 whereptr += ehdrlen; 4183 *hdr_length_ptr = length; 4184 *nexthdrpp = nexthdrp; 4185 } 4186 switch (*nexthdrp) { 4187 case IPPROTO_HOPOPTS: 4188 case IPPROTO_DSTOPTS: 4189 case IPPROTO_ROUTING: 4190 case IPPROTO_FRAGMENT: 4191 /* 4192 * If any know extension headers are still to be processed, 4193 * the 
packet's malformed (or at least all the IP header(s) are 4194 * not in the same mblk - and that should never happen. 4195 */ 4196 return (B_FALSE); 4197 4198 default: 4199 /* 4200 * If we get here, we know that all of the IP headers were in 4201 * the same mblk, even if the ULP header is in the next mblk. 4202 */ 4203 *hdr_length_ptr = length; 4204 *nexthdrpp = nexthdrp; 4205 return (B_TRUE); 4206 } 4207 } 4208 4209 /* 4210 * Return the length of the IPv6 related headers (including extension headers) 4211 * Returns a length even if the packet is malformed. 4212 */ 4213 int 4214 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4215 { 4216 uint16_t hdr_len; 4217 uint8_t *nexthdrp; 4218 4219 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4220 return (hdr_len); 4221 } 4222 4223 /* 4224 * IPv6 - 4225 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4226 * to send out a packet to a destination address for which we do not have 4227 * specific routing information. 4228 * 4229 * Handle non-multicast packets. If ill is non-NULL the match is done 4230 * for that ill. 4231 * 4232 * When a specific ill is specified (using IPV6_PKTINFO, 4233 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4234 * on routing entries (ftable and ctable) that have a matching 4235 * ire->ire_ipif->ipif_ill. Thus this can only be used 4236 * for destinations that are on-link for the specific ill 4237 * and that can appear on multiple links. Thus it is useful 4238 * for multicast destinations, link-local destinations, and 4239 * at some point perhaps for site-local destinations (if the 4240 * node sits at a site boundary). 4241 * We create the cache entries in the regular ctable since 4242 * it can not "confuse" things for other destinations. 4243 * 4244 * NOTE : These are the scopes of some of the variables that point at IRE, 4245 * which needs to be followed while making any future modifications 4246 * to avoid memory leaks. 
4247 * 4248 * - ire and sire are the entries looked up initially by 4249 * ire_ftable_lookup_v6. 4250 * - ipif_ire is used to hold the interface ire associated with 4251 * the new cache ire. But it's scope is limited, so we always REFRELE 4252 * it before branching out to error paths. 4253 * - save_ire is initialized before ire_create, so that ire returned 4254 * by ire_create will not over-write the ire. We REFRELE save_ire 4255 * before breaking out of the switch. 4256 * 4257 * Thus on failures, we have to REFRELE only ire and sire, if they 4258 * are not NULL. 4259 */ 4260 /* ARGSUSED */ 4261 void 4262 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4263 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4264 { 4265 in6_addr_t v6gw; 4266 in6_addr_t dst; 4267 ire_t *ire = NULL; 4268 ipif_t *src_ipif = NULL; 4269 ill_t *dst_ill = NULL; 4270 ire_t *sire = NULL; 4271 ire_t *save_ire; 4272 ip6_t *ip6h; 4273 int err = 0; 4274 mblk_t *first_mp; 4275 ipsec_out_t *io; 4276 ushort_t ire_marks = 0; 4277 int match_flags; 4278 ire_t *first_sire = NULL; 4279 mblk_t *copy_mp = NULL; 4280 mblk_t *xmit_mp = NULL; 4281 in6_addr_t save_dst; 4282 uint32_t multirt_flags = 4283 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4284 boolean_t multirt_is_resolvable; 4285 boolean_t multirt_resolve_next; 4286 boolean_t need_rele = B_FALSE; 4287 boolean_t ip6_asp_table_held = B_FALSE; 4288 tsol_ire_gw_secattr_t *attrp = NULL; 4289 tsol_gcgrp_t *gcgrp = NULL; 4290 tsol_gcgrp_addr_t ga; 4291 4292 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4293 4294 first_mp = mp; 4295 if (mp->b_datap->db_type == M_CTL) { 4296 mp = mp->b_cont; 4297 io = (ipsec_out_t *)first_mp->b_rptr; 4298 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4299 } else { 4300 io = NULL; 4301 } 4302 4303 ip6h = (ip6_t *)mp->b_rptr; 4304 4305 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4306 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4307 goto icmp_err_ret; 4308 } else if 
(IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4309 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4310 goto icmp_err_ret; 4311 } 4312 4313 /* 4314 * If this IRE is created for forwarding or it is not for 4315 * TCP traffic, mark it as temporary. 4316 * 4317 * Is it sufficient just to check the next header?? 4318 */ 4319 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4320 ire_marks |= IRE_MARK_TEMPORARY; 4321 4322 /* 4323 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4324 * chain until it gets the most specific information available. 4325 * For example, we know that there is no IRE_CACHE for this dest, 4326 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4327 * ire_ftable_lookup_v6 will look up the gateway, etc. 4328 */ 4329 4330 if (ill == NULL) { 4331 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4332 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4333 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4334 NULL, &sire, zoneid, 0, msg_getlabel(mp), 4335 match_flags, ipst); 4336 } else { 4337 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4338 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4339 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4340 4341 /* 4342 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4343 * tied to an underlying interface, IS_UNDER_IPMP() may be 4344 * true even when building IREs that will be used for data 4345 * traffic. As such, use the packet's source address to 4346 * determine whether the traffic is test traffic, and set 4347 * MATCH_IRE_MARK_TESTHIDDEN if so. 
4348 */ 4349 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 4350 if (ipif_lookup_testaddr_v6(ill, v6srcp, NULL)) 4351 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 4352 } 4353 4354 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4355 &sire, zoneid, 0, msg_getlabel(mp), match_flags, ipst); 4356 } 4357 4358 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4359 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4360 4361 /* 4362 * We enter a loop that will be run only once in most cases. 4363 * The loop is re-entered in the case where the destination 4364 * can be reached through multiple RTF_MULTIRT-flagged routes. 4365 * The intention is to compute multiple routes to a single 4366 * destination in a single ip_newroute_v6 call. 4367 * The information is contained in sire->ire_flags. 4368 */ 4369 do { 4370 multirt_resolve_next = B_FALSE; 4371 4372 if (dst_ill != NULL) { 4373 ill_refrele(dst_ill); 4374 dst_ill = NULL; 4375 } 4376 if (src_ipif != NULL) { 4377 ipif_refrele(src_ipif); 4378 src_ipif = NULL; 4379 } 4380 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4381 ip3dbg(("ip_newroute_v6: starting new resolution " 4382 "with first_mp %p, tag %d\n", 4383 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4384 4385 /* 4386 * We check if there are trailing unresolved routes for 4387 * the destination contained in sire. 4388 */ 4389 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4390 &sire, multirt_flags, msg_getlabel(mp), ipst); 4391 4392 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4393 "ire %p, sire %p\n", 4394 multirt_is_resolvable, (void *)ire, (void *)sire)); 4395 4396 if (!multirt_is_resolvable) { 4397 /* 4398 * No more multirt routes to resolve; give up 4399 * (all routes resolved or no more resolvable 4400 * routes). 
4401 */ 4402 if (ire != NULL) { 4403 ire_refrele(ire); 4404 ire = NULL; 4405 } 4406 } else { 4407 ASSERT(sire != NULL); 4408 ASSERT(ire != NULL); 4409 /* 4410 * We simply use first_sire as a flag that 4411 * indicates if a resolvable multirt route has 4412 * already been found during the preceding 4413 * loops. If it is not the case, we may have 4414 * to send an ICMP error to report that the 4415 * destination is unreachable. We do not 4416 * IRE_REFHOLD first_sire. 4417 */ 4418 if (first_sire == NULL) { 4419 first_sire = sire; 4420 } 4421 } 4422 } 4423 if ((ire == NULL) || (ire == sire)) { 4424 /* 4425 * either ire == NULL (the destination cannot be 4426 * resolved) or ire == sire (the gateway cannot be 4427 * resolved). At this point, there are no more routes 4428 * to resolve for the destination, thus we exit. 4429 */ 4430 if (ip_debug > 3) { 4431 /* ip2dbg */ 4432 pr_addr_dbg("ip_newroute_v6: " 4433 "can't resolve %s\n", AF_INET6, v6dstp); 4434 } 4435 ip3dbg(("ip_newroute_v6: " 4436 "ire %p, sire %p, first_sire %p\n", 4437 (void *)ire, (void *)sire, (void *)first_sire)); 4438 4439 if (sire != NULL) { 4440 ire_refrele(sire); 4441 sire = NULL; 4442 } 4443 4444 if (first_sire != NULL) { 4445 /* 4446 * At least one multirt route has been found 4447 * in the same ip_newroute() call; there is no 4448 * need to report an ICMP error. 4449 * first_sire was not IRE_REFHOLDed. 4450 */ 4451 MULTIRT_DEBUG_UNTAG(first_mp); 4452 freemsg(first_mp); 4453 return; 4454 } 4455 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4456 RTA_DST, ipst); 4457 goto icmp_err_ret; 4458 } 4459 4460 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4461 4462 /* 4463 * Verify that the returned IRE does not have either the 4464 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4465 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 
4466 */ 4467 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4468 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4469 goto icmp_err_ret; 4470 4471 /* 4472 * Increment the ire_ob_pkt_count field for ire if it is an 4473 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4474 * increment the same for the parent IRE, sire, if it is some 4475 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4476 */ 4477 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4478 UPDATE_OB_PKT_COUNT(ire); 4479 ire->ire_last_used_time = lbolt; 4480 } 4481 4482 if (sire != NULL) { 4483 mutex_enter(&sire->ire_lock); 4484 v6gw = sire->ire_gateway_addr_v6; 4485 mutex_exit(&sire->ire_lock); 4486 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4487 IRE_INTERFACE)) == 0); 4488 UPDATE_OB_PKT_COUNT(sire); 4489 sire->ire_last_used_time = lbolt; 4490 } else { 4491 v6gw = ipv6_all_zeros; 4492 } 4493 4494 /* 4495 * We have a route to reach the destination. Find the 4496 * appropriate ill, then get a source address that matches the 4497 * right scope via ipif_select_source_v6(). 4498 * 4499 * If we are here trying to create an IRE_CACHE for an offlink 4500 * destination and have an IRE_CACHE entry for VNI, then use 4501 * ire_stq instead since VNI's queue is a black hole. 4502 * 4503 * Note: While we pick a dst_ill we are really only interested 4504 * in the ill for load spreading. The source ipif is 4505 * determined by source address selection below. 
4506 */ 4507 if ((ire->ire_type == IRE_CACHE) && 4508 IS_VNI(ire->ire_ipif->ipif_ill)) { 4509 dst_ill = ire->ire_stq->q_ptr; 4510 ill_refhold(dst_ill); 4511 } else { 4512 ill_t *ill = ire->ire_ipif->ipif_ill; 4513 4514 if (IS_IPMP(ill)) { 4515 dst_ill = 4516 ipmp_illgrp_hold_next_ill(ill->ill_grp); 4517 } else { 4518 dst_ill = ill; 4519 ill_refhold(dst_ill); 4520 } 4521 } 4522 4523 if (dst_ill == NULL) { 4524 if (ip_debug > 2) { 4525 pr_addr_dbg("ip_newroute_v6 : no dst " 4526 "ill for dst %s\n", AF_INET6, v6dstp); 4527 } 4528 goto icmp_err_ret; 4529 } 4530 4531 if (ill != NULL && dst_ill != ill && 4532 !IS_IN_SAME_ILLGRP(dst_ill, ill)) { 4533 /* 4534 * We should have found a route matching "ill" 4535 * as we called ire_ftable_lookup_v6 with 4536 * MATCH_IRE_ILL. Rather than asserting when 4537 * there is a mismatch, we just drop the packet. 4538 */ 4539 ip0dbg(("ip_newroute_v6: BOUND_IF failed: " 4540 "dst_ill %s ill %s\n", dst_ill->ill_name, 4541 ill->ill_name)); 4542 goto icmp_err_ret; 4543 } 4544 4545 /* 4546 * Pick a source address which matches the scope of the 4547 * destination address. 4548 * For RTF_SETSRC routes, the source address is imposed by the 4549 * parent ire (sire). 4550 */ 4551 ASSERT(src_ipif == NULL); 4552 4553 /* 4554 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4555 * tied to the underlying interface, IS_UNDER_IPMP() may be 4556 * true even when building IREs that will be used for data 4557 * traffic. As such, see if the packet's source address is a 4558 * test address, and if so use that test address's ipif for 4559 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 4560 * ire_add_v6() can work properly. 
4561 */ 4562 if (ill != NULL && IS_UNDER_IPMP(ill)) 4563 (void) ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 4564 4565 if (src_ipif == NULL && ire->ire_type == IRE_IF_RESOLVER && 4566 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4567 ip6_asp_can_lookup(ipst)) { 4568 /* 4569 * The ire cache entry we're adding is for the 4570 * gateway itself. The source address in this case 4571 * is relative to the gateway's address. 4572 */ 4573 ip6_asp_table_held = B_TRUE; 4574 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4575 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4576 if (src_ipif != NULL) 4577 ire_marks |= IRE_MARK_USESRC_CHECK; 4578 } else if (src_ipif == NULL) { 4579 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4580 /* 4581 * Check that the ipif matching the requested 4582 * source address still exists. 4583 */ 4584 src_ipif = ipif_lookup_addr_v6( 4585 &sire->ire_src_addr_v6, NULL, zoneid, 4586 NULL, NULL, NULL, NULL, ipst); 4587 } 4588 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4589 ip6_asp_table_held = B_TRUE; 4590 src_ipif = ipif_select_source_v6(dst_ill, 4591 v6dstp, B_FALSE, 4592 IPV6_PREFER_SRC_DEFAULT, zoneid); 4593 if (src_ipif != NULL) 4594 ire_marks |= IRE_MARK_USESRC_CHECK; 4595 } 4596 } 4597 4598 if (src_ipif == NULL) { 4599 if (ip_debug > 2) { 4600 /* ip1dbg */ 4601 pr_addr_dbg("ip_newroute_v6: no src for " 4602 "dst %s\n", AF_INET6, v6dstp); 4603 printf("ip_newroute_v6: interface name %s\n", 4604 dst_ill->ill_name); 4605 } 4606 goto icmp_err_ret; 4607 } 4608 4609 if (ip_debug > 3) { 4610 /* ip2dbg */ 4611 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4612 AF_INET6, &v6gw); 4613 } 4614 ip2dbg(("\tire type %s (%d)\n", 4615 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4616 4617 /* 4618 * At this point in ip_newroute_v6(), ire is either the 4619 * IRE_CACHE of the next-hop gateway for an off-subnet 4620 * destination or an IRE_INTERFACE type that should be used 4621 * to resolve an on-subnet destination or an on-subnet 4622 * 
next-hop gateway. 4623 * 4624 * In the IRE_CACHE case, we have the following : 4625 * 4626 * 1) src_ipif - used for getting a source address. 4627 * 4628 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4629 * means packets using this IRE_CACHE will go out on dst_ill. 4630 * 4631 * 3) The IRE sire will point to the prefix that is the longest 4632 * matching route for the destination. These prefix types 4633 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4634 * 4635 * The newly created IRE_CACHE entry for the off-subnet 4636 * destination is tied to both the prefix route and the 4637 * interface route used to resolve the next-hop gateway 4638 * via the ire_phandle and ire_ihandle fields, respectively. 4639 * 4640 * In the IRE_INTERFACE case, we have the following : 4641 * 4642 * 1) src_ipif - used for getting a source address. 4643 * 4644 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4645 * means packets using the IRE_CACHE that we will build 4646 * here will go out on dst_ill. 4647 * 4648 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4649 * to be created will only be tied to the IRE_INTERFACE that 4650 * was derived from the ire_ihandle field. 4651 * 4652 * If sire is non-NULL, it means the destination is off-link 4653 * and we will first create the IRE_CACHE for the gateway. 4654 * Next time through ip_newroute_v6, we will create the 4655 * IRE_CACHE for the final destination as described above. 4656 */ 4657 save_ire = ire; 4658 switch (ire->ire_type) { 4659 case IRE_CACHE: { 4660 ire_t *ipif_ire; 4661 4662 ASSERT(sire != NULL); 4663 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4664 mutex_enter(&ire->ire_lock); 4665 v6gw = ire->ire_gateway_addr_v6; 4666 mutex_exit(&ire->ire_lock); 4667 } 4668 /* 4669 * We need 3 ire's to create a new cache ire for an 4670 * off-link destination from the cache ire of the 4671 * gateway. 4672 * 4673 * 1. The prefix ire 'sire' 4674 * 2. The cache ire of the gateway 'ire' 4675 * 3. 
The interface ire 'ipif_ire' 4676 * 4677 * We have (1) and (2). We lookup (3) below. 4678 * 4679 * If there is no interface route to the gateway, 4680 * it is a race condition, where we found the cache 4681 * but the inteface route has been deleted. 4682 */ 4683 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4684 if (ipif_ire == NULL) { 4685 ip1dbg(("ip_newroute_v6:" 4686 "ire_ihandle_lookup_offlink_v6 failed\n")); 4687 goto icmp_err_ret; 4688 } 4689 4690 /* 4691 * Note: the new ire inherits RTF_SETSRC 4692 * and RTF_MULTIRT to propagate these flags from prefix 4693 * to cache. 4694 */ 4695 4696 /* 4697 * Check cached gateway IRE for any security 4698 * attributes; if found, associate the gateway 4699 * credentials group to the destination IRE. 4700 */ 4701 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4702 mutex_enter(&attrp->igsa_lock); 4703 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4704 GCGRP_REFHOLD(gcgrp); 4705 mutex_exit(&attrp->igsa_lock); 4706 } 4707 4708 ire = ire_create_v6( 4709 v6dstp, /* dest address */ 4710 &ipv6_all_ones, /* mask */ 4711 &src_ipif->ipif_v6src_addr, /* source address */ 4712 &v6gw, /* gateway address */ 4713 &save_ire->ire_max_frag, 4714 NULL, /* src nce */ 4715 dst_ill->ill_rq, /* recv-from queue */ 4716 dst_ill->ill_wq, /* send-to queue */ 4717 IRE_CACHE, 4718 src_ipif, 4719 &sire->ire_mask_v6, /* Parent mask */ 4720 sire->ire_phandle, /* Parent handle */ 4721 ipif_ire->ire_ihandle, /* Interface handle */ 4722 sire->ire_flags & /* flags if any */ 4723 (RTF_SETSRC | RTF_MULTIRT), 4724 &(sire->ire_uinfo), 4725 NULL, 4726 gcgrp, 4727 ipst); 4728 4729 if (ire == NULL) { 4730 if (gcgrp != NULL) { 4731 GCGRP_REFRELE(gcgrp); 4732 gcgrp = NULL; 4733 } 4734 ire_refrele(save_ire); 4735 ire_refrele(ipif_ire); 4736 break; 4737 } 4738 4739 /* reference now held by IRE */ 4740 gcgrp = NULL; 4741 4742 ire->ire_marks |= ire_marks; 4743 4744 /* 4745 * Prevent sire and ipif_ire from getting deleted. 
The 4746 * newly created ire is tied to both of them via the 4747 * phandle and ihandle respectively. 4748 */ 4749 IRB_REFHOLD(sire->ire_bucket); 4750 /* Has it been removed already ? */ 4751 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4752 IRB_REFRELE(sire->ire_bucket); 4753 ire_refrele(ipif_ire); 4754 ire_refrele(save_ire); 4755 break; 4756 } 4757 4758 IRB_REFHOLD(ipif_ire->ire_bucket); 4759 /* Has it been removed already ? */ 4760 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4761 IRB_REFRELE(ipif_ire->ire_bucket); 4762 IRB_REFRELE(sire->ire_bucket); 4763 ire_refrele(ipif_ire); 4764 ire_refrele(save_ire); 4765 break; 4766 } 4767 4768 xmit_mp = first_mp; 4769 if (ire->ire_flags & RTF_MULTIRT) { 4770 copy_mp = copymsg(first_mp); 4771 if (copy_mp != NULL) { 4772 xmit_mp = copy_mp; 4773 MULTIRT_DEBUG_TAG(first_mp); 4774 } 4775 } 4776 ire_add_then_send(q, ire, xmit_mp); 4777 if (ip6_asp_table_held) { 4778 ip6_asp_table_refrele(ipst); 4779 ip6_asp_table_held = B_FALSE; 4780 } 4781 ire_refrele(save_ire); 4782 4783 /* Assert that sire is not deleted yet. */ 4784 ASSERT(sire->ire_ptpn != NULL); 4785 IRB_REFRELE(sire->ire_bucket); 4786 4787 /* Assert that ipif_ire is not deleted yet. */ 4788 ASSERT(ipif_ire->ire_ptpn != NULL); 4789 IRB_REFRELE(ipif_ire->ire_bucket); 4790 ire_refrele(ipif_ire); 4791 4792 if (copy_mp != NULL) { 4793 /* 4794 * Search for the next unresolved 4795 * multirt route. 4796 */ 4797 copy_mp = NULL; 4798 ipif_ire = NULL; 4799 ire = NULL; 4800 /* re-enter the loop */ 4801 multirt_resolve_next = B_TRUE; 4802 continue; 4803 } 4804 ire_refrele(sire); 4805 ill_refrele(dst_ill); 4806 ipif_refrele(src_ipif); 4807 return; 4808 } 4809 case IRE_IF_NORESOLVER: 4810 /* 4811 * We have what we need to build an IRE_CACHE. 4812 * 4813 * handle the Gated case, where we create 4814 * a NORESOLVER route for loopback. 
4815 */ 4816 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4817 break; 4818 /* 4819 * TSol note: We are creating the ire cache for the 4820 * destination 'dst'. If 'dst' is offlink, going 4821 * through the first hop 'gw', the security attributes 4822 * of 'dst' must be set to point to the gateway 4823 * credentials of gateway 'gw'. If 'dst' is onlink, it 4824 * is possible that 'dst' is a potential gateway that is 4825 * referenced by some route that has some security 4826 * attributes. Thus in the former case, we need to do a 4827 * gcgrp_lookup of 'gw' while in the latter case we 4828 * need to do gcgrp_lookup of 'dst' itself. 4829 */ 4830 ga.ga_af = AF_INET6; 4831 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4832 ga.ga_addr = v6gw; 4833 else 4834 ga.ga_addr = *v6dstp; 4835 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4836 4837 /* 4838 * Note: the new ire inherits sire flags RTF_SETSRC 4839 * and RTF_MULTIRT to propagate those rules from prefix 4840 * to cache. 4841 */ 4842 ire = ire_create_v6( 4843 v6dstp, /* dest address */ 4844 &ipv6_all_ones, /* mask */ 4845 &src_ipif->ipif_v6src_addr, /* source address */ 4846 &v6gw, /* gateway address */ 4847 &save_ire->ire_max_frag, 4848 NULL, /* no src nce */ 4849 dst_ill->ill_rq, /* recv-from queue */ 4850 dst_ill->ill_wq, /* send-to queue */ 4851 IRE_CACHE, 4852 src_ipif, 4853 &save_ire->ire_mask_v6, /* Parent mask */ 4854 (sire != NULL) ? /* Parent handle */ 4855 sire->ire_phandle : 0, 4856 save_ire->ire_ihandle, /* Interface handle */ 4857 (sire != NULL) ? 
/* flags if any */ 4858 sire->ire_flags & 4859 (RTF_SETSRC | RTF_MULTIRT) : 0, 4860 &(save_ire->ire_uinfo), 4861 NULL, 4862 gcgrp, 4863 ipst); 4864 4865 if (ire == NULL) { 4866 if (gcgrp != NULL) { 4867 GCGRP_REFRELE(gcgrp); 4868 gcgrp = NULL; 4869 } 4870 ire_refrele(save_ire); 4871 break; 4872 } 4873 4874 /* reference now held by IRE */ 4875 gcgrp = NULL; 4876 4877 ire->ire_marks |= ire_marks; 4878 4879 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4880 dst = v6gw; 4881 else 4882 dst = *v6dstp; 4883 err = ndp_noresolver(dst_ill, &dst); 4884 if (err != 0) { 4885 ire_refrele(save_ire); 4886 break; 4887 } 4888 4889 /* Prevent save_ire from getting deleted */ 4890 IRB_REFHOLD(save_ire->ire_bucket); 4891 /* Has it been removed already ? */ 4892 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4893 IRB_REFRELE(save_ire->ire_bucket); 4894 ire_refrele(save_ire); 4895 break; 4896 } 4897 4898 xmit_mp = first_mp; 4899 /* 4900 * In case of MULTIRT, a copy of the current packet 4901 * to send is made to further re-enter the 4902 * loop and attempt another route resolution 4903 */ 4904 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4905 copy_mp = copymsg(first_mp); 4906 if (copy_mp != NULL) { 4907 xmit_mp = copy_mp; 4908 MULTIRT_DEBUG_TAG(first_mp); 4909 } 4910 } 4911 ire_add_then_send(q, ire, xmit_mp); 4912 if (ip6_asp_table_held) { 4913 ip6_asp_table_refrele(ipst); 4914 ip6_asp_table_held = B_FALSE; 4915 } 4916 4917 /* Assert that it is not deleted yet. */ 4918 ASSERT(save_ire->ire_ptpn != NULL); 4919 IRB_REFRELE(save_ire->ire_bucket); 4920 ire_refrele(save_ire); 4921 4922 if (copy_mp != NULL) { 4923 /* 4924 * If we found a (no)resolver, we ignore any 4925 * trailing top priority IRE_CACHE in 4926 * further loops. This ensures that we do not 4927 * omit any (no)resolver despite the priority 4928 * in this call. 4929 * IRE_CACHE, if any, will be processed 4930 * by another thread entering ip_newroute(), 4931 * (on resolver response, for example). 
4932 * We use this to force multiple parallel 4933 * resolution as soon as a packet needs to be 4934 * sent. The result is, after one packet 4935 * emission all reachable routes are generally 4936 * resolved. 4937 * Otherwise, complete resolution of MULTIRT 4938 * routes would require several emissions as 4939 * side effect. 4940 */ 4941 multirt_flags &= ~MULTIRT_CACHEGW; 4942 4943 /* 4944 * Search for the next unresolved multirt 4945 * route. 4946 */ 4947 copy_mp = NULL; 4948 save_ire = NULL; 4949 ire = NULL; 4950 /* re-enter the loop */ 4951 multirt_resolve_next = B_TRUE; 4952 continue; 4953 } 4954 4955 /* Don't need sire anymore */ 4956 if (sire != NULL) 4957 ire_refrele(sire); 4958 ill_refrele(dst_ill); 4959 ipif_refrele(src_ipif); 4960 return; 4961 4962 case IRE_IF_RESOLVER: 4963 /* 4964 * We can't build an IRE_CACHE yet, but at least we 4965 * found a resolver that can help. 4966 */ 4967 dst = *v6dstp; 4968 4969 /* 4970 * To be at this point in the code with a non-zero gw 4971 * means that dst is reachable through a gateway that 4972 * we have never resolved. By changing dst to the gw 4973 * addr we resolve the gateway first. When 4974 * ire_add_then_send() tries to put the IP dg to dst, 4975 * it will reenter ip_newroute() at which time we will 4976 * find the IRE_CACHE for the gw and create another 4977 * IRE_CACHE above (for dst itself). 4978 */ 4979 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4980 save_dst = dst; 4981 dst = v6gw; 4982 v6gw = ipv6_all_zeros; 4983 } 4984 if (dst_ill->ill_flags & ILLF_XRESOLV) { 4985 /* 4986 * Ask the external resolver to do its thing. 
4987 * Make an mblk chain in the following form: 4988 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 4989 */ 4990 mblk_t *ire_mp; 4991 mblk_t *areq_mp; 4992 areq_t *areq; 4993 in6_addr_t *addrp; 4994 4995 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 4996 if (ip6_asp_table_held) { 4997 ip6_asp_table_refrele(ipst); 4998 ip6_asp_table_held = B_FALSE; 4999 } 5000 ire = ire_create_mp_v6( 5001 &dst, /* dest address */ 5002 &ipv6_all_ones, /* mask */ 5003 &src_ipif->ipif_v6src_addr, 5004 /* source address */ 5005 &v6gw, /* gateway address */ 5006 NULL, /* no src nce */ 5007 dst_ill->ill_rq, /* recv-from queue */ 5008 dst_ill->ill_wq, /* send-to queue */ 5009 IRE_CACHE, 5010 src_ipif, 5011 &save_ire->ire_mask_v6, /* Parent mask */ 5012 0, 5013 save_ire->ire_ihandle, 5014 /* Interface handle */ 5015 0, /* flags if any */ 5016 &(save_ire->ire_uinfo), 5017 NULL, 5018 NULL, 5019 ipst); 5020 5021 ire_refrele(save_ire); 5022 if (ire == NULL) { 5023 ip1dbg(("ip_newroute_v6:" 5024 "ire is NULL\n")); 5025 break; 5026 } 5027 5028 if ((sire != NULL) && 5029 (sire->ire_flags & RTF_MULTIRT)) { 5030 /* 5031 * processing a copy of the packet to 5032 * send for further resolution loops 5033 */ 5034 copy_mp = copymsg(first_mp); 5035 if (copy_mp != NULL) 5036 MULTIRT_DEBUG_TAG(copy_mp); 5037 } 5038 ire->ire_marks |= ire_marks; 5039 ire_mp = ire->ire_mp; 5040 /* 5041 * Now create or find an nce for this interface. 5042 * The hw addr will need to to be set from 5043 * the reply to the AR_ENTRY_QUERY that 5044 * we're about to send. This will be done in 5045 * ire_add_v6(). 5046 */ 5047 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5048 switch (err) { 5049 case 0: 5050 /* 5051 * New cache entry created. 5052 * Break, then ask the external 5053 * resolver. 5054 */ 5055 break; 5056 case EINPROGRESS: 5057 /* 5058 * Resolution in progress; 5059 * packet has been queued by 5060 * ndp_resolver(). 5061 */ 5062 ire_delete(ire); 5063 ire = NULL; 5064 /* 5065 * Check if another multirt 5066 * route must be resolved. 
5067 */ 5068 if (copy_mp != NULL) { 5069 /* 5070 * If we found a resolver, we 5071 * ignore any trailing top 5072 * priority IRE_CACHE in 5073 * further loops. The reason is 5074 * the same as for noresolver. 5075 */ 5076 multirt_flags &= 5077 ~MULTIRT_CACHEGW; 5078 /* 5079 * Search for the next 5080 * unresolved multirt route. 5081 */ 5082 first_mp = copy_mp; 5083 copy_mp = NULL; 5084 mp = first_mp; 5085 if (mp->b_datap->db_type == 5086 M_CTL) { 5087 mp = mp->b_cont; 5088 } 5089 ASSERT(sire != NULL); 5090 dst = save_dst; 5091 /* 5092 * re-enter the loop 5093 */ 5094 multirt_resolve_next = 5095 B_TRUE; 5096 continue; 5097 } 5098 5099 if (sire != NULL) 5100 ire_refrele(sire); 5101 ill_refrele(dst_ill); 5102 ipif_refrele(src_ipif); 5103 return; 5104 default: 5105 /* 5106 * Transient error; packet will be 5107 * freed. 5108 */ 5109 ire_delete(ire); 5110 ire = NULL; 5111 break; 5112 } 5113 if (err != 0) 5114 break; 5115 /* 5116 * Now set up the AR_ENTRY_QUERY and send it. 5117 */ 5118 areq_mp = ill_arp_alloc(dst_ill, 5119 (uchar_t *)&ipv6_areq_template, 5120 (caddr_t)&dst); 5121 if (areq_mp == NULL) { 5122 ip1dbg(("ip_newroute_v6:" 5123 "areq_mp is NULL\n")); 5124 freemsg(ire_mp); 5125 break; 5126 } 5127 areq = (areq_t *)areq_mp->b_rptr; 5128 addrp = (in6_addr_t *)((char *)areq + 5129 areq->areq_target_addr_offset); 5130 *addrp = dst; 5131 addrp = (in6_addr_t *)((char *)areq + 5132 areq->areq_sender_addr_offset); 5133 *addrp = src_ipif->ipif_v6src_addr; 5134 /* 5135 * link the chain, then send up to the resolver. 5136 */ 5137 linkb(areq_mp, ire_mp); 5138 linkb(areq_mp, mp); 5139 ip1dbg(("ip_newroute_v6:" 5140 "putnext to resolver\n")); 5141 putnext(dst_ill->ill_rq, areq_mp); 5142 /* 5143 * Check if another multirt route 5144 * must be resolved. 5145 */ 5146 ire = NULL; 5147 if (copy_mp != NULL) { 5148 /* 5149 * If we find a resolver, we ignore any 5150 * trailing top priority IRE_CACHE in 5151 * further loops. The reason is the 5152 * same as for noresolver. 
5153 */ 5154 multirt_flags &= ~MULTIRT_CACHEGW; 5155 /* 5156 * Search for the next unresolved 5157 * multirt route. 5158 */ 5159 first_mp = copy_mp; 5160 copy_mp = NULL; 5161 mp = first_mp; 5162 if (mp->b_datap->db_type == M_CTL) { 5163 mp = mp->b_cont; 5164 } 5165 ASSERT(sire != NULL); 5166 dst = save_dst; 5167 /* 5168 * re-enter the loop 5169 */ 5170 multirt_resolve_next = B_TRUE; 5171 continue; 5172 } 5173 5174 if (sire != NULL) 5175 ire_refrele(sire); 5176 ill_refrele(dst_ill); 5177 ipif_refrele(src_ipif); 5178 return; 5179 } 5180 /* 5181 * Non-external resolver case. 5182 * 5183 * TSol note: Please see the note above the 5184 * IRE_IF_NORESOLVER case. 5185 */ 5186 ga.ga_af = AF_INET6; 5187 ga.ga_addr = dst; 5188 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5189 5190 ire = ire_create_v6( 5191 &dst, /* dest address */ 5192 &ipv6_all_ones, /* mask */ 5193 &src_ipif->ipif_v6src_addr, /* source address */ 5194 &v6gw, /* gateway address */ 5195 &save_ire->ire_max_frag, 5196 NULL, /* no src nce */ 5197 dst_ill->ill_rq, /* recv-from queue */ 5198 dst_ill->ill_wq, /* send-to queue */ 5199 IRE_CACHE, 5200 src_ipif, 5201 &save_ire->ire_mask_v6, /* Parent mask */ 5202 0, 5203 save_ire->ire_ihandle, /* Interface handle */ 5204 0, /* flags if any */ 5205 &(save_ire->ire_uinfo), 5206 NULL, 5207 gcgrp, 5208 ipst); 5209 5210 if (ire == NULL) { 5211 if (gcgrp != NULL) { 5212 GCGRP_REFRELE(gcgrp); 5213 gcgrp = NULL; 5214 } 5215 ire_refrele(save_ire); 5216 break; 5217 } 5218 5219 /* reference now held by IRE */ 5220 gcgrp = NULL; 5221 5222 if ((sire != NULL) && 5223 (sire->ire_flags & RTF_MULTIRT)) { 5224 copy_mp = copymsg(first_mp); 5225 if (copy_mp != NULL) 5226 MULTIRT_DEBUG_TAG(copy_mp); 5227 } 5228 5229 ire->ire_marks |= ire_marks; 5230 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5231 switch (err) { 5232 case 0: 5233 /* Prevent save_ire from getting deleted */ 5234 IRB_REFHOLD(save_ire->ire_bucket); 5235 /* Has it been removed already ? 
*/ 5236 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5237 IRB_REFRELE(save_ire->ire_bucket); 5238 ire_refrele(save_ire); 5239 break; 5240 } 5241 5242 /* 5243 * We have a resolved cache entry, 5244 * add in the IRE. 5245 */ 5246 ire_add_then_send(q, ire, first_mp); 5247 if (ip6_asp_table_held) { 5248 ip6_asp_table_refrele(ipst); 5249 ip6_asp_table_held = B_FALSE; 5250 } 5251 5252 /* Assert that it is not deleted yet. */ 5253 ASSERT(save_ire->ire_ptpn != NULL); 5254 IRB_REFRELE(save_ire->ire_bucket); 5255 ire_refrele(save_ire); 5256 /* 5257 * Check if another multirt route 5258 * must be resolved. 5259 */ 5260 ire = NULL; 5261 if (copy_mp != NULL) { 5262 /* 5263 * If we find a resolver, we ignore any 5264 * trailing top priority IRE_CACHE in 5265 * further loops. The reason is the 5266 * same as for noresolver. 5267 */ 5268 multirt_flags &= ~MULTIRT_CACHEGW; 5269 /* 5270 * Search for the next unresolved 5271 * multirt route. 5272 */ 5273 first_mp = copy_mp; 5274 copy_mp = NULL; 5275 mp = first_mp; 5276 if (mp->b_datap->db_type == M_CTL) { 5277 mp = mp->b_cont; 5278 } 5279 ASSERT(sire != NULL); 5280 dst = save_dst; 5281 /* 5282 * re-enter the loop 5283 */ 5284 multirt_resolve_next = B_TRUE; 5285 continue; 5286 } 5287 5288 if (sire != NULL) 5289 ire_refrele(sire); 5290 ill_refrele(dst_ill); 5291 ipif_refrele(src_ipif); 5292 return; 5293 5294 case EINPROGRESS: 5295 /* 5296 * mp was consumed - presumably queued. 5297 * No need for ire, presumably resolution is 5298 * in progress, and ire will be added when the 5299 * address is resolved. 5300 */ 5301 if (ip6_asp_table_held) { 5302 ip6_asp_table_refrele(ipst); 5303 ip6_asp_table_held = B_FALSE; 5304 } 5305 ASSERT(ire->ire_nce == NULL); 5306 ire_delete(ire); 5307 ire_refrele(save_ire); 5308 /* 5309 * Check if another multirt route 5310 * must be resolved. 
5311 */ 5312 ire = NULL; 5313 if (copy_mp != NULL) { 5314 /* 5315 * If we find a resolver, we ignore any 5316 * trailing top priority IRE_CACHE in 5317 * further loops. The reason is the 5318 * same as for noresolver. 5319 */ 5320 multirt_flags &= ~MULTIRT_CACHEGW; 5321 /* 5322 * Search for the next unresolved 5323 * multirt route. 5324 */ 5325 first_mp = copy_mp; 5326 copy_mp = NULL; 5327 mp = first_mp; 5328 if (mp->b_datap->db_type == M_CTL) { 5329 mp = mp->b_cont; 5330 } 5331 ASSERT(sire != NULL); 5332 dst = save_dst; 5333 /* 5334 * re-enter the loop 5335 */ 5336 multirt_resolve_next = B_TRUE; 5337 continue; 5338 } 5339 if (sire != NULL) 5340 ire_refrele(sire); 5341 ill_refrele(dst_ill); 5342 ipif_refrele(src_ipif); 5343 return; 5344 default: 5345 /* Some transient error */ 5346 ASSERT(ire->ire_nce == NULL); 5347 ire_refrele(save_ire); 5348 break; 5349 } 5350 break; 5351 default: 5352 break; 5353 } 5354 if (ip6_asp_table_held) { 5355 ip6_asp_table_refrele(ipst); 5356 ip6_asp_table_held = B_FALSE; 5357 } 5358 } while (multirt_resolve_next); 5359 5360 err_ret: 5361 ip1dbg(("ip_newroute_v6: dropped\n")); 5362 if (src_ipif != NULL) 5363 ipif_refrele(src_ipif); 5364 if (dst_ill != NULL) { 5365 need_rele = B_TRUE; 5366 ill = dst_ill; 5367 } 5368 if (ill != NULL) { 5369 if (mp->b_prev != NULL) { 5370 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5371 } else { 5372 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5373 } 5374 5375 if (need_rele) 5376 ill_refrele(ill); 5377 } else { 5378 if (mp->b_prev != NULL) { 5379 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5380 } else { 5381 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5382 } 5383 } 5384 /* Did this packet originate externally? 
*/ 5385 if (mp->b_prev) { 5386 mp->b_next = NULL; 5387 mp->b_prev = NULL; 5388 } 5389 if (copy_mp != NULL) { 5390 MULTIRT_DEBUG_UNTAG(copy_mp); 5391 freemsg(copy_mp); 5392 } 5393 MULTIRT_DEBUG_UNTAG(first_mp); 5394 freemsg(first_mp); 5395 if (ire != NULL) 5396 ire_refrele(ire); 5397 if (sire != NULL) 5398 ire_refrele(sire); 5399 return; 5400 5401 icmp_err_ret: 5402 if (ip6_asp_table_held) 5403 ip6_asp_table_refrele(ipst); 5404 if (src_ipif != NULL) 5405 ipif_refrele(src_ipif); 5406 if (dst_ill != NULL) { 5407 need_rele = B_TRUE; 5408 ill = dst_ill; 5409 } 5410 ip1dbg(("ip_newroute_v6: no route\n")); 5411 if (sire != NULL) 5412 ire_refrele(sire); 5413 /* 5414 * We need to set sire to NULL to avoid double freeing if we 5415 * ever goto err_ret from below. 5416 */ 5417 sire = NULL; 5418 ip6h = (ip6_t *)mp->b_rptr; 5419 /* Skip ip6i_t header if present */ 5420 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5421 /* Make sure the IPv6 header is present */ 5422 if ((mp->b_wptr - (uchar_t *)ip6h) < 5423 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5424 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5425 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5426 goto err_ret; 5427 } 5428 } 5429 mp->b_rptr += sizeof (ip6i_t); 5430 ip6h = (ip6_t *)mp->b_rptr; 5431 } 5432 /* Did this packet originate externally? 
*/ 5433 if (mp->b_prev) { 5434 if (ill != NULL) { 5435 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5436 } else { 5437 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5438 } 5439 mp->b_next = NULL; 5440 mp->b_prev = NULL; 5441 q = WR(q); 5442 } else { 5443 if (ill != NULL) { 5444 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5445 } else { 5446 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5447 } 5448 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5449 /* Failed */ 5450 if (copy_mp != NULL) { 5451 MULTIRT_DEBUG_UNTAG(copy_mp); 5452 freemsg(copy_mp); 5453 } 5454 MULTIRT_DEBUG_UNTAG(first_mp); 5455 freemsg(first_mp); 5456 if (ire != NULL) 5457 ire_refrele(ire); 5458 if (need_rele) 5459 ill_refrele(ill); 5460 return; 5461 } 5462 } 5463 5464 if (need_rele) 5465 ill_refrele(ill); 5466 5467 /* 5468 * At this point we will have ire only if RTF_BLACKHOLE 5469 * or RTF_REJECT flags are set on the IRE. It will not 5470 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5471 */ 5472 if (ire != NULL) { 5473 if (ire->ire_flags & RTF_BLACKHOLE) { 5474 ire_refrele(ire); 5475 if (copy_mp != NULL) { 5476 MULTIRT_DEBUG_UNTAG(copy_mp); 5477 freemsg(copy_mp); 5478 } 5479 MULTIRT_DEBUG_UNTAG(first_mp); 5480 freemsg(first_mp); 5481 return; 5482 } 5483 ire_refrele(ire); 5484 } 5485 if (ip_debug > 3) { 5486 /* ip2dbg */ 5487 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5488 AF_INET6, v6dstp); 5489 } 5490 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5491 B_FALSE, B_FALSE, zoneid, ipst); 5492 } 5493 5494 /* 5495 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5496 * we need to send out a packet to a destination address for which we do not 5497 * have specific routing information. It is only used for multicast packets. 5498 * 5499 * If unspec_src we allow creating an IRE with source address zero. 5500 * ire_send_v6() will delete it after the packet is sent. 
5501 */ 5502 void 5503 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5504 const in6_addr_t *v6dstp, const in6_addr_t *v6srcp, int unspec_src, 5505 zoneid_t zoneid) 5506 { 5507 ire_t *ire = NULL; 5508 ipif_t *src_ipif = NULL; 5509 int err = 0; 5510 ill_t *dst_ill = NULL; 5511 ire_t *save_ire; 5512 ipsec_out_t *io; 5513 ill_t *ill; 5514 mblk_t *first_mp; 5515 ire_t *fire = NULL; 5516 mblk_t *copy_mp = NULL; 5517 const in6_addr_t *ire_v6srcp; 5518 boolean_t probe = B_FALSE; 5519 boolean_t multirt_resolve_next; 5520 boolean_t ipif_held = B_FALSE; 5521 boolean_t ill_held = B_FALSE; 5522 boolean_t ip6_asp_table_held = B_FALSE; 5523 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5524 5525 /* 5526 * This loop is run only once in most cases. 5527 * We loop to resolve further routes only when the destination 5528 * can be reached through multiple RTF_MULTIRT-flagged ires. 5529 */ 5530 do { 5531 multirt_resolve_next = B_FALSE; 5532 if (dst_ill != NULL) { 5533 ill_refrele(dst_ill); 5534 dst_ill = NULL; 5535 } 5536 5537 if (src_ipif != NULL) { 5538 ipif_refrele(src_ipif); 5539 src_ipif = NULL; 5540 } 5541 ASSERT(ipif != NULL); 5542 ill = ipif->ipif_ill; 5543 5544 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5545 if (ip_debug > 2) { 5546 /* ip1dbg */ 5547 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5548 AF_INET6, v6dstp); 5549 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5550 ill->ill_name, ipif->ipif_isv6); 5551 } 5552 5553 first_mp = mp; 5554 if (mp->b_datap->db_type == M_CTL) { 5555 mp = mp->b_cont; 5556 io = (ipsec_out_t *)first_mp->b_rptr; 5557 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5558 } else { 5559 io = NULL; 5560 } 5561 5562 /* 5563 * If the interface is a pt-pt interface we look for an 5564 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5565 * local_address and the pt-pt destination address. 5566 * Otherwise we just match the local address. 
5567 */ 5568 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5569 goto err_ret; 5570 } 5571 5572 /* 5573 * We check if an IRE_OFFSUBNET for the addr that goes through 5574 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5575 * RTF_MULTIRT flags must be honored. 5576 */ 5577 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5578 ip2dbg(("ip_newroute_ipif_v6: " 5579 "ipif_lookup_multi_ire_v6(" 5580 "ipif %p, dst %08x) = fire %p\n", 5581 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5582 (void *)fire)); 5583 5584 ASSERT(src_ipif == NULL); 5585 5586 /* 5587 * Because nce_xmit() calls ip_output_v6() and NCEs are always 5588 * tied to the underlying interface, IS_UNDER_IPMP() may be 5589 * true even when building IREs that will be used for data 5590 * traffic. As such, see if the packet's source address is a 5591 * test address, and if so use that test address's ipif for 5592 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 5593 * ire_add_v6() can work properly. 5594 */ 5595 if (IS_UNDER_IPMP(ill)) 5596 probe = ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 5597 5598 /* 5599 * Determine the outbound (destination) ill for this route. 5600 * If IPMP is not in use, that's the same as our ill. If IPMP 5601 * is in-use and we're on the IPMP interface, or we're on an 5602 * underlying ill but sending data traffic, use a suitable 5603 * destination ill from the group. The latter case covers a 5604 * subtle edge condition with multicast: when we bring up an 5605 * IPv6 data address, we will create an NCE on an underlying 5606 * interface, and send solitications to ff02::1, which would 5607 * take us through here, and cause us to create an IRE for 5608 * ff02::1. To meet our defined semantics for multicast (and 5609 * ensure there aren't unexpected echoes), that IRE needs to 5610 * use the IPMP group's nominated multicast interface. 5611 * 5612 * Note: the source ipif is determined by source address 5613 * selection later. 
5614 */ 5615 if (IS_IPMP(ill) || (IS_UNDER_IPMP(ill) && !probe)) { 5616 ill_t *ipmp_ill; 5617 ipmp_illgrp_t *illg; 5618 5619 if (IS_UNDER_IPMP(ill)) { 5620 ipmp_ill = ipmp_ill_hold_ipmp_ill(ill); 5621 } else { 5622 ipmp_ill = ill; 5623 ill_refhold(ipmp_ill); /* for symmetry */ 5624 } 5625 5626 if (ipmp_ill == NULL) 5627 goto err_ret; 5628 5629 illg = ipmp_ill->ill_grp; 5630 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 5631 dst_ill = ipmp_illgrp_hold_cast_ill(illg); 5632 else 5633 dst_ill = ipmp_illgrp_hold_next_ill(illg); 5634 5635 ill_refrele(ipmp_ill); 5636 } else { 5637 dst_ill = ill; 5638 ill_refhold(dst_ill); /* for symmetry */ 5639 } 5640 5641 if (dst_ill == NULL) { 5642 if (ip_debug > 2) { 5643 pr_addr_dbg("ip_newroute_ipif_v6: " 5644 "no dst ill for dst %s\n", 5645 AF_INET6, v6dstp); 5646 } 5647 goto err_ret; 5648 } 5649 5650 /* 5651 * Pick a source address which matches the scope of the 5652 * destination address. 5653 * For RTF_SETSRC routes, the source address is imposed by the 5654 * parent ire (fire). 5655 */ 5656 5657 if (src_ipif == NULL && fire != NULL && 5658 (fire->ire_flags & RTF_SETSRC)) { 5659 /* 5660 * Check that the ipif matching the requested source 5661 * address still exists. 
5662 */ 5663 src_ipif = ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5664 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5665 } 5666 5667 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5668 ip6_asp_table_held = B_TRUE; 5669 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5670 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5671 } 5672 5673 if (src_ipif == NULL) { 5674 if (!unspec_src) { 5675 if (ip_debug > 2) { 5676 /* ip1dbg */ 5677 pr_addr_dbg("ip_newroute_ipif_v6: " 5678 "no src for dst %s\n", 5679 AF_INET6, v6dstp); 5680 printf(" through interface %s\n", 5681 dst_ill->ill_name); 5682 } 5683 goto err_ret; 5684 } 5685 ire_v6srcp = &ipv6_all_zeros; 5686 src_ipif = ipif; 5687 ipif_refhold(src_ipif); 5688 } else { 5689 ire_v6srcp = &src_ipif->ipif_v6src_addr; 5690 } 5691 5692 ire = ipif_to_ire_v6(ipif); 5693 if (ire == NULL) { 5694 if (ip_debug > 2) { 5695 /* ip1dbg */ 5696 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5697 AF_INET6, &ipif->ipif_v6lcl_addr); 5698 printf("ip_newroute_ipif_v6: " 5699 "if %s\n", dst_ill->ill_name); 5700 } 5701 goto err_ret; 5702 } 5703 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5704 goto err_ret; 5705 5706 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5707 5708 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5709 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5710 if (ip_debug > 2) { 5711 /* ip1dbg */ 5712 pr_addr_dbg(" address %s\n", 5713 AF_INET6, &ire->ire_src_addr_v6); 5714 } 5715 save_ire = ire; 5716 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5717 (void *)ire, (void *)ipif)); 5718 5719 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5720 /* 5721 * an IRE_OFFSUBET was looked up 5722 * on that interface. 5723 * this ire has RTF_MULTIRT flag, 5724 * so the resolution loop 5725 * will be re-entered to resolve 5726 * additional routes on other 5727 * interfaces. For that purpose, 5728 * a copy of the packet is 5729 * made at this point. 
5730 */ 5731 fire->ire_last_used_time = lbolt; 5732 copy_mp = copymsg(first_mp); 5733 if (copy_mp) { 5734 MULTIRT_DEBUG_TAG(copy_mp); 5735 } 5736 } 5737 5738 switch (ire->ire_type) { 5739 case IRE_IF_NORESOLVER: { 5740 /* 5741 * We have what we need to build an IRE_CACHE. 5742 * 5743 * handle the Gated case, where we create 5744 * a NORESOLVER route for loopback. 5745 */ 5746 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5747 break; 5748 /* 5749 * The newly created ire will inherit the flags of the 5750 * parent ire, if any. 5751 */ 5752 ire = ire_create_v6( 5753 v6dstp, /* dest address */ 5754 &ipv6_all_ones, /* mask */ 5755 ire_v6srcp, /* source address */ 5756 NULL, /* gateway address */ 5757 &save_ire->ire_max_frag, 5758 NULL, /* no src nce */ 5759 dst_ill->ill_rq, /* recv-from queue */ 5760 dst_ill->ill_wq, /* send-to queue */ 5761 IRE_CACHE, 5762 src_ipif, 5763 NULL, 5764 (fire != NULL) ? /* Parent handle */ 5765 fire->ire_phandle : 0, 5766 save_ire->ire_ihandle, /* Interface handle */ 5767 (fire != NULL) ? 5768 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5769 0, 5770 &ire_uinfo_null, 5771 NULL, 5772 NULL, 5773 ipst); 5774 5775 if (ire == NULL) { 5776 ire_refrele(save_ire); 5777 break; 5778 } 5779 5780 err = ndp_noresolver(dst_ill, v6dstp); 5781 if (err != 0) { 5782 ire_refrele(save_ire); 5783 break; 5784 } 5785 5786 /* Prevent save_ire from getting deleted */ 5787 IRB_REFHOLD(save_ire->ire_bucket); 5788 /* Has it been removed already ? */ 5789 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5790 IRB_REFRELE(save_ire->ire_bucket); 5791 ire_refrele(save_ire); 5792 break; 5793 } 5794 5795 ire_add_then_send(q, ire, first_mp); 5796 if (ip6_asp_table_held) { 5797 ip6_asp_table_refrele(ipst); 5798 ip6_asp_table_held = B_FALSE; 5799 } 5800 5801 /* Assert that it is not deleted yet. 
*/ 5802 ASSERT(save_ire->ire_ptpn != NULL); 5803 IRB_REFRELE(save_ire->ire_bucket); 5804 ire_refrele(save_ire); 5805 if (fire != NULL) { 5806 ire_refrele(fire); 5807 fire = NULL; 5808 } 5809 5810 /* 5811 * The resolution loop is re-entered if we 5812 * actually are in a multirouting case. 5813 */ 5814 if (copy_mp != NULL) { 5815 boolean_t need_resolve = 5816 ire_multirt_need_resolve_v6(v6dstp, 5817 msg_getlabel(copy_mp), ipst); 5818 if (!need_resolve) { 5819 MULTIRT_DEBUG_UNTAG(copy_mp); 5820 freemsg(copy_mp); 5821 copy_mp = NULL; 5822 } else { 5823 /* 5824 * ipif_lookup_group_v6() calls 5825 * ire_lookup_multi_v6() that uses 5826 * ire_ftable_lookup_v6() to find 5827 * an IRE_INTERFACE for the group. 5828 * In the multirt case, 5829 * ire_lookup_multi_v6() then invokes 5830 * ire_multirt_lookup_v6() to find 5831 * the next resolvable ire. 5832 * As a result, we obtain a new 5833 * interface, derived from the 5834 * next ire. 5835 */ 5836 if (ipif_held) { 5837 ipif_refrele(ipif); 5838 ipif_held = B_FALSE; 5839 } 5840 ipif = ipif_lookup_group_v6(v6dstp, 5841 zoneid, ipst); 5842 ip2dbg(("ip_newroute_ipif: " 5843 "multirt dst %08x, ipif %p\n", 5844 ntohl(V4_PART_OF_V6((*v6dstp))), 5845 (void *)ipif)); 5846 if (ipif != NULL) { 5847 ipif_held = B_TRUE; 5848 mp = copy_mp; 5849 copy_mp = NULL; 5850 multirt_resolve_next = 5851 B_TRUE; 5852 continue; 5853 } else { 5854 freemsg(copy_mp); 5855 } 5856 } 5857 } 5858 ill_refrele(dst_ill); 5859 if (ipif_held) { 5860 ipif_refrele(ipif); 5861 ipif_held = B_FALSE; 5862 } 5863 if (src_ipif != NULL) 5864 ipif_refrele(src_ipif); 5865 return; 5866 } 5867 case IRE_IF_RESOLVER: { 5868 5869 ASSERT(dst_ill->ill_isv6); 5870 5871 /* 5872 * We obtain a partial IRE_CACHE which we will pass 5873 * along with the resolver query. When the response 5874 * comes back it will be there ready for us to add. 5875 */ 5876 /* 5877 * the newly created ire will inherit the flags of the 5878 * parent ire, if any. 
5879 */ 5880 ire = ire_create_v6( 5881 v6dstp, /* dest address */ 5882 &ipv6_all_ones, /* mask */ 5883 ire_v6srcp, /* source address */ 5884 NULL, /* gateway address */ 5885 &save_ire->ire_max_frag, 5886 NULL, /* src nce */ 5887 dst_ill->ill_rq, /* recv-from queue */ 5888 dst_ill->ill_wq, /* send-to queue */ 5889 IRE_CACHE, 5890 src_ipif, 5891 NULL, 5892 (fire != NULL) ? /* Parent handle */ 5893 fire->ire_phandle : 0, 5894 save_ire->ire_ihandle, /* Interface handle */ 5895 (fire != NULL) ? 5896 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5897 0, 5898 &ire_uinfo_null, 5899 NULL, 5900 NULL, 5901 ipst); 5902 5903 if (ire == NULL) { 5904 ire_refrele(save_ire); 5905 break; 5906 } 5907 5908 /* Resolve and add ire to the ctable */ 5909 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5910 switch (err) { 5911 case 0: 5912 /* Prevent save_ire from getting deleted */ 5913 IRB_REFHOLD(save_ire->ire_bucket); 5914 /* Has it been removed already ? */ 5915 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5916 IRB_REFRELE(save_ire->ire_bucket); 5917 ire_refrele(save_ire); 5918 break; 5919 } 5920 /* 5921 * We have a resolved cache entry, 5922 * add in the IRE. 5923 */ 5924 ire_add_then_send(q, ire, first_mp); 5925 if (ip6_asp_table_held) { 5926 ip6_asp_table_refrele(ipst); 5927 ip6_asp_table_held = B_FALSE; 5928 } 5929 5930 /* Assert that it is not deleted yet. */ 5931 ASSERT(save_ire->ire_ptpn != NULL); 5932 IRB_REFRELE(save_ire->ire_bucket); 5933 ire_refrele(save_ire); 5934 if (fire != NULL) { 5935 ire_refrele(fire); 5936 fire = NULL; 5937 } 5938 5939 /* 5940 * The resolution loop is re-entered if we 5941 * actually are in a multirouting case. 
5942 */ 5943 if (copy_mp != NULL) { 5944 boolean_t need_resolve = 5945 ire_multirt_need_resolve_v6(v6dstp, 5946 msg_getlabel(copy_mp), ipst); 5947 if (!need_resolve) { 5948 MULTIRT_DEBUG_UNTAG(copy_mp); 5949 freemsg(copy_mp); 5950 copy_mp = NULL; 5951 } else { 5952 /* 5953 * ipif_lookup_group_v6() calls 5954 * ire_lookup_multi_v6() that 5955 * uses ire_ftable_lookup_v6() 5956 * to find an IRE_INTERFACE for 5957 * the group. In the multirt 5958 * case, ire_lookup_multi_v6() 5959 * then invokes 5960 * ire_multirt_lookup_v6() to 5961 * find the next resolvable ire. 5962 * As a result, we obtain a new 5963 * interface, derived from the 5964 * next ire. 5965 */ 5966 if (ipif_held) { 5967 ipif_refrele(ipif); 5968 ipif_held = B_FALSE; 5969 } 5970 ipif = ipif_lookup_group_v6( 5971 v6dstp, zoneid, ipst); 5972 ip2dbg(("ip_newroute_ipif: " 5973 "multirt dst %08x, " 5974 "ipif %p\n", 5975 ntohl(V4_PART_OF_V6( 5976 (*v6dstp))), 5977 (void *)ipif)); 5978 if (ipif != NULL) { 5979 ipif_held = B_TRUE; 5980 mp = copy_mp; 5981 copy_mp = NULL; 5982 multirt_resolve_next = 5983 B_TRUE; 5984 continue; 5985 } else { 5986 freemsg(copy_mp); 5987 } 5988 } 5989 } 5990 ill_refrele(dst_ill); 5991 if (ipif_held) { 5992 ipif_refrele(ipif); 5993 ipif_held = B_FALSE; 5994 } 5995 if (src_ipif != NULL) 5996 ipif_refrele(src_ipif); 5997 return; 5998 5999 case EINPROGRESS: 6000 /* 6001 * mp was consumed - presumably queued. 6002 * No need for ire, presumably resolution is 6003 * in progress, and ire will be added when the 6004 * address is resolved. 6005 */ 6006 if (ip6_asp_table_held) { 6007 ip6_asp_table_refrele(ipst); 6008 ip6_asp_table_held = B_FALSE; 6009 } 6010 ire_delete(ire); 6011 ire_refrele(save_ire); 6012 if (fire != NULL) { 6013 ire_refrele(fire); 6014 fire = NULL; 6015 } 6016 6017 /* 6018 * The resolution loop is re-entered if we 6019 * actually are in a multirouting case. 
6020 */ 6021 if (copy_mp != NULL) { 6022 boolean_t need_resolve = 6023 ire_multirt_need_resolve_v6(v6dstp, 6024 msg_getlabel(copy_mp), ipst); 6025 if (!need_resolve) { 6026 MULTIRT_DEBUG_UNTAG(copy_mp); 6027 freemsg(copy_mp); 6028 copy_mp = NULL; 6029 } else { 6030 /* 6031 * ipif_lookup_group_v6() calls 6032 * ire_lookup_multi_v6() that 6033 * uses ire_ftable_lookup_v6() 6034 * to find an IRE_INTERFACE for 6035 * the group. In the multirt 6036 * case, ire_lookup_multi_v6() 6037 * then invokes 6038 * ire_multirt_lookup_v6() to 6039 * find the next resolvable ire. 6040 * As a result, we obtain a new 6041 * interface, derived from the 6042 * next ire. 6043 */ 6044 if (ipif_held) { 6045 ipif_refrele(ipif); 6046 ipif_held = B_FALSE; 6047 } 6048 ipif = ipif_lookup_group_v6( 6049 v6dstp, zoneid, ipst); 6050 ip2dbg(("ip_newroute_ipif: " 6051 "multirt dst %08x, " 6052 "ipif %p\n", 6053 ntohl(V4_PART_OF_V6( 6054 (*v6dstp))), 6055 (void *)ipif)); 6056 if (ipif != NULL) { 6057 ipif_held = B_TRUE; 6058 mp = copy_mp; 6059 copy_mp = NULL; 6060 multirt_resolve_next = 6061 B_TRUE; 6062 continue; 6063 } else { 6064 freemsg(copy_mp); 6065 } 6066 } 6067 } 6068 ill_refrele(dst_ill); 6069 if (ipif_held) { 6070 ipif_refrele(ipif); 6071 ipif_held = B_FALSE; 6072 } 6073 if (src_ipif != NULL) 6074 ipif_refrele(src_ipif); 6075 return; 6076 default: 6077 /* Some transient error */ 6078 ire_refrele(save_ire); 6079 break; 6080 } 6081 break; 6082 } 6083 default: 6084 break; 6085 } 6086 if (ip6_asp_table_held) { 6087 ip6_asp_table_refrele(ipst); 6088 ip6_asp_table_held = B_FALSE; 6089 } 6090 } while (multirt_resolve_next); 6091 6092 err_ret: 6093 if (ip6_asp_table_held) 6094 ip6_asp_table_refrele(ipst); 6095 if (ire != NULL) 6096 ire_refrele(ire); 6097 if (fire != NULL) 6098 ire_refrele(fire); 6099 if (ipif != NULL && ipif_held) 6100 ipif_refrele(ipif); 6101 if (src_ipif != NULL) 6102 ipif_refrele(src_ipif); 6103 6104 /* Multicast - no point in trying to generate ICMP error */ 6105 if (dst_ill != 
NULL) { 6106 ill = dst_ill; 6107 ill_held = B_TRUE; 6108 } 6109 if (mp->b_prev || mp->b_next) { 6110 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6111 } else { 6112 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6113 } 6114 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6115 mp->b_next = NULL; 6116 mp->b_prev = NULL; 6117 freemsg(first_mp); 6118 if (ill_held) 6119 ill_refrele(ill); 6120 } 6121 6122 /* 6123 * Parse and process any hop-by-hop or destination options. 6124 * 6125 * Assumes that q is an ill read queue so that ICMP errors for link-local 6126 * destinations are sent out the correct interface. 6127 * 6128 * Returns -1 if there was an error and mp has been consumed. 6129 * Returns 0 if no special action is needed. 6130 * Returns 1 if the packet contained a router alert option for this node 6131 * which is verified to be "interesting/known" for our implementation. 6132 * 6133 * XXX Note: In future as more hbh or dest options are defined, 6134 * it may be better to have different routines for hbh and dest 6135 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6136 * may have same value in different namespaces. Or is it same namespace ?? 
6137 * Current code checks for each opt_type (other than pads) if it is in 6138 * the expected nexthdr (hbh or dest) 6139 */ 6140 static int 6141 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6142 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6143 { 6144 uint8_t opt_type; 6145 uint_t optused; 6146 int ret = 0; 6147 mblk_t *first_mp; 6148 const char *errtype; 6149 zoneid_t zoneid; 6150 ill_t *ill = q->q_ptr; 6151 ipif_t *ipif; 6152 6153 first_mp = mp; 6154 if (mp->b_datap->db_type == M_CTL) { 6155 mp = mp->b_cont; 6156 } 6157 6158 while (optlen != 0) { 6159 opt_type = *optptr; 6160 if (opt_type == IP6OPT_PAD1) { 6161 optused = 1; 6162 } else { 6163 if (optlen < 2) 6164 goto bad_opt; 6165 errtype = "malformed"; 6166 if (opt_type == ip6opt_ls) { 6167 optused = 2 + optptr[1]; 6168 if (optused > optlen) 6169 goto bad_opt; 6170 } else switch (opt_type) { 6171 case IP6OPT_PADN: 6172 /* 6173 * Note:We don't verify that (N-2) pad octets 6174 * are zero as required by spec. Adhere to 6175 * "be liberal in what you accept..." part of 6176 * implementation philosophy (RFC791,RFC1122) 6177 */ 6178 optused = 2 + optptr[1]; 6179 if (optused > optlen) 6180 goto bad_opt; 6181 break; 6182 6183 case IP6OPT_JUMBO: 6184 if (hdr_type != IPPROTO_HOPOPTS) 6185 goto opt_error; 6186 goto opt_error; /* XXX Not implemented! 
*/ 6187 6188 case IP6OPT_ROUTER_ALERT: { 6189 struct ip6_opt_router *or; 6190 6191 if (hdr_type != IPPROTO_HOPOPTS) 6192 goto opt_error; 6193 optused = 2 + optptr[1]; 6194 if (optused > optlen) 6195 goto bad_opt; 6196 or = (struct ip6_opt_router *)optptr; 6197 /* Check total length and alignment */ 6198 if (optused != sizeof (*or) || 6199 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6200 goto opt_error; 6201 /* Check value */ 6202 switch (*((uint16_t *)or->ip6or_value)) { 6203 case IP6_ALERT_MLD: 6204 case IP6_ALERT_RSVP: 6205 ret = 1; 6206 } 6207 break; 6208 } 6209 case IP6OPT_HOME_ADDRESS: { 6210 /* 6211 * Minimal support for the home address option 6212 * (which is required by all IPv6 nodes). 6213 * Implement by just swapping the home address 6214 * and source address. 6215 * XXX Note: this has IPsec implications since 6216 * AH needs to take this into account. 6217 * Also, when IPsec is used we need to ensure 6218 * that this is only processed once 6219 * in the received packet (to avoid swapping 6220 * back and forth). 6221 * NOTE:This option processing is considered 6222 * to be unsafe and prone to a denial of 6223 * service attack. 6224 * The current processing is not safe even with 6225 * IPsec secured IP packets. Since the home 6226 * address option processing requirement still 6227 * is in the IETF draft and in the process of 6228 * being redefined for its usage, it has been 6229 * decided to turn off the option by default. 6230 * If this section of code needs to be executed, 6231 * ndd variable ip6_ignore_home_address_opt 6232 * should be set to 0 at the user's own risk. 6233 */ 6234 struct ip6_opt_home_address *oh; 6235 in6_addr_t tmp; 6236 6237 if (ipst->ips_ipv6_ignore_home_address_opt) 6238 goto opt_error; 6239 6240 if (hdr_type != IPPROTO_DSTOPTS) 6241 goto opt_error; 6242 optused = 2 + optptr[1]; 6243 if (optused > optlen) 6244 goto bad_opt; 6245 6246 /* 6247 * We did this dest. opt the first time 6248 * around (i.e. before AH processing). 
6249 * If we've done AH... stop now. 6250 */ 6251 if (first_mp != mp) { 6252 ipsec_in_t *ii; 6253 6254 ii = (ipsec_in_t *)first_mp->b_rptr; 6255 if (ii->ipsec_in_ah_sa != NULL) 6256 break; 6257 } 6258 6259 oh = (struct ip6_opt_home_address *)optptr; 6260 /* Check total length and alignment */ 6261 if (optused < sizeof (*oh) || 6262 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6263 goto opt_error; 6264 /* Swap ip6_src and the home address */ 6265 tmp = ip6h->ip6_src; 6266 /* XXX Note: only 8 byte alignment option */ 6267 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6268 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6269 break; 6270 } 6271 6272 case IP6OPT_TUNNEL_LIMIT: 6273 if (hdr_type != IPPROTO_DSTOPTS) { 6274 goto opt_error; 6275 } 6276 optused = 2 + optptr[1]; 6277 if (optused > optlen) { 6278 goto bad_opt; 6279 } 6280 if (optused != 3) { 6281 goto opt_error; 6282 } 6283 break; 6284 6285 default: 6286 errtype = "unknown"; 6287 /* FALLTHROUGH */ 6288 opt_error: 6289 /* Determine which zone should send error */ 6290 zoneid = ipif_lookup_addr_zoneid_v6( 6291 &ip6h->ip6_dst, ill, ipst); 6292 switch (IP6OPT_TYPE(opt_type)) { 6293 case IP6OPT_TYPE_SKIP: 6294 optused = 2 + optptr[1]; 6295 if (optused > optlen) 6296 goto bad_opt; 6297 ip1dbg(("ip_process_options_v6: %s " 6298 "opt 0x%x skipped\n", 6299 errtype, opt_type)); 6300 break; 6301 case IP6OPT_TYPE_DISCARD: 6302 ip1dbg(("ip_process_options_v6: %s " 6303 "opt 0x%x; packet dropped\n", 6304 errtype, opt_type)); 6305 freemsg(first_mp); 6306 return (-1); 6307 case IP6OPT_TYPE_ICMP: 6308 if (zoneid == ALL_ZONES) { 6309 freemsg(first_mp); 6310 return (-1); 6311 } 6312 icmp_param_problem_v6(WR(q), first_mp, 6313 ICMP6_PARAMPROB_OPTION, 6314 (uint32_t)(optptr - 6315 (uint8_t *)ip6h), 6316 B_FALSE, B_FALSE, zoneid, ipst); 6317 return (-1); 6318 case IP6OPT_TYPE_FORCEICMP: 6319 /* 6320 * If we don't have a zone and the dst 6321 * addr is multicast, then pick a zone 6322 * based on the inbound interface. 
6323 */ 6324 if (zoneid == ALL_ZONES && 6325 IN6_IS_ADDR_MULTICAST( 6326 &ip6h->ip6_dst)) { 6327 ipif = ipif_select_source_v6( 6328 ill, &ip6h->ip6_src, 6329 B_TRUE, 6330 IPV6_PREFER_SRC_DEFAULT, 6331 ALL_ZONES); 6332 if (ipif != NULL) { 6333 zoneid = 6334 ipif->ipif_zoneid; 6335 ipif_refrele(ipif); 6336 } 6337 } 6338 if (zoneid == ALL_ZONES) { 6339 freemsg(first_mp); 6340 return (-1); 6341 } 6342 icmp_param_problem_v6(WR(q), first_mp, 6343 ICMP6_PARAMPROB_OPTION, 6344 (uint32_t)(optptr - 6345 (uint8_t *)ip6h), 6346 B_FALSE, B_TRUE, zoneid, ipst); 6347 return (-1); 6348 default: 6349 ASSERT(0); 6350 } 6351 } 6352 } 6353 optlen -= optused; 6354 optptr += optused; 6355 } 6356 return (ret); 6357 6358 bad_opt: 6359 /* Determine which zone should send error */ 6360 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6361 if (zoneid == ALL_ZONES) { 6362 freemsg(first_mp); 6363 } else { 6364 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6365 (uint32_t)(optptr - (uint8_t *)ip6h), 6366 B_FALSE, B_FALSE, zoneid, ipst); 6367 } 6368 return (-1); 6369 } 6370 6371 /* 6372 * Process a routing header that is not yet empty. 6373 * Only handles type 0 routing headers. 6374 */ 6375 static void 6376 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6377 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6378 { 6379 ip6_rthdr0_t *rthdr; 6380 uint_t ehdrlen; 6381 uint_t numaddr; 6382 in6_addr_t *addrptr; 6383 in6_addr_t tmp; 6384 ip_stack_t *ipst = ill->ill_ipst; 6385 6386 ASSERT(rth->ip6r_segleft != 0); 6387 6388 if (!ipst->ips_ipv6_forward_src_routed) { 6389 /* XXX Check for source routed out same interface? 
*/ 6390 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6391 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6392 freemsg(hada_mp); 6393 freemsg(mp); 6394 return; 6395 } 6396 6397 if (rth->ip6r_type != 0) { 6398 if (hada_mp != NULL) 6399 goto hada_drop; 6400 /* Sent by forwarding path, and router is global zone */ 6401 icmp_param_problem_v6(WR(q), mp, 6402 ICMP6_PARAMPROB_HEADER, 6403 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6404 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6405 return; 6406 } 6407 rthdr = (ip6_rthdr0_t *)rth; 6408 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6409 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6410 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6411 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6412 if (rthdr->ip6r0_len & 0x1) { 6413 /* An odd length is impossible */ 6414 if (hada_mp != NULL) 6415 goto hada_drop; 6416 /* Sent by forwarding path, and router is global zone */ 6417 icmp_param_problem_v6(WR(q), mp, 6418 ICMP6_PARAMPROB_HEADER, 6419 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6420 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6421 return; 6422 } 6423 numaddr = rthdr->ip6r0_len / 2; 6424 if (rthdr->ip6r0_segleft > numaddr) { 6425 /* segleft exceeds number of addresses in routing header */ 6426 if (hada_mp != NULL) 6427 goto hada_drop; 6428 /* Sent by forwarding path, and router is global zone */ 6429 icmp_param_problem_v6(WR(q), mp, 6430 ICMP6_PARAMPROB_HEADER, 6431 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6432 (uchar_t *)ip6h), 6433 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6434 return; 6435 } 6436 addrptr += (numaddr - rthdr->ip6r0_segleft); 6437 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6438 IN6_IS_ADDR_MULTICAST(addrptr)) { 6439 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6440 freemsg(hada_mp); 6441 freemsg(mp); 6442 return; 6443 } 6444 /* Swap */ 6445 tmp = *addrptr; 6446 *addrptr = ip6h->ip6_dst; 6447 ip6h->ip6_dst = tmp; 6448 rthdr->ip6r0_segleft--; 
6449 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6450 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6451 if (hada_mp != NULL) 6452 goto hada_drop; 6453 /* Sent by forwarding path, and router is global zone */ 6454 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6455 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6456 return; 6457 } 6458 if (ip_check_v6_mblk(mp, ill) == IP6_MBLK_OK) { 6459 ip6h = (ip6_t *)mp->b_rptr; 6460 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6461 } else { 6462 freemsg(mp); 6463 } 6464 return; 6465 hada_drop: 6466 /* IPsec kstats: bean counter? */ 6467 freemsg(hada_mp); 6468 freemsg(mp); 6469 } 6470 6471 /* 6472 * Read side put procedure for IPv6 module. 6473 */ 6474 void 6475 ip_rput_v6(queue_t *q, mblk_t *mp) 6476 { 6477 mblk_t *first_mp; 6478 mblk_t *hada_mp = NULL; 6479 ip6_t *ip6h; 6480 boolean_t ll_multicast = B_FALSE; 6481 boolean_t mctl_present = B_FALSE; 6482 ill_t *ill; 6483 struct iocblk *iocp; 6484 uint_t flags = 0; 6485 mblk_t *dl_mp; 6486 ip_stack_t *ipst; 6487 int check; 6488 6489 ill = (ill_t *)q->q_ptr; 6490 ipst = ill->ill_ipst; 6491 if (ill->ill_state_flags & ILL_CONDEMNED) { 6492 union DL_primitives *dl; 6493 6494 dl = (union DL_primitives *)mp->b_rptr; 6495 /* 6496 * Things are opening or closing - only accept DLPI 6497 * ack messages. If the stream is closing and ip_wsrv 6498 * has completed, ip_close is out of the qwait, but has 6499 * not yet completed qprocsoff. Don't proceed any further 6500 * because the ill has been cleaned up and things hanging 6501 * off the ill have been freed. 6502 */ 6503 if ((mp->b_datap->db_type != M_PCPROTO) || 6504 (dl->dl_primitive == DL_UNITDATA_IND)) { 6505 inet_freemsg(mp); 6506 return; 6507 } 6508 } 6509 6510 dl_mp = NULL; 6511 switch (mp->b_datap->db_type) { 6512 case M_DATA: { 6513 int hlen; 6514 uchar_t *ucp; 6515 struct ether_header *eh; 6516 dl_unitdata_ind_t *dui; 6517 6518 /* 6519 * This is a work-around for CR 6451644, a bug in Nemo. 
It 6520 * should be removed when that problem is fixed. 6521 */ 6522 if (ill->ill_mactype == DL_ETHER && 6523 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6524 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6525 ucp[-2] == (IP6_DL_SAP >> 8)) { 6526 if (hlen >= sizeof (struct ether_vlan_header) && 6527 ucp[-5] == 0 && ucp[-6] == 0x81) 6528 ucp -= sizeof (struct ether_vlan_header); 6529 else 6530 ucp -= sizeof (struct ether_header); 6531 /* 6532 * If it's a group address, then fabricate a 6533 * DL_UNITDATA_IND message. 6534 */ 6535 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6536 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6537 BPRI_HI)) != NULL) { 6538 eh = (struct ether_header *)ucp; 6539 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6540 DB_TYPE(dl_mp) = M_PROTO; 6541 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6542 dui->dl_primitive = DL_UNITDATA_IND; 6543 dui->dl_dest_addr_length = 8; 6544 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6545 dui->dl_src_addr_length = 8; 6546 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6547 8; 6548 dui->dl_group_address = 1; 6549 ucp = (uchar_t *)(dui + 1); 6550 if (ill->ill_sap_length > 0) 6551 ucp += ill->ill_sap_length; 6552 bcopy(&eh->ether_dhost, ucp, 6); 6553 bcopy(&eh->ether_shost, ucp + 8, 6); 6554 ucp = (uchar_t *)(dui + 1); 6555 if (ill->ill_sap_length < 0) 6556 ucp += 8 + ill->ill_sap_length; 6557 bcopy(&eh->ether_type, ucp, 2); 6558 bcopy(&eh->ether_type, ucp + 8, 2); 6559 } 6560 } 6561 break; 6562 } 6563 6564 case M_PROTO: 6565 case M_PCPROTO: 6566 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6567 DL_UNITDATA_IND) { 6568 /* Go handle anything other than data elsewhere. */ 6569 ip_rput_dlpi(q, mp); 6570 return; 6571 } 6572 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6573 6574 /* Save the DLPI header. 
*/ 6575 dl_mp = mp; 6576 mp = mp->b_cont; 6577 dl_mp->b_cont = NULL; 6578 break; 6579 case M_BREAK: 6580 panic("ip_rput_v6: got an M_BREAK"); 6581 /*NOTREACHED*/ 6582 case M_IOCACK: 6583 iocp = (struct iocblk *)mp->b_rptr; 6584 switch (iocp->ioc_cmd) { 6585 case DL_IOC_HDR_INFO: 6586 ill = (ill_t *)q->q_ptr; 6587 ill_fastpath_ack(ill, mp); 6588 return; 6589 6590 case SIOCGTUNPARAM: 6591 case OSIOCGTUNPARAM: 6592 ip_rput_other(NULL, q, mp, NULL); 6593 return; 6594 6595 case SIOCSTUNPARAM: 6596 case OSIOCSTUNPARAM: 6597 /* Go through qwriter */ 6598 break; 6599 default: 6600 putnext(q, mp); 6601 return; 6602 } 6603 /* FALLTHRU */ 6604 case M_ERROR: 6605 case M_HANGUP: 6606 mutex_enter(&ill->ill_lock); 6607 if (ill->ill_state_flags & ILL_CONDEMNED) { 6608 mutex_exit(&ill->ill_lock); 6609 freemsg(mp); 6610 return; 6611 } 6612 ill_refhold_locked(ill); 6613 mutex_exit(&ill->ill_lock); 6614 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6615 return; 6616 case M_CTL: 6617 if ((MBLKL(mp) > sizeof (int)) && 6618 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6619 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6620 mctl_present = B_TRUE; 6621 break; 6622 } 6623 putnext(q, mp); 6624 return; 6625 case M_IOCNAK: 6626 iocp = (struct iocblk *)mp->b_rptr; 6627 switch (iocp->ioc_cmd) { 6628 case DL_IOC_HDR_INFO: 6629 case SIOCGTUNPARAM: 6630 case OSIOCGTUNPARAM: 6631 ip_rput_other(NULL, q, mp, NULL); 6632 return; 6633 6634 case SIOCSTUNPARAM: 6635 case OSIOCSTUNPARAM: 6636 mutex_enter(&ill->ill_lock); 6637 if (ill->ill_state_flags & ILL_CONDEMNED) { 6638 mutex_exit(&ill->ill_lock); 6639 freemsg(mp); 6640 return; 6641 } 6642 ill_refhold_locked(ill); 6643 mutex_exit(&ill->ill_lock); 6644 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6645 return; 6646 default: 6647 break; 6648 } 6649 /* FALLTHRU */ 6650 default: 6651 putnext(q, mp); 6652 return; 6653 } 6654 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6655 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6656 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6657 /* 6658 * if db_ref > 1 then copymsg and free original. Packet may be 6659 * changed and do not want other entity who has a reference to this 6660 * message to trip over the changes. This is a blind change because 6661 * trying to catch all places that might change packet is too 6662 * difficult (since it may be a module above this one). 6663 */ 6664 if (mp->b_datap->db_ref > 1) { 6665 mblk_t *mp1; 6666 6667 mp1 = copymsg(mp); 6668 freemsg(mp); 6669 if (mp1 == NULL) { 6670 first_mp = NULL; 6671 goto discard; 6672 } 6673 mp = mp1; 6674 } 6675 first_mp = mp; 6676 if (mctl_present) { 6677 hada_mp = first_mp; 6678 mp = first_mp->b_cont; 6679 } 6680 6681 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6682 freemsg(mp); 6683 return; 6684 } 6685 6686 ip6h = (ip6_t *)mp->b_rptr; 6687 6688 /* 6689 * ip:::receive must see ipv6 packets with a full header, 6690 * and so is placed after the IP6_MBLK_HDR_ERR check. 6691 */ 6692 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6693 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6694 int, 0); 6695 6696 if (check != IP6_MBLK_OK) { 6697 freemsg(mp); 6698 return; 6699 } 6700 6701 DTRACE_PROBE4(ip6__physical__in__start, 6702 ill_t *, ill, ill_t *, NULL, 6703 ip6_t *, ip6h, mblk_t *, first_mp); 6704 6705 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6706 ipst->ips_ipv6firewall_physical_in, 6707 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6708 6709 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6710 6711 if (first_mp == NULL) 6712 return; 6713 6714 /* 6715 * Attach any necessary label information to this packet. 6716 */ 6717 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 6718 if (ip6opt_ls != 0) 6719 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 6720 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 6721 goto discard; 6722 } 6723 6724 /* IP observability hook. 
*/ 6725 if (ipst->ips_ipobs_enabled) { 6726 zoneid_t dzone; 6727 6728 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 6729 ALL_ZONES); 6730 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, 6731 IPV6_VERSION, 0, ipst); 6732 } 6733 6734 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6735 IPV6_DEFAULT_VERS_AND_FLOW) { 6736 /* 6737 * It may be a bit too expensive to do this mapped address 6738 * check here, but in the interest of robustness, it seems 6739 * like the correct place. 6740 * TODO: Avoid this check for e.g. connected TCP sockets 6741 */ 6742 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6743 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6744 goto discard; 6745 } 6746 6747 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6748 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6749 goto discard; 6750 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6751 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6752 goto discard; 6753 } 6754 6755 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6756 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6757 } else { 6758 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6759 goto discard; 6760 } 6761 freemsg(dl_mp); 6762 return; 6763 6764 discard: 6765 if (dl_mp != NULL) 6766 freeb(dl_mp); 6767 freemsg(first_mp); 6768 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6769 } 6770 6771 /* 6772 * Walk through the IPv6 packet in mp and see if there's an AH header 6773 * in it. See if the AH header needs to get done before other headers in 6774 * the packet. (Worker function for ipsec_early_ah_v6().) 
6775 */ 6776 #define IPSEC_HDR_DONT_PROCESS 0 6777 #define IPSEC_HDR_PROCESS 1 6778 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 6779 static int 6780 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6781 { 6782 uint_t length; 6783 uint_t ehdrlen; 6784 uint8_t *whereptr; 6785 uint8_t *endptr; 6786 uint8_t *nexthdrp; 6787 ip6_dest_t *desthdr; 6788 ip6_rthdr_t *rthdr; 6789 ip6_t *ip6h; 6790 6791 /* 6792 * For now just pullup everything. In general, the less pullups, 6793 * the better, but there's so much squirrelling through anyway, 6794 * it's just easier this way. 6795 */ 6796 if (!pullupmsg(mp, -1)) { 6797 return (IPSEC_MEMORY_ERROR); 6798 } 6799 6800 ip6h = (ip6_t *)mp->b_rptr; 6801 length = IPV6_HDR_LEN; 6802 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6803 endptr = mp->b_wptr; 6804 6805 /* 6806 * We can't just use the argument nexthdr in the place 6807 * of nexthdrp becaue we don't dereference nexthdrp 6808 * till we confirm whether it is a valid address. 6809 */ 6810 nexthdrp = &ip6h->ip6_nxt; 6811 while (whereptr < endptr) { 6812 /* Is there enough left for len + nexthdr? */ 6813 if (whereptr + MIN_EHDR_LEN > endptr) 6814 return (IPSEC_MEMORY_ERROR); 6815 6816 switch (*nexthdrp) { 6817 case IPPROTO_HOPOPTS: 6818 case IPPROTO_DSTOPTS: 6819 /* Assumes the headers are identical for hbh and dst */ 6820 desthdr = (ip6_dest_t *)whereptr; 6821 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6822 if ((uchar_t *)desthdr + ehdrlen > endptr) 6823 return (IPSEC_MEMORY_ERROR); 6824 /* 6825 * Return DONT_PROCESS because the destination 6826 * options header may be for each hop in a 6827 * routing-header, and we only want AH if we're 6828 * finished with routing headers. 
6829 */ 6830 if (*nexthdrp == IPPROTO_DSTOPTS) 6831 return (IPSEC_HDR_DONT_PROCESS); 6832 nexthdrp = &desthdr->ip6d_nxt; 6833 break; 6834 case IPPROTO_ROUTING: 6835 rthdr = (ip6_rthdr_t *)whereptr; 6836 6837 /* 6838 * If there's more hops left on the routing header, 6839 * return now with DON'T PROCESS. 6840 */ 6841 if (rthdr->ip6r_segleft > 0) 6842 return (IPSEC_HDR_DONT_PROCESS); 6843 6844 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6845 if ((uchar_t *)rthdr + ehdrlen > endptr) 6846 return (IPSEC_MEMORY_ERROR); 6847 nexthdrp = &rthdr->ip6r_nxt; 6848 break; 6849 case IPPROTO_FRAGMENT: 6850 /* Wait for reassembly */ 6851 return (IPSEC_HDR_DONT_PROCESS); 6852 case IPPROTO_AH: 6853 *nexthdr = IPPROTO_AH; 6854 return (IPSEC_HDR_PROCESS); 6855 case IPPROTO_NONE: 6856 /* No next header means we're finished */ 6857 default: 6858 return (IPSEC_HDR_DONT_PROCESS); 6859 } 6860 length += ehdrlen; 6861 whereptr += ehdrlen; 6862 } 6863 /* 6864 * Malformed/truncated packet. 6865 */ 6866 return (IPSEC_MEMORY_ERROR); 6867 } 6868 6869 /* 6870 * Path for AH if options are present. If this is the first time we are 6871 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6872 * Otherwise, just fanout. Return value answers the boolean question: 6873 * "Did I consume the mblk you sent me?" 6874 * 6875 * Sometimes AH needs to be done before other IPv6 headers for security 6876 * reasons. This function (and its ipsec_needs_processing_v6() above) 6877 * indicates if that is so, and fans out to the appropriate IPsec protocol 6878 * for the datagram passed in. 
6879 */ 6880 static boolean_t 6881 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6882 ill_t *ill, ill_t *inill, mblk_t *hada_mp, zoneid_t zoneid) 6883 { 6884 mblk_t *mp; 6885 uint8_t nexthdr; 6886 ipsec_in_t *ii = NULL; 6887 ah_t *ah; 6888 ipsec_status_t ipsec_rc; 6889 ip_stack_t *ipst = ill->ill_ipst; 6890 netstack_t *ns = ipst->ips_netstack; 6891 ipsec_stack_t *ipss = ns->netstack_ipsec; 6892 6893 ASSERT((hada_mp == NULL) || (!mctl_present)); 6894 6895 switch (ipsec_needs_processing_v6( 6896 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6897 case IPSEC_MEMORY_ERROR: 6898 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6899 freemsg(hada_mp); 6900 freemsg(first_mp); 6901 return (B_TRUE); 6902 case IPSEC_HDR_DONT_PROCESS: 6903 return (B_FALSE); 6904 } 6905 6906 /* Default means send it to AH! */ 6907 ASSERT(nexthdr == IPPROTO_AH); 6908 if (!mctl_present) { 6909 mp = first_mp; 6910 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 6911 if (first_mp == NULL) { 6912 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6913 "allocation failure.\n")); 6914 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6915 freemsg(hada_mp); 6916 freemsg(mp); 6917 return (B_TRUE); 6918 } 6919 /* 6920 * Store the ill_index so that when we come back 6921 * from IPSEC we ride on the same queue. 6922 */ 6923 ii = (ipsec_in_t *)first_mp->b_rptr; 6924 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6925 ii->ipsec_in_rill_index = inill->ill_phyint->phyint_ifindex; 6926 first_mp->b_cont = mp; 6927 } 6928 /* 6929 * Cache hardware acceleration info. 
6930 */ 6931 if (hada_mp != NULL) { 6932 ASSERT(ii != NULL); 6933 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6934 "caching data attr.\n")); 6935 ii->ipsec_in_accelerated = B_TRUE; 6936 ii->ipsec_in_da = hada_mp; 6937 } 6938 6939 if (!ipsec_loaded(ipss)) { 6940 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 6941 return (B_TRUE); 6942 } 6943 6944 ah = ipsec_inbound_ah_sa(first_mp, ns); 6945 if (ah == NULL) 6946 return (B_TRUE); 6947 ASSERT(ii->ipsec_in_ah_sa != NULL); 6948 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6949 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6950 6951 switch (ipsec_rc) { 6952 case IPSEC_STATUS_SUCCESS: 6953 /* we're done with IPsec processing, send it up */ 6954 ip_fanout_proto_again(first_mp, ill, inill, NULL); 6955 break; 6956 case IPSEC_STATUS_FAILED: 6957 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 6958 break; 6959 case IPSEC_STATUS_PENDING: 6960 /* no action needed */ 6961 break; 6962 } 6963 return (B_TRUE); 6964 } 6965 6966 /* 6967 * Validate the IPv6 mblk for alignment. 6968 */ 6969 int 6970 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 6971 { 6972 int pkt_len, ip6_len; 6973 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 6974 6975 /* check for alignment and full IPv6 header */ 6976 if (!OK_32PTR((uchar_t *)ip6h) || 6977 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6978 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6979 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6980 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6981 return (IP6_MBLK_HDR_ERR); 6982 } 6983 ip6h = (ip6_t *)mp->b_rptr; 6984 } 6985 6986 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6987 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6988 6989 if (mp->b_cont == NULL) 6990 pkt_len = mp->b_wptr - mp->b_rptr; 6991 else 6992 pkt_len = msgdsize(mp); 6993 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6994 6995 /* 6996 * Check for bogus (too short packet) and packet which 6997 * was padded by the link layer. 
6998 */ 6999 if (ip6_len != pkt_len) { 7000 ssize_t diff; 7001 7002 if (ip6_len > pkt_len) { 7003 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 7004 ip6_len, pkt_len)); 7005 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 7006 return (IP6_MBLK_LEN_ERR); 7007 } 7008 diff = (ssize_t)(pkt_len - ip6_len); 7009 7010 if (!adjmsg(mp, -diff)) { 7011 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7012 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7013 return (IP6_MBLK_LEN_ERR); 7014 } 7015 } 7016 return (IP6_MBLK_OK); 7017 } 7018 7019 /* 7020 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7021 * ip_rput_v6 has already verified alignment, the min length, the version, 7022 * and db_ref = 1. 7023 * 7024 * The ill passed in (the arg named inill) is the ill that the packet 7025 * actually arrived on. We need to remember this when saving the 7026 * input interface index into potential IPV6_PKTINFO data in 7027 * ip_add_info_v6(). 7028 * 7029 * This routine doesn't free dl_mp; that's the caller's responsibility on 7030 * return. (Note that the callers are complex enough that there's no tail 7031 * recursion here anyway.) 
7032 */ 7033 void 7034 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7035 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7036 { 7037 ire_t *ire = NULL; 7038 ill_t *ill = inill; 7039 ill_t *outill; 7040 ipif_t *ipif; 7041 uint8_t *whereptr; 7042 uint8_t nexthdr; 7043 uint16_t remlen; 7044 uint_t prev_nexthdr_offset; 7045 uint_t used; 7046 size_t old_pkt_len; 7047 size_t pkt_len; 7048 uint16_t ip6_len; 7049 uint_t hdr_len; 7050 boolean_t mctl_present; 7051 mblk_t *first_mp; 7052 mblk_t *first_mp1; 7053 boolean_t no_forward; 7054 ip6_hbh_t *hbhhdr; 7055 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7056 conn_t *connp; 7057 uint32_t ports; 7058 zoneid_t zoneid = GLOBAL_ZONEID; 7059 uint16_t hck_flags, reass_hck_flags; 7060 uint32_t reass_sum; 7061 boolean_t cksum_err; 7062 mblk_t *mp1; 7063 ip_stack_t *ipst = inill->ill_ipst; 7064 7065 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7066 7067 if (hada_mp != NULL) { 7068 /* 7069 * It's an IPsec accelerated packet. 7070 * Keep a pointer to the data attributes around until 7071 * we allocate the ipsecinfo structure. 7072 */ 7073 IPSECHW_DEBUG(IPSECHW_PKT, 7074 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7075 hada_mp->b_cont = NULL; 7076 /* 7077 * Since it is accelerated, it came directly from 7078 * the ill. 
7079 */ 7080 ASSERT(mctl_present == B_FALSE); 7081 ASSERT(mp->b_datap->db_type != M_CTL); 7082 } 7083 7084 ip6h = (ip6_t *)mp->b_rptr; 7085 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7086 old_pkt_len = pkt_len = ip6_len; 7087 7088 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7089 hck_flags = DB_CKSUMFLAGS(mp); 7090 else 7091 hck_flags = 0; 7092 7093 /* Clear checksum flags in case we need to forward */ 7094 DB_CKSUMFLAGS(mp) = 0; 7095 reass_sum = reass_hck_flags = 0; 7096 7097 nexthdr = ip6h->ip6_nxt; 7098 7099 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7100 (uchar_t *)ip6h); 7101 whereptr = (uint8_t *)&ip6h[1]; 7102 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7103 7104 /* Process hop by hop header options */ 7105 if (nexthdr == IPPROTO_HOPOPTS) { 7106 uint_t ehdrlen; 7107 uint8_t *optptr; 7108 7109 if (remlen < MIN_EHDR_LEN) 7110 goto pkt_too_short; 7111 if (mp->b_cont != NULL && 7112 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7113 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7114 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7115 freemsg(hada_mp); 7116 freemsg(first_mp); 7117 return; 7118 } 7119 ip6h = (ip6_t *)mp->b_rptr; 7120 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7121 } 7122 hbhhdr = (ip6_hbh_t *)whereptr; 7123 nexthdr = hbhhdr->ip6h_nxt; 7124 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7125 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7126 7127 if (remlen < ehdrlen) 7128 goto pkt_too_short; 7129 if (mp->b_cont != NULL && 7130 whereptr + ehdrlen > mp->b_wptr) { 7131 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7132 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7133 freemsg(hada_mp); 7134 freemsg(first_mp); 7135 return; 7136 } 7137 ip6h = (ip6_t *)mp->b_rptr; 7138 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7139 hbhhdr = (ip6_hbh_t *)whereptr; 7140 } 7141 7142 optptr = whereptr + 2; 7143 whereptr += ehdrlen; 7144 remlen -= ehdrlen; 7145 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7146 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7147 case -1: 7148 /* 7149 * Packet has been consumed and any 7150 * needed ICMP messages sent. 7151 */ 7152 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7153 freemsg(hada_mp); 7154 return; 7155 case 0: 7156 /* no action needed */ 7157 break; 7158 case 1: 7159 /* Known router alert */ 7160 goto ipv6forus; 7161 } 7162 } 7163 7164 /* 7165 * On incoming v6 multicast packets we will bypass the ire table, 7166 * and assume that the read queue corresponds to the targetted 7167 * interface. 7168 * 7169 * The effect of this is the same as the IPv4 original code, but is 7170 * much cleaner I think. See ip_rput for how that was done. 7171 */ 7172 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7173 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7174 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7175 7176 /* 7177 * So that we don't end up with dups, only one ill in an IPMP 7178 * group is nominated to receive multicast data traffic. 7179 * However, link-locals on any underlying interfaces will have 7180 * joined their solicited-node multicast addresses and we must 7181 * accept those packets. (We don't attempt to precisely 7182 * filter out duplicate solicited-node multicast packets since 7183 * e.g. an IPMP interface and underlying interface may have 7184 * the same solicited-node multicast address.) Note that we 7185 * won't generally have duplicates because we only issue a 7186 * DL_ENABMULTI_REQ on one interface in a group; the exception 7187 * is when PHYI_MULTI_BCAST is set. 7188 */ 7189 if (IS_UNDER_IPMP(ill) && !ill->ill_nom_cast && 7190 !IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 7191 goto drop_pkt; 7192 } 7193 7194 /* 7195 * XXX TODO Give to mrouted to for multicast forwarding. 
7196 */ 7197 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 7198 ALL_ZONES) == NULL) { 7199 if (ip_debug > 3) { 7200 /* ip2dbg */ 7201 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7202 " which is not for us: %s\n", AF_INET6, 7203 &ip6h->ip6_dst); 7204 } 7205 drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7206 freemsg(hada_mp); 7207 freemsg(first_mp); 7208 return; 7209 } 7210 if (ip_debug > 3) { 7211 /* ip2dbg */ 7212 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7213 AF_INET6, &ip6h->ip6_dst); 7214 } 7215 zoneid = GLOBAL_ZONEID; 7216 goto ipv6forus; 7217 } 7218 7219 ipif = ill->ill_ipif; 7220 7221 /* 7222 * If a packet was received on an interface that is a 6to4 tunnel, 7223 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7224 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7225 * the 6to4 prefix of the address configured on the receiving interface. 7226 * Otherwise, the packet was delivered to this interface in error and 7227 * the packet must be dropped. 7228 */ 7229 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7230 7231 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7232 &ip6h->ip6_dst)) { 7233 if (ip_debug > 2) { 7234 /* ip1dbg */ 7235 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7236 "addressed packet which is not for us: " 7237 "%s\n", AF_INET6, &ip6h->ip6_dst); 7238 } 7239 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7240 freemsg(first_mp); 7241 return; 7242 } 7243 } 7244 7245 /* 7246 * Find an ire that matches destination. For link-local addresses 7247 * we have to match the ill. 7248 * TBD for site local addresses. 
7249 */ 7250 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7251 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7252 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7253 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 7254 } else { 7255 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7256 msg_getlabel(mp), ipst); 7257 7258 if (ire != NULL && ire->ire_stq != NULL && 7259 ire->ire_zoneid != GLOBAL_ZONEID && 7260 ire->ire_zoneid != ALL_ZONES) { 7261 /* 7262 * Should only use IREs that are visible from the 7263 * global zone for forwarding. 7264 */ 7265 ire_refrele(ire); 7266 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7267 GLOBAL_ZONEID, msg_getlabel(mp), ipst); 7268 } 7269 } 7270 7271 if (ire == NULL) { 7272 /* 7273 * No matching IRE found. Mark this packet as having 7274 * originated externally. 7275 */ 7276 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7277 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7278 if (!(ill->ill_flags & ILLF_ROUTER)) { 7279 BUMP_MIB(ill->ill_ip_mib, 7280 ipIfStatsInAddrErrors); 7281 } 7282 freemsg(hada_mp); 7283 freemsg(first_mp); 7284 return; 7285 } 7286 if (ip6h->ip6_hops <= 1) { 7287 if (hada_mp != NULL) 7288 goto hada_drop; 7289 /* Sent by forwarding path, and router is global zone */ 7290 icmp_time_exceeded_v6(WR(q), first_mp, 7291 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7292 GLOBAL_ZONEID, ipst); 7293 return; 7294 } 7295 /* 7296 * Per RFC 3513 section 2.5.2, we must not forward packets with 7297 * an unspecified source address. 7298 */ 7299 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7300 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7301 freemsg(hada_mp); 7302 freemsg(first_mp); 7303 return; 7304 } 7305 mp->b_prev = (mblk_t *)(uintptr_t) 7306 ill->ill_phyint->phyint_ifindex; 7307 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7308 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? 
ill : NULL, 7309 GLOBAL_ZONEID, ipst); 7310 return; 7311 } 7312 /* we have a matching IRE */ 7313 if (ire->ire_stq != NULL) { 7314 /* 7315 * To be quicker, we may wish not to chase pointers 7316 * (ire->ire_ipif->ipif_ill...) and instead store the 7317 * forwarding policy in the ire. An unfortunate side- 7318 * effect of this would be requiring an ire flush whenever 7319 * the ILLF_ROUTER flag changes. For now, chase pointers 7320 * once and store in the boolean no_forward. 7321 * 7322 * This appears twice to keep it out of the non-forwarding, 7323 * yes-it's-for-us-on-the-right-interface case. 7324 */ 7325 no_forward = ((ill->ill_flags & 7326 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7327 7328 ASSERT(first_mp == mp); 7329 /* 7330 * This ire has a send-to queue - forward the packet. 7331 */ 7332 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7333 freemsg(hada_mp); 7334 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7335 if (no_forward) { 7336 BUMP_MIB(ill->ill_ip_mib, 7337 ipIfStatsInAddrErrors); 7338 } 7339 freemsg(mp); 7340 ire_refrele(ire); 7341 return; 7342 } 7343 /* 7344 * ipIfStatsHCInForwDatagrams should only be increment if there 7345 * will be an attempt to forward the packet, which is why we 7346 * increment after the above condition has been checked. 7347 */ 7348 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7349 if (ip6h->ip6_hops <= 1) { 7350 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7351 /* Sent by forwarding path, and router is global zone */ 7352 icmp_time_exceeded_v6(WR(q), mp, 7353 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7354 GLOBAL_ZONEID, ipst); 7355 ire_refrele(ire); 7356 return; 7357 } 7358 /* 7359 * Per RFC 3513 section 2.5.2, we must not forward packets with 7360 * an unspecified source address. 
7361 */ 7362 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7363 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7364 freemsg(mp); 7365 ire_refrele(ire); 7366 return; 7367 } 7368 7369 if (is_system_labeled()) { 7370 mblk_t *mp1; 7371 7372 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7373 BUMP_MIB(ill->ill_ip_mib, 7374 ipIfStatsForwProhibits); 7375 freemsg(mp); 7376 ire_refrele(ire); 7377 return; 7378 } 7379 /* Size may have changed */ 7380 mp = mp1; 7381 ip6h = (ip6_t *)mp->b_rptr; 7382 pkt_len = msgdsize(mp); 7383 } 7384 7385 if (pkt_len > ire->ire_max_frag) { 7386 int max_frag = ire->ire_max_frag; 7387 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7388 /* 7389 * Handle labeled packet resizing. 7390 */ 7391 if (is_system_labeled()) { 7392 max_frag = tsol_pmtu_adjust(mp, max_frag, 7393 pkt_len - old_pkt_len, AF_INET6); 7394 } 7395 7396 /* Sent by forwarding path, and router is global zone */ 7397 icmp_pkt2big_v6(WR(q), mp, max_frag, 7398 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7399 ire_refrele(ire); 7400 return; 7401 } 7402 7403 /* 7404 * Check to see if we're forwarding the packet to a 7405 * different link from which it came. If so, check the 7406 * source and destination addresses since routers must not 7407 * forward any packets with link-local source or 7408 * destination addresses to other links. Otherwise (if 7409 * we're forwarding onto the same link), conditionally send 7410 * a redirect message. 7411 */ 7412 if (ire->ire_rfq != q && 7413 !IS_IN_SAME_ILLGRP(ill, (ill_t *)ire->ire_rfq->q_ptr)) { 7414 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7415 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7416 BUMP_MIB(ill->ill_ip_mib, 7417 ipIfStatsInAddrErrors); 7418 freemsg(mp); 7419 ire_refrele(ire); 7420 return; 7421 } 7422 /* TBD add site-local check at site boundary? 
*/ 7423 } else if (ipst->ips_ipv6_send_redirects) { 7424 in6_addr_t *v6targ; 7425 in6_addr_t gw_addr_v6; 7426 ire_t *src_ire_v6 = NULL; 7427 7428 /* 7429 * Don't send a redirect when forwarding a source 7430 * routed packet. 7431 */ 7432 if (ip_source_routed_v6(ip6h, mp, ipst)) 7433 goto forward; 7434 7435 mutex_enter(&ire->ire_lock); 7436 gw_addr_v6 = ire->ire_gateway_addr_v6; 7437 mutex_exit(&ire->ire_lock); 7438 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7439 v6targ = &gw_addr_v6; 7440 /* 7441 * We won't send redirects to a router 7442 * that doesn't have a link local 7443 * address, but will forward. 7444 */ 7445 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7446 BUMP_MIB(ill->ill_ip_mib, 7447 ipIfStatsInAddrErrors); 7448 goto forward; 7449 } 7450 } else { 7451 v6targ = &ip6h->ip6_dst; 7452 } 7453 7454 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7455 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7456 GLOBAL_ZONEID, 0, NULL, 7457 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7458 ipst); 7459 7460 if (src_ire_v6 != NULL) { 7461 /* 7462 * The source is directly connected. 
7463 */ 7464 mp1 = copymsg(mp); 7465 if (mp1 != NULL) { 7466 icmp_send_redirect_v6(WR(q), 7467 mp1, v6targ, &ip6h->ip6_dst, 7468 ill, B_FALSE); 7469 } 7470 ire_refrele(src_ire_v6); 7471 } 7472 } 7473 7474 forward: 7475 /* Hoplimit verified above */ 7476 ip6h->ip6_hops--; 7477 7478 outill = ire->ire_ipif->ipif_ill; 7479 7480 DTRACE_PROBE4(ip6__forwarding__start, 7481 ill_t *, inill, ill_t *, outill, 7482 ip6_t *, ip6h, mblk_t *, mp); 7483 7484 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7485 ipst->ips_ipv6firewall_forwarding, 7486 inill, outill, ip6h, mp, mp, 0, ipst); 7487 7488 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7489 7490 if (mp != NULL) { 7491 UPDATE_IB_PKT_COUNT(ire); 7492 ire->ire_last_used_time = lbolt; 7493 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7494 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7495 } 7496 IRE_REFRELE(ire); 7497 return; 7498 } 7499 7500 /* 7501 * Need to put on correct queue for reassembly to find it. 7502 * No need to use put() since reassembly has its own locks. 7503 * Note: multicast packets and packets destined to addresses 7504 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7505 * the arriving ill. Unlike the IPv4 case, enabling strict 7506 * destination multihoming will prevent accepting packets 7507 * addressed to an IRE_LOCAL on lo0. 7508 */ 7509 if (ire->ire_rfq != q) { 7510 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7511 == NULL) { 7512 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7513 freemsg(hada_mp); 7514 freemsg(first_mp); 7515 return; 7516 } 7517 if (ire->ire_rfq != NULL) { 7518 q = ire->ire_rfq; 7519 ill = (ill_t *)q->q_ptr; 7520 ASSERT(ill != NULL); 7521 } 7522 } 7523 7524 zoneid = ire->ire_zoneid; 7525 UPDATE_IB_PKT_COUNT(ire); 7526 ire->ire_last_used_time = lbolt; 7527 /* Don't use the ire after this point, we'll NULL it out to be sure. 
*/ 7528 ire_refrele(ire); 7529 ire = NULL; 7530 ipv6forus: 7531 /* 7532 * Looks like this packet is for us one way or another. 7533 * This is where we'll process destination headers etc. 7534 */ 7535 for (; ; ) { 7536 switch (nexthdr) { 7537 case IPPROTO_TCP: { 7538 uint16_t *up; 7539 uint32_t sum; 7540 int offset; 7541 7542 hdr_len = pkt_len - remlen; 7543 7544 if (hada_mp != NULL) { 7545 ip0dbg(("tcp hada drop\n")); 7546 goto hada_drop; 7547 } 7548 7549 7550 /* TCP needs all of the TCP header */ 7551 if (remlen < TCP_MIN_HEADER_LENGTH) 7552 goto pkt_too_short; 7553 if (mp->b_cont != NULL && 7554 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7555 if (!pullupmsg(mp, 7556 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7557 BUMP_MIB(ill->ill_ip_mib, 7558 ipIfStatsInDiscards); 7559 freemsg(first_mp); 7560 return; 7561 } 7562 hck_flags = 0; 7563 ip6h = (ip6_t *)mp->b_rptr; 7564 whereptr = (uint8_t *)ip6h + hdr_len; 7565 } 7566 /* 7567 * Extract the offset field from the TCP header. 7568 */ 7569 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7570 if (offset != 5) { 7571 if (offset < 5) { 7572 ip1dbg(("ip_rput_data_v6: short " 7573 "TCP data offset")); 7574 BUMP_MIB(ill->ill_ip_mib, 7575 ipIfStatsInDiscards); 7576 freemsg(first_mp); 7577 return; 7578 } 7579 /* 7580 * There must be TCP options. 7581 * Make sure we can grab them. 7582 */ 7583 offset <<= 2; 7584 if (remlen < offset) 7585 goto pkt_too_short; 7586 if (mp->b_cont != NULL && 7587 whereptr + offset > mp->b_wptr) { 7588 if (!pullupmsg(mp, 7589 hdr_len + offset)) { 7590 BUMP_MIB(ill->ill_ip_mib, 7591 ipIfStatsInDiscards); 7592 freemsg(first_mp); 7593 return; 7594 } 7595 hck_flags = 0; 7596 ip6h = (ip6_t *)mp->b_rptr; 7597 whereptr = (uint8_t *)ip6h + hdr_len; 7598 } 7599 } 7600 7601 up = (uint16_t *)&ip6h->ip6_src; 7602 /* 7603 * TCP checksum calculation. 
First sum up the 7604 * pseudo-header fields: 7605 * - Source IPv6 address 7606 * - Destination IPv6 address 7607 * - TCP payload length 7608 * - TCP protocol ID 7609 */ 7610 sum = htons(IPPROTO_TCP + remlen) + 7611 up[0] + up[1] + up[2] + up[3] + 7612 up[4] + up[5] + up[6] + up[7] + 7613 up[8] + up[9] + up[10] + up[11] + 7614 up[12] + up[13] + up[14] + up[15]; 7615 7616 /* Fold initial sum */ 7617 sum = (sum & 0xffff) + (sum >> 16); 7618 7619 mp1 = mp->b_cont; 7620 7621 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7622 IP6_STAT(ipst, ip6_in_sw_cksum); 7623 7624 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7625 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7626 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7627 mp, mp1, cksum_err); 7628 7629 if (cksum_err) { 7630 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7631 7632 if (hck_flags & HCK_FULLCKSUM) { 7633 IP6_STAT(ipst, 7634 ip6_tcp_in_full_hw_cksum_err); 7635 } else if (hck_flags & HCK_PARTIALCKSUM) { 7636 IP6_STAT(ipst, 7637 ip6_tcp_in_part_hw_cksum_err); 7638 } else { 7639 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7640 } 7641 freemsg(first_mp); 7642 return; 7643 } 7644 tcp_fanout: 7645 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7646 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7647 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7648 return; 7649 } 7650 case IPPROTO_SCTP: 7651 { 7652 sctp_hdr_t *sctph; 7653 uint32_t calcsum, pktsum; 7654 uint_t hdr_len = pkt_len - remlen; 7655 sctp_stack_t *sctps; 7656 7657 sctps = inill->ill_ipst->ips_netstack->netstack_sctp; 7658 7659 /* SCTP needs all of the SCTP header */ 7660 if (remlen < sizeof (*sctph)) { 7661 goto pkt_too_short; 7662 } 7663 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7664 ASSERT(mp->b_cont != NULL); 7665 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7666 BUMP_MIB(ill->ill_ip_mib, 7667 ipIfStatsInDiscards); 7668 freemsg(mp); 7669 return; 7670 } 7671 ip6h = (ip6_t *)mp->b_rptr; 7672 whereptr = (uint8_t *)ip6h + hdr_len; 7673 } 7674 7675 sctph = 
(sctp_hdr_t *)(mp->b_rptr + hdr_len); 7676 /* checksum */ 7677 pktsum = sctph->sh_chksum; 7678 sctph->sh_chksum = 0; 7679 calcsum = sctp_cksum(mp, hdr_len); 7680 if (calcsum != pktsum) { 7681 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7682 freemsg(mp); 7683 return; 7684 } 7685 sctph->sh_chksum = pktsum; 7686 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7687 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7688 ports, zoneid, mp, sctps)) == NULL) { 7689 ip_fanout_sctp_raw(first_mp, ill, 7690 (ipha_t *)ip6h, B_FALSE, ports, 7691 mctl_present, 7692 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7693 B_TRUE, zoneid); 7694 return; 7695 } 7696 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7697 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7698 B_FALSE, mctl_present); 7699 return; 7700 } 7701 case IPPROTO_UDP: { 7702 uint16_t *up; 7703 uint32_t sum; 7704 7705 hdr_len = pkt_len - remlen; 7706 7707 if (hada_mp != NULL) { 7708 ip0dbg(("udp hada drop\n")); 7709 goto hada_drop; 7710 } 7711 7712 /* Verify that at least the ports are present */ 7713 if (remlen < UDPH_SIZE) 7714 goto pkt_too_short; 7715 if (mp->b_cont != NULL && 7716 whereptr + UDPH_SIZE > mp->b_wptr) { 7717 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7718 BUMP_MIB(ill->ill_ip_mib, 7719 ipIfStatsInDiscards); 7720 freemsg(first_mp); 7721 return; 7722 } 7723 hck_flags = 0; 7724 ip6h = (ip6_t *)mp->b_rptr; 7725 whereptr = (uint8_t *)ip6h + hdr_len; 7726 } 7727 7728 /* 7729 * Before going through the regular checksum 7730 * calculation, make sure the received checksum 7731 * is non-zero. RFC 2460 says, a 0x0000 checksum 7732 * in a UDP packet (within IPv6 packet) is invalid 7733 * and should be replaced by 0xffff. This makes 7734 * sense as regular checksum calculation will 7735 * pass for both the cases i.e. 0x0000 and 0xffff. 7736 * Removing one of the case makes error detection 7737 * stronger. 
7738 */ 7739 7740 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7741 /* 0x0000 checksum is invalid */ 7742 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7743 "checksum value 0x0000\n")); 7744 BUMP_MIB(ill->ill_ip_mib, 7745 udpIfStatsInCksumErrs); 7746 freemsg(first_mp); 7747 return; 7748 } 7749 7750 up = (uint16_t *)&ip6h->ip6_src; 7751 7752 /* 7753 * UDP checksum calculation. First sum up the 7754 * pseudo-header fields: 7755 * - Source IPv6 address 7756 * - Destination IPv6 address 7757 * - UDP payload length 7758 * - UDP protocol ID 7759 */ 7760 7761 sum = htons(IPPROTO_UDP + remlen) + 7762 up[0] + up[1] + up[2] + up[3] + 7763 up[4] + up[5] + up[6] + up[7] + 7764 up[8] + up[9] + up[10] + up[11] + 7765 up[12] + up[13] + up[14] + up[15]; 7766 7767 /* Fold initial sum */ 7768 sum = (sum & 0xffff) + (sum >> 16); 7769 7770 if (reass_hck_flags != 0) { 7771 hck_flags = reass_hck_flags; 7772 7773 IP_CKSUM_RECV_REASS(hck_flags, 7774 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7775 sum, reass_sum, cksum_err); 7776 } else { 7777 mp1 = mp->b_cont; 7778 7779 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7780 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7781 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7782 mp, mp1, cksum_err); 7783 } 7784 7785 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7786 IP6_STAT(ipst, ip6_in_sw_cksum); 7787 7788 if (cksum_err) { 7789 BUMP_MIB(ill->ill_ip_mib, 7790 udpIfStatsInCksumErrs); 7791 7792 if (hck_flags & HCK_FULLCKSUM) 7793 IP6_STAT(ipst, 7794 ip6_udp_in_full_hw_cksum_err); 7795 else if (hck_flags & HCK_PARTIALCKSUM) 7796 IP6_STAT(ipst, 7797 ip6_udp_in_part_hw_cksum_err); 7798 else 7799 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7800 7801 freemsg(first_mp); 7802 return; 7803 } 7804 goto udp_fanout; 7805 } 7806 case IPPROTO_ICMPV6: { 7807 uint16_t *up; 7808 uint32_t sum; 7809 uint_t hdr_len = pkt_len - remlen; 7810 7811 if (hada_mp != NULL) { 7812 ip0dbg(("icmp hada drop\n")); 7813 goto hada_drop; 7814 } 7815 7816 up = (uint16_t 
*)&ip6h->ip6_src; 7817 sum = htons(IPPROTO_ICMPV6 + remlen) + 7818 up[0] + up[1] + up[2] + up[3] + 7819 up[4] + up[5] + up[6] + up[7] + 7820 up[8] + up[9] + up[10] + up[11] + 7821 up[12] + up[13] + up[14] + up[15]; 7822 sum = (sum & 0xffff) + (sum >> 16); 7823 sum = IP_CSUM(mp, hdr_len, sum); 7824 if (sum != 0) { 7825 /* IPv6 ICMP checksum failed */ 7826 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7827 "failed %x\n", 7828 sum)); 7829 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7830 BUMP_MIB(ill->ill_icmp6_mib, 7831 ipv6IfIcmpInErrors); 7832 freemsg(first_mp); 7833 return; 7834 } 7835 7836 icmp_fanout: 7837 /* Check variable for testing applications */ 7838 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7839 freemsg(first_mp); 7840 return; 7841 } 7842 /* 7843 * Assume that there is always at least one conn for 7844 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7845 * where there is no conn. 7846 */ 7847 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7848 ilm_t *ilm; 7849 ilm_walker_t ilw; 7850 7851 ASSERT(!IS_LOOPBACK(ill)); 7852 /* 7853 * In the multicast case, applications may have 7854 * joined the group from different zones, so we 7855 * need to deliver the packet to each of them. 7856 * Loop through the multicast memberships 7857 * structures (ilm) on the receive ill and send 7858 * a copy of the packet up each matching one. 
7859 */ 7860 ilm = ilm_walker_start(&ilw, inill); 7861 for (; ilm != NULL; 7862 ilm = ilm_walker_step(&ilw, ilm)) { 7863 if (!IN6_ARE_ADDR_EQUAL( 7864 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7865 continue; 7866 if (!ipif_lookup_zoneid( 7867 ilw.ilw_walk_ill, ilm->ilm_zoneid, 7868 IPIF_UP, NULL)) 7869 continue; 7870 7871 first_mp1 = ip_copymsg(first_mp); 7872 if (first_mp1 == NULL) 7873 continue; 7874 icmp_inbound_v6(q, first_mp1, 7875 ilw.ilw_walk_ill, inill, 7876 hdr_len, mctl_present, 0, 7877 ilm->ilm_zoneid, dl_mp); 7878 } 7879 ilm_walker_finish(&ilw); 7880 } else { 7881 first_mp1 = ip_copymsg(first_mp); 7882 if (first_mp1 != NULL) 7883 icmp_inbound_v6(q, first_mp1, ill, 7884 inill, hdr_len, mctl_present, 0, 7885 zoneid, dl_mp); 7886 } 7887 } 7888 /* FALLTHRU */ 7889 default: { 7890 /* 7891 * Handle protocols with which IPv6 is less intimate. 7892 */ 7893 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 7894 7895 if (hada_mp != NULL) { 7896 ip0dbg(("default hada drop\n")); 7897 goto hada_drop; 7898 } 7899 7900 /* 7901 * Enable sending ICMP for "Unknown" nexthdr 7902 * case. i.e. where we did not FALLTHRU from 7903 * IPPROTO_ICMPV6 processing case above. 7904 * If we did FALLTHRU, then the packet has already been 7905 * processed for IPPF, don't process it again in 7906 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7907 * flags 7908 */ 7909 if (nexthdr != IPPROTO_ICMPV6) 7910 proto_flags |= IP_FF_SEND_ICMP; 7911 else 7912 proto_flags |= IP6_NO_IPPOLICY; 7913 7914 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7915 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7916 mctl_present, zoneid); 7917 return; 7918 } 7919 7920 case IPPROTO_DSTOPTS: { 7921 uint_t ehdrlen; 7922 uint8_t *optptr; 7923 ip6_dest_t *desthdr; 7924 7925 /* If packet is too short, look no further */ 7926 if (remlen < MIN_EHDR_LEN) 7927 goto pkt_too_short; 7928 7929 /* Check if AH is present. 
*/ 7930 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7931 inill, hada_mp, zoneid)) { 7932 return; 7933 } 7934 7935 /* 7936 * Reinitialize pointers, as ipsec_early_ah_v6() does 7937 * complete pullups. We don't have to do more pullups 7938 * as a result. 7939 */ 7940 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7941 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7942 ip6h = (ip6_t *)mp->b_rptr; 7943 7944 desthdr = (ip6_dest_t *)whereptr; 7945 nexthdr = desthdr->ip6d_nxt; 7946 prev_nexthdr_offset = (uint_t)(whereptr - 7947 (uint8_t *)ip6h); 7948 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7949 if (remlen < ehdrlen) 7950 goto pkt_too_short; 7951 optptr = whereptr + 2; 7952 /* 7953 * Note: XXX This code does not seem to make 7954 * distinction between Destination Options Header 7955 * being before/after Routing Header which can 7956 * happen if we are at the end of source route. 7957 * This may become significant in future. 7958 * (No real significant Destination Options are 7959 * defined/implemented yet ). 7960 */ 7961 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7962 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 7963 case -1: 7964 /* 7965 * Packet has been consumed and any needed 7966 * ICMP errors sent. 
7967 */ 7968 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7969 freemsg(hada_mp); 7970 return; 7971 case 0: 7972 /* No action needed continue */ 7973 break; 7974 case 1: 7975 /* 7976 * Unnexpected return value 7977 * (Router alert is a Hop-by-Hop option) 7978 */ 7979 #ifdef DEBUG 7980 panic("ip_rput_data_v6: router " 7981 "alert hbh opt indication in dest opt"); 7982 /*NOTREACHED*/ 7983 #else 7984 freemsg(hada_mp); 7985 freemsg(first_mp); 7986 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7987 return; 7988 #endif 7989 } 7990 used = ehdrlen; 7991 break; 7992 } 7993 case IPPROTO_FRAGMENT: { 7994 ip6_frag_t *fraghdr; 7995 size_t no_frag_hdr_len; 7996 7997 if (hada_mp != NULL) { 7998 ip0dbg(("frag hada drop\n")); 7999 goto hada_drop; 8000 } 8001 8002 ASSERT(first_mp == mp); 8003 if (remlen < sizeof (ip6_frag_t)) 8004 goto pkt_too_short; 8005 8006 if (mp->b_cont != NULL && 8007 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 8008 if (!pullupmsg(mp, 8009 pkt_len - remlen + sizeof (ip6_frag_t))) { 8010 BUMP_MIB(ill->ill_ip_mib, 8011 ipIfStatsInDiscards); 8012 freemsg(mp); 8013 return; 8014 } 8015 hck_flags = 0; 8016 ip6h = (ip6_t *)mp->b_rptr; 8017 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8018 } 8019 8020 fraghdr = (ip6_frag_t *)whereptr; 8021 used = (uint_t)sizeof (ip6_frag_t); 8022 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8023 8024 /* 8025 * Invoke the CGTP (multirouting) filtering module to 8026 * process the incoming packet. Packets identified as 8027 * duplicates must be discarded. Filtering is active 8028 * only if the the ip_cgtp_filter ndd variable is 8029 * non-zero. 
8030 */ 8031 if (ipst->ips_ip_cgtp_filter && 8032 ipst->ips_ip_cgtp_filter_ops != NULL) { 8033 int cgtp_flt_pkt; 8034 netstackid_t stackid; 8035 8036 stackid = ipst->ips_netstack->netstack_stackid; 8037 8038 cgtp_flt_pkt = 8039 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 8040 stackid, inill->ill_phyint->phyint_ifindex, 8041 ip6h, fraghdr); 8042 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8043 freemsg(mp); 8044 return; 8045 } 8046 } 8047 8048 /* Restore the flags */ 8049 DB_CKSUMFLAGS(mp) = hck_flags; 8050 8051 mp = ip_rput_frag_v6(ill, inill, mp, ip6h, fraghdr, 8052 remlen - used, &prev_nexthdr_offset, 8053 &reass_sum, &reass_hck_flags); 8054 if (mp == NULL) { 8055 /* Reassembly is still pending */ 8056 return; 8057 } 8058 /* The first mblk are the headers before the frag hdr */ 8059 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8060 8061 first_mp = mp; /* mp has most likely changed! */ 8062 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8063 ip6h = (ip6_t *)mp->b_rptr; 8064 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8065 whereptr = mp->b_rptr + no_frag_hdr_len; 8066 remlen = ntohs(ip6h->ip6_plen) + 8067 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8068 pkt_len = msgdsize(mp); 8069 used = 0; 8070 break; 8071 } 8072 case IPPROTO_HOPOPTS: { 8073 if (hada_mp != NULL) { 8074 ip0dbg(("hop hada drop\n")); 8075 goto hada_drop; 8076 } 8077 /* 8078 * Illegal header sequence. 8079 * (Hop-by-hop headers are processed above 8080 * and required to immediately follow IPv6 header) 8081 */ 8082 icmp_param_problem_v6(WR(q), first_mp, 8083 ICMP6_PARAMPROB_NEXTHEADER, 8084 prev_nexthdr_offset, 8085 B_FALSE, B_FALSE, zoneid, ipst); 8086 return; 8087 } 8088 case IPPROTO_ROUTING: { 8089 uint_t ehdrlen; 8090 ip6_rthdr_t *rthdr; 8091 8092 /* If packet is too short, look no further */ 8093 if (remlen < MIN_EHDR_LEN) 8094 goto pkt_too_short; 8095 8096 /* Check if AH is present. 
*/ 8097 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8098 inill, hada_mp, zoneid)) { 8099 return; 8100 } 8101 8102 /* 8103 * Reinitialize pointers, as ipsec_early_ah_v6() does 8104 * complete pullups. We don't have to do more pullups 8105 * as a result. 8106 */ 8107 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8108 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8109 ip6h = (ip6_t *)mp->b_rptr; 8110 8111 rthdr = (ip6_rthdr_t *)whereptr; 8112 nexthdr = rthdr->ip6r_nxt; 8113 prev_nexthdr_offset = (uint_t)(whereptr - 8114 (uint8_t *)ip6h); 8115 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8116 if (remlen < ehdrlen) 8117 goto pkt_too_short; 8118 if (rthdr->ip6r_segleft != 0) { 8119 /* Not end of source route */ 8120 if (ll_multicast) { 8121 BUMP_MIB(ill->ill_ip_mib, 8122 ipIfStatsForwProhibits); 8123 freemsg(hada_mp); 8124 freemsg(mp); 8125 return; 8126 } 8127 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8128 flags, hada_mp, dl_mp); 8129 return; 8130 } 8131 used = ehdrlen; 8132 break; 8133 } 8134 case IPPROTO_AH: 8135 case IPPROTO_ESP: { 8136 /* 8137 * Fast path for AH/ESP. If this is the first time 8138 * we are sending a datagram to AH/ESP, allocate 8139 * a IPSEC_IN message and prepend it. Otherwise, 8140 * just fanout. 8141 */ 8142 8143 ipsec_in_t *ii; 8144 int ipsec_rc; 8145 ipsec_stack_t *ipss; 8146 8147 ipss = ipst->ips_netstack->netstack_ipsec; 8148 if (!mctl_present) { 8149 ASSERT(first_mp == mp); 8150 first_mp = ipsec_in_alloc(B_FALSE, 8151 ipst->ips_netstack); 8152 if (first_mp == NULL) { 8153 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8154 "allocation failure.\n")); 8155 BUMP_MIB(ill->ill_ip_mib, 8156 ipIfStatsInDiscards); 8157 freemsg(mp); 8158 return; 8159 } 8160 /* 8161 * Store the ill_index so that when we come back 8162 * from IPSEC we ride on the same queue. 
8163 */ 8164 ii = (ipsec_in_t *)first_mp->b_rptr; 8165 ii->ipsec_in_ill_index = 8166 ill->ill_phyint->phyint_ifindex; 8167 ii->ipsec_in_rill_index = 8168 inill->ill_phyint->phyint_ifindex; 8169 first_mp->b_cont = mp; 8170 /* 8171 * Cache hardware acceleration info. 8172 */ 8173 if (hada_mp != NULL) { 8174 IPSECHW_DEBUG(IPSECHW_PKT, 8175 ("ip_rput_data_v6: " 8176 "caching data attr.\n")); 8177 ii->ipsec_in_accelerated = B_TRUE; 8178 ii->ipsec_in_da = hada_mp; 8179 hada_mp = NULL; 8180 } 8181 } else { 8182 ii = (ipsec_in_t *)first_mp->b_rptr; 8183 } 8184 8185 if (!ipsec_loaded(ipss)) { 8186 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8187 zoneid, ipst); 8188 return; 8189 } 8190 8191 /* select inbound SA and have IPsec process the pkt */ 8192 if (nexthdr == IPPROTO_ESP) { 8193 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8194 ipst->ips_netstack); 8195 if (esph == NULL) 8196 return; 8197 ASSERT(ii->ipsec_in_esp_sa != NULL); 8198 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8199 NULL); 8200 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8201 first_mp, esph); 8202 } else { 8203 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8204 ipst->ips_netstack); 8205 if (ah == NULL) 8206 return; 8207 ASSERT(ii->ipsec_in_ah_sa != NULL); 8208 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8209 NULL); 8210 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8211 first_mp, ah); 8212 } 8213 8214 switch (ipsec_rc) { 8215 case IPSEC_STATUS_SUCCESS: 8216 break; 8217 case IPSEC_STATUS_FAILED: 8218 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8219 /* FALLTHRU */ 8220 case IPSEC_STATUS_PENDING: 8221 return; 8222 } 8223 /* we're done with IPsec processing, send it up */ 8224 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8225 return; 8226 } 8227 case IPPROTO_NONE: 8228 /* All processing is done. Count as "delivered". 
*/ 8229 freemsg(hada_mp); 8230 freemsg(first_mp); 8231 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8232 return; 8233 } 8234 whereptr += used; 8235 ASSERT(remlen >= used); 8236 remlen -= used; 8237 } 8238 /* NOTREACHED */ 8239 8240 pkt_too_short: 8241 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8242 ip6_len, pkt_len, remlen)); 8243 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8244 freemsg(hada_mp); 8245 freemsg(first_mp); 8246 return; 8247 udp_fanout: 8248 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8249 connp = NULL; 8250 } else { 8251 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8252 ipst); 8253 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8254 CONN_DEC_REF(connp); 8255 connp = NULL; 8256 } 8257 } 8258 8259 if (connp == NULL) { 8260 uint32_t ports; 8261 8262 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8263 UDP_PORTS_OFFSET); 8264 IP6_STAT(ipst, ip6_udp_slow_path); 8265 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8266 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8267 zoneid); 8268 return; 8269 } 8270 8271 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 8272 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 8273 freemsg(first_mp); 8274 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8275 CONN_DEC_REF(connp); 8276 return; 8277 } 8278 8279 /* Initiate IPPF processing */ 8280 if (IP6_IN_IPP(flags, ipst)) { 8281 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8282 if (mp == NULL) { 8283 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8284 CONN_DEC_REF(connp); 8285 return; 8286 } 8287 } 8288 8289 if (connp->conn_ip_recvpktinfo || 8290 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8291 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8292 if (mp == NULL) { 8293 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8294 CONN_DEC_REF(connp); 8295 return; 8296 } 8297 } 8298 8299 IP6_STAT(ipst, ip6_udp_fast_path); 8300 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 
8301 8302 /* Send it upstream */ 8303 (connp->conn_recv)(connp, mp, NULL); 8304 8305 CONN_DEC_REF(connp); 8306 freemsg(hada_mp); 8307 return; 8308 8309 hada_drop: 8310 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8311 /* IPsec kstats: bump counter here */ 8312 freemsg(hada_mp); 8313 freemsg(first_mp); 8314 } 8315 8316 /* 8317 * Reassemble fragment. 8318 * When it returns a completed message the first mblk will only contain 8319 * the headers prior to the fragment header. 8320 * 8321 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8322 * of the preceding header. This is needed to patch the previous header's 8323 * nexthdr field when reassembly completes. 8324 */ 8325 static mblk_t * 8326 ip_rput_frag_v6(ill_t *ill, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 8327 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8328 uint32_t *cksum_val, uint16_t *cksum_flags) 8329 { 8330 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8331 uint16_t offset; 8332 boolean_t more_frags; 8333 uint8_t nexthdr = fraghdr->ip6f_nxt; 8334 in6_addr_t *v6dst_ptr; 8335 in6_addr_t *v6src_ptr; 8336 uint_t end; 8337 uint_t hdr_length; 8338 size_t count; 8339 ipf_t *ipf; 8340 ipf_t **ipfp; 8341 ipfb_t *ipfb; 8342 mblk_t *mp1; 8343 uint8_t ecn_info = 0; 8344 size_t msg_len; 8345 mblk_t *tail_mp; 8346 mblk_t *t_mp; 8347 boolean_t pruned = B_FALSE; 8348 uint32_t sum_val; 8349 uint16_t sum_flags; 8350 ip_stack_t *ipst = ill->ill_ipst; 8351 8352 if (cksum_val != NULL) 8353 *cksum_val = 0; 8354 if (cksum_flags != NULL) 8355 *cksum_flags = 0; 8356 8357 /* 8358 * We utilize hardware computed checksum info only for UDP since 8359 * IP fragmentation is a normal occurence for the protocol. In 8360 * addition, checksum offload support for IP fragments carrying 8361 * UDP payload is commonly implemented across network adapters. 
8362 */ 8363 ASSERT(inill != NULL); 8364 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(inill) && 8365 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8366 mblk_t *mp1 = mp->b_cont; 8367 int32_t len; 8368 8369 /* Record checksum information from the packet */ 8370 sum_val = (uint32_t)DB_CKSUM16(mp); 8371 sum_flags = DB_CKSUMFLAGS(mp); 8372 8373 /* fragmented payload offset from beginning of mblk */ 8374 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8375 8376 if ((sum_flags & HCK_PARTIALCKSUM) && 8377 (mp1 == NULL || mp1->b_cont == NULL) && 8378 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8379 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8380 uint32_t adj; 8381 /* 8382 * Partial checksum has been calculated by hardware 8383 * and attached to the packet; in addition, any 8384 * prepended extraneous data is even byte aligned. 8385 * If any such data exists, we adjust the checksum; 8386 * this would also handle any postpended data. 8387 */ 8388 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8389 mp, mp1, len, adj); 8390 8391 /* One's complement subtract extraneous checksum */ 8392 if (adj >= sum_val) 8393 sum_val = ~(adj - sum_val) & 0xFFFF; 8394 else 8395 sum_val -= adj; 8396 } 8397 } else { 8398 sum_val = 0; 8399 sum_flags = 0; 8400 } 8401 8402 /* Clear hardware checksumming flag */ 8403 DB_CKSUMFLAGS(mp) = 0; 8404 8405 /* 8406 * Note: Fragment offset in header is in 8-octet units. 8407 * Clearing least significant 3 bits not only extracts 8408 * it but also gets it in units of octets. 8409 */ 8410 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8411 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8412 8413 /* 8414 * Is the more frags flag on and the payload length not a multiple 8415 * of eight? 
8416 */ 8417 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8418 zoneid_t zoneid; 8419 8420 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8421 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8422 if (zoneid == ALL_ZONES) { 8423 freemsg(mp); 8424 return (NULL); 8425 } 8426 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8427 (uint32_t)((char *)&ip6h->ip6_plen - 8428 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8429 return (NULL); 8430 } 8431 8432 v6src_ptr = &ip6h->ip6_src; 8433 v6dst_ptr = &ip6h->ip6_dst; 8434 end = remlen; 8435 8436 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8437 end += offset; 8438 8439 /* 8440 * Would fragment cause reassembled packet to have a payload length 8441 * greater than IP_MAXPACKET - the max payload size? 8442 */ 8443 if (end > IP_MAXPACKET) { 8444 zoneid_t zoneid; 8445 8446 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8447 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8448 if (zoneid == ALL_ZONES) { 8449 freemsg(mp); 8450 return (NULL); 8451 } 8452 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8453 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8454 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8455 return (NULL); 8456 } 8457 8458 /* 8459 * This packet just has one fragment. Reassembly not 8460 * needed. 8461 */ 8462 if (!more_frags && offset == 0) { 8463 goto reass_done; 8464 } 8465 8466 /* 8467 * Drop the fragmented as early as possible, if 8468 * we don't have resource(s) to re-assemble. 8469 */ 8470 if (ipst->ips_ip_reass_queue_bytes == 0) { 8471 freemsg(mp); 8472 return (NULL); 8473 } 8474 8475 /* Record the ECN field info. */ 8476 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8477 /* 8478 * If this is not the first fragment, dump the unfragmentable 8479 * portion of the packet. 8480 */ 8481 if (offset) 8482 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8483 8484 /* 8485 * Fragmentation reassembly. 
Each ILL has a hash table for 8486 * queueing packets undergoing reassembly for all IPIFs 8487 * associated with the ILL. The hash is based on the packet 8488 * IP ident field. The ILL frag hash table was allocated 8489 * as a timer block at the time the ILL was created. Whenever 8490 * there is anything on the reassembly queue, the timer will 8491 * be running. 8492 */ 8493 msg_len = MBLKSIZE(mp); 8494 tail_mp = mp; 8495 while (tail_mp->b_cont != NULL) { 8496 tail_mp = tail_mp->b_cont; 8497 msg_len += MBLKSIZE(tail_mp); 8498 } 8499 /* 8500 * If the reassembly list for this ILL will get too big 8501 * prune it. 8502 */ 8503 8504 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8505 ipst->ips_ip_reass_queue_bytes) { 8506 ill_frag_prune(ill, 8507 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8508 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8509 pruned = B_TRUE; 8510 } 8511 8512 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8513 mutex_enter(&ipfb->ipfb_lock); 8514 8515 ipfp = &ipfb->ipfb_ipf; 8516 /* Try to find an existing fragment queue for this packet. */ 8517 for (;;) { 8518 ipf = ipfp[0]; 8519 if (ipf) { 8520 /* 8521 * It has to match on ident, source address, and 8522 * dest address. 8523 */ 8524 if (ipf->ipf_ident == ident && 8525 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8526 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8527 8528 /* 8529 * If we have received too many 8530 * duplicate fragments for this packet 8531 * free it. 8532 */ 8533 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8534 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8535 freemsg(mp); 8536 mutex_exit(&ipfb->ipfb_lock); 8537 return (NULL); 8538 } 8539 8540 break; 8541 } 8542 ipfp = &ipf->ipf_hash_next; 8543 continue; 8544 } 8545 8546 8547 /* 8548 * If we pruned the list, do we want to store this new 8549 * fragment?. We apply an optimization here based on the 8550 * fact that most fragments will be received in order. 
8551 * So if the offset of this incoming fragment is zero, 8552 * it is the first fragment of a new packet. We will 8553 * keep it. Otherwise drop the fragment, as we have 8554 * probably pruned the packet already (since the 8555 * packet cannot be found). 8556 */ 8557 8558 if (pruned && offset != 0) { 8559 mutex_exit(&ipfb->ipfb_lock); 8560 freemsg(mp); 8561 return (NULL); 8562 } 8563 8564 /* New guy. Allocate a frag message. */ 8565 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8566 if (!mp1) { 8567 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8568 freemsg(mp); 8569 partial_reass_done: 8570 mutex_exit(&ipfb->ipfb_lock); 8571 return (NULL); 8572 } 8573 8574 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8575 /* 8576 * Too many fragmented packets in this hash bucket. 8577 * Free the oldest. 8578 */ 8579 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8580 } 8581 8582 mp1->b_cont = mp; 8583 8584 /* Initialize the fragment header. */ 8585 ipf = (ipf_t *)mp1->b_rptr; 8586 ipf->ipf_mp = mp1; 8587 ipf->ipf_ptphn = ipfp; 8588 ipfp[0] = ipf; 8589 ipf->ipf_hash_next = NULL; 8590 ipf->ipf_ident = ident; 8591 ipf->ipf_v6src = *v6src_ptr; 8592 ipf->ipf_v6dst = *v6dst_ptr; 8593 /* Record reassembly start time. 
*/ 8594 ipf->ipf_timestamp = gethrestime_sec(); 8595 /* Record ipf generation and account for frag header */ 8596 ipf->ipf_gen = ill->ill_ipf_gen++; 8597 ipf->ipf_count = MBLKSIZE(mp1); 8598 ipf->ipf_protocol = nexthdr; 8599 ipf->ipf_nf_hdr_len = 0; 8600 ipf->ipf_prev_nexthdr_offset = 0; 8601 ipf->ipf_last_frag_seen = B_FALSE; 8602 ipf->ipf_ecn = ecn_info; 8603 ipf->ipf_num_dups = 0; 8604 ipfb->ipfb_frag_pkts++; 8605 ipf->ipf_checksum = 0; 8606 ipf->ipf_checksum_flags = 0; 8607 8608 /* Store checksum value in fragment header */ 8609 if (sum_flags != 0) { 8610 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8611 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8612 ipf->ipf_checksum = sum_val; 8613 ipf->ipf_checksum_flags = sum_flags; 8614 } 8615 8616 /* 8617 * We handle reassembly two ways. In the easy case, 8618 * where all the fragments show up in order, we do 8619 * minimal bookkeeping, and just clip new pieces on 8620 * the end. If we ever see a hole, then we go off 8621 * to ip_reassemble which has to mark the pieces and 8622 * keep track of the number of holes, etc. Obviously, 8623 * the point of having both mechanisms is so we can 8624 * handle the easy case as efficiently as possible. 8625 */ 8626 if (offset == 0) { 8627 /* Easy case, in-order reassembly so far. */ 8628 /* Update the byte count */ 8629 ipf->ipf_count += msg_len; 8630 ipf->ipf_tail_mp = tail_mp; 8631 /* 8632 * Keep track of next expected offset in 8633 * ipf_end. 8634 */ 8635 ipf->ipf_end = end; 8636 ipf->ipf_nf_hdr_len = hdr_length; 8637 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8638 } else { 8639 /* Hard case, hole at the beginning. */ 8640 ipf->ipf_tail_mp = NULL; 8641 /* 8642 * ipf_end == 0 means that we have given up 8643 * on easy reassembly. 8644 */ 8645 ipf->ipf_end = 0; 8646 8647 /* Forget checksum offload from now on */ 8648 ipf->ipf_checksum_flags = 0; 8649 8650 /* 8651 * ipf_hole_cnt is set by ip_reassemble. 8652 * ipf_count is updated by ip_reassemble. 
8653 * No need to check for return value here 8654 * as we don't expect reassembly to complete or 8655 * fail for the first fragment itself. 8656 */ 8657 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8658 msg_len); 8659 } 8660 /* Update per ipfb and ill byte counts */ 8661 ipfb->ipfb_count += ipf->ipf_count; 8662 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8663 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8664 /* If the frag timer wasn't already going, start it. */ 8665 mutex_enter(&ill->ill_lock); 8666 ill_frag_timer_start(ill); 8667 mutex_exit(&ill->ill_lock); 8668 goto partial_reass_done; 8669 } 8670 8671 /* 8672 * If the packet's flag has changed (it could be coming up 8673 * from an interface different than the previous, therefore 8674 * possibly different checksum capability), then forget about 8675 * any stored checksum states. Otherwise add the value to 8676 * the existing one stored in the fragment header. 8677 */ 8678 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8679 sum_val += ipf->ipf_checksum; 8680 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8681 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8682 ipf->ipf_checksum = sum_val; 8683 } else if (ipf->ipf_checksum_flags != 0) { 8684 /* Forget checksum offload from now on */ 8685 ipf->ipf_checksum_flags = 0; 8686 } 8687 8688 /* 8689 * We have a new piece of a datagram which is already being 8690 * reassembled. Update the ECN info if all IP fragments 8691 * are ECN capable. If there is one which is not, clear 8692 * all the info. If there is at least one which has CE 8693 * code point, IP needs to report that up to transport. 
8694 */ 8695 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8696 if (ecn_info == IPH_ECN_CE) 8697 ipf->ipf_ecn = IPH_ECN_CE; 8698 } else { 8699 ipf->ipf_ecn = IPH_ECN_NECT; 8700 } 8701 8702 if (offset && ipf->ipf_end == offset) { 8703 /* The new fragment fits at the end */ 8704 ipf->ipf_tail_mp->b_cont = mp; 8705 /* Update the byte count */ 8706 ipf->ipf_count += msg_len; 8707 /* Update per ipfb and ill byte counts */ 8708 ipfb->ipfb_count += msg_len; 8709 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8710 atomic_add_32(&ill->ill_frag_count, msg_len); 8711 if (more_frags) { 8712 /* More to come. */ 8713 ipf->ipf_end = end; 8714 ipf->ipf_tail_mp = tail_mp; 8715 goto partial_reass_done; 8716 } 8717 } else { 8718 /* 8719 * Go do the hard cases. 8720 * Call ip_reassemble(). 8721 */ 8722 int ret; 8723 8724 if (offset == 0) { 8725 if (ipf->ipf_prev_nexthdr_offset == 0) { 8726 ipf->ipf_nf_hdr_len = hdr_length; 8727 ipf->ipf_prev_nexthdr_offset = 8728 *prev_nexthdr_offset; 8729 } 8730 } 8731 /* Save current byte count */ 8732 count = ipf->ipf_count; 8733 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8734 8735 /* Count of bytes added and subtracted (freeb()ed) */ 8736 count = ipf->ipf_count - count; 8737 if (count) { 8738 /* Update per ipfb and ill byte counts */ 8739 ipfb->ipfb_count += count; 8740 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8741 atomic_add_32(&ill->ill_frag_count, count); 8742 } 8743 if (ret == IP_REASS_PARTIAL) { 8744 goto partial_reass_done; 8745 } else if (ret == IP_REASS_FAILED) { 8746 /* Reassembly failed. Free up all resources */ 8747 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8748 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8749 IP_REASS_SET_START(t_mp, 0); 8750 IP_REASS_SET_END(t_mp, 0); 8751 } 8752 freemsg(mp); 8753 goto partial_reass_done; 8754 } 8755 8756 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8757 } 8758 /* 8759 * We have completed reassembly. 
Unhook the frag header from 8760 * the reassembly list. 8761 * 8762 * Grab the unfragmentable header length next header value out 8763 * of the first fragment 8764 */ 8765 ASSERT(ipf->ipf_nf_hdr_len != 0); 8766 hdr_length = ipf->ipf_nf_hdr_len; 8767 8768 /* 8769 * Before we free the frag header, record the ECN info 8770 * to report back to the transport. 8771 */ 8772 ecn_info = ipf->ipf_ecn; 8773 8774 /* 8775 * Store the nextheader field in the header preceding the fragment 8776 * header 8777 */ 8778 nexthdr = ipf->ipf_protocol; 8779 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8780 ipfp = ipf->ipf_ptphn; 8781 8782 /* We need to supply these to caller */ 8783 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8784 sum_val = ipf->ipf_checksum; 8785 else 8786 sum_val = 0; 8787 8788 mp1 = ipf->ipf_mp; 8789 count = ipf->ipf_count; 8790 ipf = ipf->ipf_hash_next; 8791 if (ipf) 8792 ipf->ipf_ptphn = ipfp; 8793 ipfp[0] = ipf; 8794 atomic_add_32(&ill->ill_frag_count, -count); 8795 ASSERT(ipfb->ipfb_count >= count); 8796 ipfb->ipfb_count -= count; 8797 ipfb->ipfb_frag_pkts--; 8798 mutex_exit(&ipfb->ipfb_lock); 8799 /* Ditch the frag header. */ 8800 mp = mp1->b_cont; 8801 freeb(mp1); 8802 8803 /* 8804 * Make sure the packet is good by doing some sanity 8805 * check. If bad we can silentely drop the packet. 8806 */ 8807 reass_done: 8808 if (hdr_length < sizeof (ip6_frag_t)) { 8809 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8810 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8811 freemsg(mp); 8812 return (NULL); 8813 } 8814 8815 /* 8816 * Remove the fragment header from the initial header by 8817 * splitting the mblk into the non-fragmentable header and 8818 * everthing after the fragment extension header. This has the 8819 * side effect of putting all the headers that need destination 8820 * processing into the b_cont block-- on return this fact is 8821 * used in order to avoid having to look at the extensions 8822 * already processed. 
8823 * 8824 * Note that this code assumes that the unfragmentable portion 8825 * of the header is in the first mblk and increments 8826 * the read pointer past it. If this assumption is broken 8827 * this code fails badly. 8828 */ 8829 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8830 mblk_t *nmp; 8831 8832 if (!(nmp = dupb(mp))) { 8833 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8834 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8835 freemsg(mp); 8836 return (NULL); 8837 } 8838 nmp->b_cont = mp->b_cont; 8839 mp->b_cont = nmp; 8840 nmp->b_rptr += hdr_length; 8841 } 8842 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8843 8844 ip6h = (ip6_t *)mp->b_rptr; 8845 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8846 8847 /* Restore original IP length in header. */ 8848 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8849 /* Record the ECN info. */ 8850 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8851 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8852 8853 /* Reassembly is successful; return checksum information if needed */ 8854 if (cksum_val != NULL) 8855 *cksum_val = sum_val; 8856 if (cksum_flags != NULL) 8857 *cksum_flags = sum_flags; 8858 8859 return (mp); 8860 } 8861 8862 /* 8863 * Given an mblk and a ptr, find the destination address in an IPv6 routing 8864 * header. 8865 */ 8866 static in6_addr_t 8867 pluck_out_dst(mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 8868 { 8869 ip6_rthdr0_t *rt0; 8870 int segleft, numaddr; 8871 in6_addr_t *ap, rv = oldrv; 8872 8873 rt0 = (ip6_rthdr0_t *)whereptr; 8874 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 8875 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 8876 uint8_t *, whereptr); 8877 return (rv); 8878 } 8879 segleft = rt0->ip6r0_segleft; 8880 numaddr = rt0->ip6r0_len / 2; 8881 8882 if ((rt0->ip6r0_len & 0x1) || 8883 whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr || 8884 (segleft > rt0->ip6r0_len / 2)) { 8885 /* 8886 * Corrupt packet. 
Either the routing header length is odd 8887 * (can't happen) or mismatched compared to the packet, or the 8888 * number of addresses is. Return what we can. This will 8889 * only be a problem on forwarded packets that get squeezed 8890 * through an outbound tunnel enforcing IPsec Tunnel Mode. 8891 */ 8892 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 8893 whereptr); 8894 return (rv); 8895 } 8896 8897 if (segleft != 0) { 8898 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 8899 rv = ap[numaddr - 1]; 8900 } 8901 8902 return (rv); 8903 } 8904 8905 /* 8906 * Walk through the options to see if there is a routing header. 8907 * If present get the destination which is the last address of 8908 * the option. 8909 */ 8910 in6_addr_t 8911 ip_get_dst_v6(ip6_t *ip6h, mblk_t *mp, boolean_t *is_fragment) 8912 { 8913 mblk_t *current_mp = mp; 8914 uint8_t nexthdr; 8915 uint8_t *whereptr; 8916 int ehdrlen; 8917 in6_addr_t rv; 8918 8919 whereptr = (uint8_t *)ip6h; 8920 ehdrlen = sizeof (ip6_t); 8921 8922 /* We assume at least the IPv6 base header is within one mblk. */ 8923 ASSERT(mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen); 8924 8925 rv = ip6h->ip6_dst; 8926 nexthdr = ip6h->ip6_nxt; 8927 if (is_fragment != NULL) 8928 *is_fragment = B_FALSE; 8929 8930 /* 8931 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 8932 * no extension headers will be split across mblks. 8933 */ 8934 8935 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 8936 nexthdr == IPPROTO_ROUTING) { 8937 if (nexthdr == IPPROTO_ROUTING) 8938 rv = pluck_out_dst(current_mp, whereptr, rv); 8939 8940 /* 8941 * All IPv6 extension headers have the next-header in byte 8942 * 0, and the (length - 8) in 8-byte-words. 8943 */ 8944 while (whereptr + ehdrlen >= current_mp->b_wptr) { 8945 ehdrlen -= (current_mp->b_wptr - whereptr); 8946 current_mp = current_mp->b_cont; 8947 if (current_mp == NULL) { 8948 /* Bad packet. Return what we can. 
*/ 8949 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 8950 mp, mblk_t *, current_mp, ip6_t *, ip6h); 8951 goto done; 8952 } 8953 whereptr = current_mp->b_rptr; 8954 } 8955 whereptr += ehdrlen; 8956 8957 nexthdr = *whereptr; 8958 ASSERT(whereptr + 1 < current_mp->b_wptr); 8959 ehdrlen = (*(whereptr + 1) + 1) * 8; 8960 } 8961 8962 done: 8963 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 8964 *is_fragment = B_TRUE; 8965 return (rv); 8966 } 8967 8968 /* 8969 * ip_source_routed_v6: 8970 * This function is called by redirect code in ip_rput_data_v6 to 8971 * know whether this packet is source routed through this node i.e 8972 * whether this node (router) is part of the journey. This 8973 * function is called under two cases : 8974 * 8975 * case 1 : Routing header was processed by this node and 8976 * ip_process_rthdr replaced ip6_dst with the next hop 8977 * and we are forwarding the packet to the next hop. 8978 * 8979 * case 2 : Routing header was not processed by this node and we 8980 * are just forwarding the packet. 8981 * 8982 * For case (1) we don't want to send redirects. For case(2) we 8983 * want to send redirects. 
8984 */ 8985 static boolean_t 8986 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 8987 { 8988 uint8_t nexthdr; 8989 in6_addr_t *addrptr; 8990 ip6_rthdr0_t *rthdr; 8991 uint8_t numaddr; 8992 ip6_hbh_t *hbhhdr; 8993 uint_t ehdrlen; 8994 uint8_t *byteptr; 8995 8996 ip2dbg(("ip_source_routed_v6\n")); 8997 nexthdr = ip6h->ip6_nxt; 8998 ehdrlen = IPV6_HDR_LEN; 8999 9000 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 9001 while (nexthdr == IPPROTO_HOPOPTS || 9002 nexthdr == IPPROTO_DSTOPTS) { 9003 byteptr = (uint8_t *)ip6h + ehdrlen; 9004 /* 9005 * Check if we have already processed 9006 * packets or we are just a forwarding 9007 * router which only pulled up msgs up 9008 * to IPV6HDR and one HBH ext header 9009 */ 9010 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9011 ip2dbg(("ip_source_routed_v6: Extension" 9012 " headers not processed\n")); 9013 return (B_FALSE); 9014 } 9015 hbhhdr = (ip6_hbh_t *)byteptr; 9016 nexthdr = hbhhdr->ip6h_nxt; 9017 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9018 } 9019 switch (nexthdr) { 9020 case IPPROTO_ROUTING: 9021 byteptr = (uint8_t *)ip6h + ehdrlen; 9022 /* 9023 * If for some reason, we haven't pulled up 9024 * the routing hdr data mblk, then we must 9025 * not have processed it at all. So for sure 9026 * we are not part of the source routed journey. 9027 */ 9028 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9029 ip2dbg(("ip_source_routed_v6: Routing" 9030 " header not processed\n")); 9031 return (B_FALSE); 9032 } 9033 rthdr = (ip6_rthdr0_t *)byteptr; 9034 /* 9035 * Either we are an intermediate router or the 9036 * last hop before destination and we have 9037 * already processed the routing header. 9038 * If segment_left is greater than or equal to zero, 9039 * then we must be the (numaddr - segleft) entry 9040 * of the routing header. Although ip6r0_segleft 9041 * is a unit8_t variable, we still check for zero 9042 * or greater value, if in case the data type 9043 * is changed someday in future. 
9044 */ 9045 if (rthdr->ip6r0_segleft > 0 || 9046 rthdr->ip6r0_segleft == 0) { 9047 ire_t *ire = NULL; 9048 9049 numaddr = rthdr->ip6r0_len / 2; 9050 addrptr = (in6_addr_t *)((char *)rthdr + 9051 sizeof (*rthdr)); 9052 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9053 if (addrptr != NULL) { 9054 ire = ire_ctable_lookup_v6(addrptr, NULL, 9055 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9056 MATCH_IRE_TYPE, 9057 ipst); 9058 if (ire != NULL) { 9059 ire_refrele(ire); 9060 return (B_TRUE); 9061 } 9062 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9063 } 9064 } 9065 /* FALLTHRU */ 9066 default: 9067 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9068 return (B_FALSE); 9069 } 9070 } 9071 9072 /* 9073 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9074 * Assumes that the following set of headers appear in the first 9075 * mblk: 9076 * ip6i_t (if present) CAN also appear as a separate mblk. 9077 * ip6_t 9078 * Any extension headers 9079 * TCP/UDP/SCTP header (if present) 9080 * The routine can handle an ICMPv6 header that is not in the first mblk. 9081 * 9082 * The order to determine the outgoing interface is as follows: 9083 * 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. 9084 * 2. If q is an ill queue and (link local or multicast destination) then 9085 * use that ill. 9086 * 3. If IPV6_BOUND_IF has been set use that ill. 9087 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9088 * look for the best IRE match for the unspecified group to determine 9089 * the ill. 9090 * 5. For unicast: Just do an IRE lookup for the best match. 9091 * 9092 * arg2 is always a queue_t *. 9093 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9094 * the zoneid. 9095 * When that queue is not an ill_t, then arg must be a conn_t pointer. 
9096 */ 9097 void 9098 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9099 { 9100 conn_t *connp = NULL; 9101 queue_t *q = (queue_t *)arg2; 9102 ire_t *ire = NULL; 9103 ire_t *sctp_ire = NULL; 9104 ip6_t *ip6h; 9105 in6_addr_t *v6dstp; 9106 ill_t *ill = NULL; 9107 ipif_t *ipif; 9108 ip6i_t *ip6i; 9109 int cksum_request; /* -1 => normal. */ 9110 /* 1 => Skip TCP/UDP/SCTP checksum */ 9111 /* Otherwise contains insert offset for checksum */ 9112 int unspec_src; 9113 boolean_t do_outrequests; /* Increment OutRequests? */ 9114 mib2_ipIfStatsEntry_t *mibptr; 9115 int match_flags = MATCH_IRE_ILL; 9116 mblk_t *first_mp; 9117 boolean_t mctl_present; 9118 ipsec_out_t *io; 9119 boolean_t multirt_need_resolve = B_FALSE; 9120 mblk_t *copy_mp = NULL; 9121 int err = 0; 9122 int ip6i_flags = 0; 9123 zoneid_t zoneid; 9124 ill_t *saved_ill = NULL; 9125 boolean_t conn_lock_held; 9126 boolean_t need_decref = B_FALSE; 9127 ip_stack_t *ipst; 9128 9129 if (q->q_next != NULL) { 9130 ill = (ill_t *)q->q_ptr; 9131 ipst = ill->ill_ipst; 9132 } else { 9133 connp = (conn_t *)arg; 9134 ASSERT(connp != NULL); 9135 ipst = connp->conn_netstack->netstack_ip; 9136 } 9137 9138 /* 9139 * Highest bit in version field is Reachability Confirmation bit 9140 * used by NUD in ip_xmit_v6(). 9141 */ 9142 #ifdef _BIG_ENDIAN 9143 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9144 #else 9145 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9146 #endif 9147 9148 /* 9149 * M_CTL comes from 6 places 9150 * 9151 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9152 * both V4 and V6 datagrams. 9153 * 9154 * 2) AH/ESP sends down M_CTL after doing their job with both 9155 * V4 and V6 datagrams. 9156 * 9157 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9158 * attached. 
9159 * 9160 * 4) Notifications from an external resolver (for XRESOLV ifs) 9161 * 9162 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9163 * IPsec hardware acceleration support. 9164 * 9165 * 6) TUN_HELLO. 9166 * 9167 * We need to handle (1)'s IPv6 case and (3) here. For the 9168 * IPv4 case in (1), and (2), IPSEC processing has already 9169 * started. The code in ip_wput() already knows how to handle 9170 * continuing IPSEC processing (for IPv4 and IPv6). All other 9171 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9172 * for handling. 9173 */ 9174 first_mp = mp; 9175 mctl_present = B_FALSE; 9176 io = NULL; 9177 9178 /* Multidata transmit? */ 9179 if (DB_TYPE(mp) == M_MULTIDATA) { 9180 /* 9181 * We should never get here, since all Multidata messages 9182 * originating from tcp should have been directed over to 9183 * tcp_multisend() in the first place. 9184 */ 9185 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9186 freemsg(mp); 9187 return; 9188 } else if (DB_TYPE(mp) == M_CTL) { 9189 uint32_t mctltype = 0; 9190 uint32_t mlen = MBLKL(first_mp); 9191 9192 mp = mp->b_cont; 9193 mctl_present = B_TRUE; 9194 io = (ipsec_out_t *)first_mp->b_rptr; 9195 9196 /* 9197 * Validate this M_CTL message. The only three types of 9198 * M_CTL messages we expect to see in this code path are 9199 * ipsec_out_t or ipsec_in_t structures (allocated as 9200 * ipsec_info_t unions), or ipsec_ctl_t structures. 9201 * The ipsec_out_type and ipsec_in_type overlap in the two 9202 * data structures, and they are either set to IPSEC_OUT 9203 * or IPSEC_IN depending on which data structure it is. 9204 * ipsec_ctl_t is an IPSEC_CTL. 9205 * 9206 * All other M_CTL messages are sent to ip_wput_nondata() 9207 * for handling. 
9208 */ 9209 if (mlen >= sizeof (io->ipsec_out_type)) 9210 mctltype = io->ipsec_out_type; 9211 9212 if ((mlen == sizeof (ipsec_ctl_t)) && 9213 (mctltype == IPSEC_CTL)) { 9214 ip_output(arg, first_mp, arg2, caller); 9215 return; 9216 } 9217 9218 if ((mlen < sizeof (ipsec_info_t)) || 9219 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9220 mp == NULL) { 9221 ip_wput_nondata(NULL, q, first_mp, NULL); 9222 return; 9223 } 9224 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9225 if (q->q_next == NULL) { 9226 ip6h = (ip6_t *)mp->b_rptr; 9227 /* 9228 * For a freshly-generated TCP dgram that needs IPV6 9229 * processing, don't call ip_wput immediately. We can 9230 * tell this by the ipsec_out_proc_begin. In-progress 9231 * IPSEC_OUT messages have proc_begin set to TRUE, 9232 * and we want to send all IPSEC_IN messages to 9233 * ip_wput() for IPsec processing or finishing. 9234 */ 9235 if (mctltype == IPSEC_IN || 9236 IPVER(ip6h) != IPV6_VERSION || 9237 io->ipsec_out_proc_begin) { 9238 mibptr = &ipst->ips_ip6_mib; 9239 goto notv6; 9240 } 9241 } 9242 } else if (DB_TYPE(mp) != M_DATA) { 9243 ip_wput_nondata(NULL, q, mp, NULL); 9244 return; 9245 } 9246 9247 ip6h = (ip6_t *)mp->b_rptr; 9248 9249 if (IPVER(ip6h) != IPV6_VERSION) { 9250 mibptr = &ipst->ips_ip6_mib; 9251 goto notv6; 9252 } 9253 9254 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9255 (connp == NULL || !connp->conn_ulp_labeled)) { 9256 cred_t *cr; 9257 pid_t pid; 9258 9259 if (connp != NULL) { 9260 ASSERT(CONN_CRED(connp) != NULL); 9261 cr = BEST_CRED(mp, connp, &pid); 9262 err = tsol_check_label_v6(cr, &mp, 9263 connp->conn_mac_exempt, ipst, pid); 9264 } else if ((cr = msg_getcred(mp, &pid)) != NULL) { 9265 err = tsol_check_label_v6(cr, &mp, B_FALSE, ipst, pid); 9266 } 9267 if (mctl_present) 9268 first_mp->b_cont = mp; 9269 else 9270 first_mp = mp; 9271 if (err != 0) { 9272 DTRACE_PROBE3( 9273 tsol_ip_log_drop_checklabel_ip6, char *, 9274 "conn(1), failed to check/update mp(2)", 9275 conn_t, 
connp, mblk_t, mp); 9276 freemsg(first_mp); 9277 return; 9278 } 9279 ip6h = (ip6_t *)mp->b_rptr; 9280 } 9281 if (q->q_next != NULL) { 9282 /* 9283 * We don't know if this ill will be used for IPv6 9284 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9285 * ipif_set_values() sets the ill_isv6 flag to true if 9286 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9287 * just drop the packet. 9288 */ 9289 if (!ill->ill_isv6) { 9290 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9291 "ILLF_IPV6 was set\n")); 9292 freemsg(first_mp); 9293 return; 9294 } 9295 /* For uniformity do a refhold */ 9296 mutex_enter(&ill->ill_lock); 9297 if (!ILL_CAN_LOOKUP(ill)) { 9298 mutex_exit(&ill->ill_lock); 9299 freemsg(first_mp); 9300 return; 9301 } 9302 ill_refhold_locked(ill); 9303 mutex_exit(&ill->ill_lock); 9304 mibptr = ill->ill_ip_mib; 9305 9306 ASSERT(mibptr != NULL); 9307 unspec_src = 0; 9308 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9309 do_outrequests = B_FALSE; 9310 zoneid = (zoneid_t)(uintptr_t)arg; 9311 } else { 9312 ASSERT(connp != NULL); 9313 zoneid = connp->conn_zoneid; 9314 9315 /* is queue flow controlled? */ 9316 if ((q->q_first || connp->conn_draining) && 9317 (caller == IP_WPUT)) { 9318 /* 9319 * 1) TCP sends down M_CTL for detached connections. 9320 * 2) AH/ESP sends down M_CTL. 9321 * 9322 * We don't flow control either of the above. Only 9323 * UDP and others are flow controlled for which we 9324 * can't have a M_CTL. 9325 */ 9326 ASSERT(first_mp == mp); 9327 (void) putq(q, mp); 9328 return; 9329 } 9330 mibptr = &ipst->ips_ip6_mib; 9331 unspec_src = connp->conn_unspec_src; 9332 do_outrequests = B_TRUE; 9333 if (mp->b_flag & MSGHASREF) { 9334 mp->b_flag &= ~MSGHASREF; 9335 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9336 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9337 need_decref = B_TRUE; 9338 } 9339 9340 /* 9341 * If there is a policy, try to attach an ipsec_out in 9342 * the front. 
At the end, first_mp either points to a 9343 * M_DATA message or IPSEC_OUT message linked to a 9344 * M_DATA message. We have to do it now as we might 9345 * lose the "conn" if we go through ip_newroute. 9346 */ 9347 if (!mctl_present && 9348 (connp->conn_out_enforce_policy || 9349 connp->conn_latch != NULL)) { 9350 ASSERT(first_mp == mp); 9351 /* XXX Any better way to get the protocol fast ? */ 9352 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9353 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9354 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9355 if (need_decref) 9356 CONN_DEC_REF(connp); 9357 return; 9358 } else { 9359 ASSERT(mp->b_datap->db_type == M_CTL); 9360 first_mp = mp; 9361 mp = mp->b_cont; 9362 mctl_present = B_TRUE; 9363 io = (ipsec_out_t *)first_mp->b_rptr; 9364 } 9365 } 9366 } 9367 9368 /* check for alignment and full IPv6 header */ 9369 if (!OK_32PTR((uchar_t *)ip6h) || 9370 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9371 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9372 if (do_outrequests) 9373 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9374 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9375 freemsg(first_mp); 9376 if (ill != NULL) 9377 ill_refrele(ill); 9378 if (need_decref) 9379 CONN_DEC_REF(connp); 9380 return; 9381 } 9382 v6dstp = &ip6h->ip6_dst; 9383 cksum_request = -1; 9384 ip6i = NULL; 9385 9386 /* 9387 * Once neighbor discovery has completed, ndp_process() will provide 9388 * locally generated packets for which processing can be reattempted. 9389 * In these cases, connp is NULL and the original zone is part of a 9390 * prepended ipsec_out_t. 9391 */ 9392 if (io != NULL) { 9393 /* 9394 * When coming from icmp_input_v6, the zoneid might not match 9395 * for the loopback case, because inside icmp_input_v6 the 9396 * queue_t is a conn queue from the sending side. 
9397 */ 9398 zoneid = io->ipsec_out_zoneid; 9399 ASSERT(zoneid != ALL_ZONES); 9400 } 9401 9402 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9403 /* 9404 * This is an ip6i_t header followed by an ip6_hdr. 9405 * Check which fields are set. 9406 * 9407 * When the packet comes from a transport we should have 9408 * all needed headers in the first mblk. However, when 9409 * going through ip_newroute*_v6 the ip6i might be in 9410 * a separate mblk when we return here. In that case 9411 * we pullup everything to ensure that extension and transport 9412 * headers "stay" in the first mblk. 9413 */ 9414 ip6i = (ip6i_t *)ip6h; 9415 ip6i_flags = ip6i->ip6i_flags; 9416 9417 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9418 ((mp->b_wptr - (uchar_t *)ip6i) >= 9419 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9420 9421 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9422 if (!pullupmsg(mp, -1)) { 9423 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9424 if (do_outrequests) { 9425 BUMP_MIB(mibptr, 9426 ipIfStatsHCOutRequests); 9427 } 9428 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9429 freemsg(first_mp); 9430 if (ill != NULL) 9431 ill_refrele(ill); 9432 if (need_decref) 9433 CONN_DEC_REF(connp); 9434 return; 9435 } 9436 ip6h = (ip6_t *)mp->b_rptr; 9437 v6dstp = &ip6h->ip6_dst; 9438 ip6i = (ip6i_t *)ip6h; 9439 } 9440 ip6h = (ip6_t *)&ip6i[1]; 9441 9442 /* 9443 * Advance rptr past the ip6i_t to get ready for 9444 * transmitting the packet. However, if the packet gets 9445 * passed to ip_newroute*_v6 then rptr is moved back so 9446 * that the ip6i_t header can be inspected when the 9447 * packet comes back here after passing through 9448 * ire_add_then_send. 
9449 */ 9450 mp->b_rptr = (uchar_t *)ip6h; 9451 9452 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9453 ASSERT(ip6i->ip6i_ifindex != 0); 9454 if (ill != NULL) 9455 ill_refrele(ill); 9456 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9457 NULL, NULL, NULL, NULL, ipst); 9458 if (ill == NULL) { 9459 if (do_outrequests) { 9460 BUMP_MIB(mibptr, 9461 ipIfStatsHCOutRequests); 9462 } 9463 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9464 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9465 ip6i->ip6i_ifindex)); 9466 if (need_decref) 9467 CONN_DEC_REF(connp); 9468 freemsg(first_mp); 9469 return; 9470 } 9471 mibptr = ill->ill_ip_mib; 9472 /* 9473 * Preserve the index so that when we return from 9474 * IPSEC processing, we know where to send the packet. 9475 */ 9476 if (mctl_present) { 9477 ASSERT(io != NULL); 9478 io->ipsec_out_ill_index = ip6i->ip6i_ifindex; 9479 } 9480 } 9481 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9482 cred_t *cr = msg_getcred(mp, NULL); 9483 9484 /* rpcmod doesn't send down db_credp for UDP packets */ 9485 if (cr == NULL) { 9486 if (connp != NULL) 9487 cr = connp->conn_cred; 9488 else 9489 cr = ill->ill_credp; 9490 } 9491 9492 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9493 if (secpolicy_net_rawaccess(cr) != 0) { 9494 /* 9495 * Use IPCL_ZONEID to honor SO_ALLZONES. 9496 */ 9497 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9498 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9499 NULL, connp != NULL ? 
9500 IPCL_ZONEID(connp) : zoneid, NULL, 9501 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9502 if (ire == NULL) { 9503 if (do_outrequests) 9504 BUMP_MIB(mibptr, 9505 ipIfStatsHCOutRequests); 9506 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9507 ip1dbg(("ip_wput_v6: bad source " 9508 "addr\n")); 9509 freemsg(first_mp); 9510 if (ill != NULL) 9511 ill_refrele(ill); 9512 if (need_decref) 9513 CONN_DEC_REF(connp); 9514 return; 9515 } 9516 ire_refrele(ire); 9517 } 9518 /* No need to verify again when using ip_newroute */ 9519 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9520 } 9521 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9522 /* 9523 * Make sure they match since ip_newroute*_v6 etc might 9524 * (unknown to them) inspect ip6i_nexthop when 9525 * they think they access ip6_dst. 9526 */ 9527 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9528 } 9529 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9530 cksum_request = 1; 9531 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9532 cksum_request = ip6i->ip6i_checksum_off; 9533 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9534 unspec_src = 1; 9535 9536 if (do_outrequests && ill != NULL) { 9537 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9538 do_outrequests = B_FALSE; 9539 } 9540 /* 9541 * Store ip6i_t info that we need after we come back 9542 * from IPSEC processing. 9543 */ 9544 if (mctl_present) { 9545 ASSERT(io != NULL); 9546 io->ipsec_out_unspec_src = unspec_src; 9547 } 9548 } 9549 if (connp != NULL && connp->conn_dontroute) 9550 ip6h->ip6_hops = 1; 9551 9552 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9553 goto ipv6multicast; 9554 9555 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9556 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9557 ASSERT(ill != NULL); 9558 goto send_from_ill; 9559 } 9560 9561 /* 9562 * 2. If q is an ill queue and there's a link-local destination 9563 * then use that ill. 9564 */ 9565 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) 9566 goto send_from_ill; 9567 9568 /* 3. If IPV6_BOUND_IF has been set use that ill. 
*/ 9569 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9570 ill_t *conn_outgoing_ill; 9571 9572 conn_outgoing_ill = conn_get_held_ill(connp, 9573 &connp->conn_outgoing_ill, &err); 9574 if (err == ILL_LOOKUP_FAILED) { 9575 if (ill != NULL) 9576 ill_refrele(ill); 9577 if (need_decref) 9578 CONN_DEC_REF(connp); 9579 freemsg(first_mp); 9580 return; 9581 } 9582 if (ill != NULL) 9583 ill_refrele(ill); 9584 ill = conn_outgoing_ill; 9585 mibptr = ill->ill_ip_mib; 9586 goto send_from_ill; 9587 } 9588 9589 /* 9590 * 4. For unicast: Just do an IRE lookup for the best match. 9591 * If we get here for a link-local address it is rather random 9592 * what interface we pick on a multihomed host. 9593 * *If* there is an IRE_CACHE (and the link-local address 9594 * isn't duplicated on multi links) this will find the IRE_CACHE. 9595 * Otherwise it will use one of the matching IRE_INTERFACE routes 9596 * for the link-local prefix. Hence, applications 9597 * *should* be encouraged to specify an outgoing interface when sending 9598 * to a link local address. 9599 */ 9600 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9601 !connp->conn_fully_bound)) { 9602 /* 9603 * We cache IRE_CACHEs to avoid lookups. We don't do 9604 * this for the tcp global queue and listen end point 9605 * as it does not really have a real destination to 9606 * talk to. 9607 */ 9608 ire = ire_cache_lookup_v6(v6dstp, zoneid, msg_getlabel(mp), 9609 ipst); 9610 } else { 9611 /* 9612 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9613 * grab a lock here to check for CONDEMNED as it is okay 9614 * to send a packet or two with the IRE_CACHE that is going 9615 * away. 9616 */ 9617 mutex_enter(&connp->conn_lock); 9618 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9619 if (ire != NULL && 9620 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9621 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9622 9623 IRE_REFHOLD(ire); 9624 mutex_exit(&connp->conn_lock); 9625 9626 } else { 9627 boolean_t cached = B_FALSE; 9628 9629 connp->conn_ire_cache = NULL; 9630 mutex_exit(&connp->conn_lock); 9631 /* Release the old ire */ 9632 if (ire != NULL && sctp_ire == NULL) 9633 IRE_REFRELE_NOTR(ire); 9634 9635 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9636 msg_getlabel(mp), ipst); 9637 if (ire != NULL) { 9638 IRE_REFHOLD_NOTR(ire); 9639 9640 mutex_enter(&connp->conn_lock); 9641 if (CONN_CACHE_IRE(connp) && 9642 (connp->conn_ire_cache == NULL)) { 9643 rw_enter(&ire->ire_bucket->irb_lock, 9644 RW_READER); 9645 if (!(ire->ire_marks & 9646 IRE_MARK_CONDEMNED)) { 9647 connp->conn_ire_cache = ire; 9648 cached = B_TRUE; 9649 } 9650 rw_exit(&ire->ire_bucket->irb_lock); 9651 } 9652 mutex_exit(&connp->conn_lock); 9653 9654 /* 9655 * We can continue to use the ire but since it 9656 * was not cached, we should drop the extra 9657 * reference. 9658 */ 9659 if (!cached) 9660 IRE_REFRELE_NOTR(ire); 9661 } 9662 } 9663 } 9664 9665 if (ire != NULL) { 9666 if (do_outrequests) { 9667 /* Handle IRE_LOCAL's that might appear here */ 9668 if (ire->ire_type == IRE_CACHE) { 9669 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9670 ill_ip_mib; 9671 } else { 9672 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9673 } 9674 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9675 } 9676 9677 /* 9678 * Check if the ire has the RTF_MULTIRT flag, inherited 9679 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9680 */ 9681 if (ire->ire_flags & RTF_MULTIRT) { 9682 /* 9683 * Force hop limit of multirouted packets if required. 9684 * The hop limit of such packets is bounded by the 9685 * ip_multirt_ttl ndd variable. 9686 * NDP packets must have a hop limit of 255; don't 9687 * change the hop limit in that case. 
9688 */ 9689 if ((ipst->ips_ip_multirt_ttl > 0) && 9690 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9691 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9692 if (ip_debug > 3) { 9693 ip2dbg(("ip_wput_v6: forcing multirt " 9694 "hop limit to %d (was %d) ", 9695 ipst->ips_ip_multirt_ttl, 9696 ip6h->ip6_hops)); 9697 pr_addr_dbg("v6dst %s\n", AF_INET6, 9698 &ire->ire_addr_v6); 9699 } 9700 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9701 } 9702 9703 /* 9704 * We look at this point if there are pending 9705 * unresolved routes. ire_multirt_need_resolve_v6() 9706 * checks in O(n) that all IRE_OFFSUBNET ire 9707 * entries for the packet's destination and 9708 * flagged RTF_MULTIRT are currently resolved. 9709 * If some remain unresolved, we do a copy 9710 * of the current message. It will be used 9711 * to initiate additional route resolutions. 9712 */ 9713 multirt_need_resolve = 9714 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9715 msg_getlabel(first_mp), ipst); 9716 ip2dbg(("ip_wput_v6: ire %p, " 9717 "multirt_need_resolve %d, first_mp %p\n", 9718 (void *)ire, multirt_need_resolve, 9719 (void *)first_mp)); 9720 if (multirt_need_resolve) { 9721 copy_mp = copymsg(first_mp); 9722 if (copy_mp != NULL) { 9723 MULTIRT_DEBUG_TAG(copy_mp); 9724 } 9725 } 9726 } 9727 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9728 connp, caller, ip6i_flags, zoneid); 9729 if (need_decref) { 9730 CONN_DEC_REF(connp); 9731 connp = NULL; 9732 } 9733 IRE_REFRELE(ire); 9734 9735 /* 9736 * Try to resolve another multiroute if 9737 * ire_multirt_need_resolve_v6() deemed it necessary. 9738 * copy_mp will be consumed (sent or freed) by 9739 * ip_newroute_v6(). 
9740 */ 9741 if (copy_mp != NULL) { 9742 if (mctl_present) { 9743 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9744 } else { 9745 ip6h = (ip6_t *)copy_mp->b_rptr; 9746 } 9747 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9748 &ip6h->ip6_src, NULL, zoneid, ipst); 9749 } 9750 if (ill != NULL) 9751 ill_refrele(ill); 9752 return; 9753 } 9754 9755 /* 9756 * No full IRE for this destination. Send it to 9757 * ip_newroute_v6 to see if anything else matches. 9758 * Mark this packet as having originated on this 9759 * machine. 9760 * Update rptr if there was an ip6i_t header. 9761 */ 9762 mp->b_prev = NULL; 9763 mp->b_next = NULL; 9764 if (ip6i != NULL) 9765 mp->b_rptr -= sizeof (ip6i_t); 9766 9767 if (unspec_src) { 9768 if (ip6i == NULL) { 9769 /* 9770 * Add ip6i_t header to carry unspec_src 9771 * until the packet comes back in ip_wput_v6. 9772 */ 9773 mp = ip_add_info_v6(mp, NULL, v6dstp); 9774 if (mp == NULL) { 9775 if (do_outrequests) 9776 BUMP_MIB(mibptr, 9777 ipIfStatsHCOutRequests); 9778 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9779 if (mctl_present) 9780 freeb(first_mp); 9781 if (ill != NULL) 9782 ill_refrele(ill); 9783 if (need_decref) 9784 CONN_DEC_REF(connp); 9785 return; 9786 } 9787 ip6i = (ip6i_t *)mp->b_rptr; 9788 9789 if (mctl_present) { 9790 ASSERT(first_mp != mp); 9791 first_mp->b_cont = mp; 9792 } else { 9793 first_mp = mp; 9794 } 9795 9796 if ((mp->b_wptr - (uchar_t *)ip6i) == 9797 sizeof (ip6i_t)) { 9798 /* 9799 * ndp_resolver called from ip_newroute_v6 9800 * expects pulled up message. 
9801 */ 9802 if (!pullupmsg(mp, -1)) { 9803 ip1dbg(("ip_wput_v6: pullupmsg" 9804 " failed\n")); 9805 if (do_outrequests) { 9806 BUMP_MIB(mibptr, 9807 ipIfStatsHCOutRequests); 9808 } 9809 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9810 freemsg(first_mp); 9811 if (ill != NULL) 9812 ill_refrele(ill); 9813 if (need_decref) 9814 CONN_DEC_REF(connp); 9815 return; 9816 } 9817 ip6i = (ip6i_t *)mp->b_rptr; 9818 } 9819 ip6h = (ip6_t *)&ip6i[1]; 9820 v6dstp = &ip6h->ip6_dst; 9821 } 9822 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9823 if (mctl_present) { 9824 ASSERT(io != NULL); 9825 io->ipsec_out_unspec_src = unspec_src; 9826 } 9827 } 9828 if (do_outrequests) 9829 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9830 if (need_decref) 9831 CONN_DEC_REF(connp); 9832 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 9833 if (ill != NULL) 9834 ill_refrele(ill); 9835 return; 9836 9837 9838 /* 9839 * Handle multicast packets with or without an conn. 9840 * Assumes that the transports set ip6_hops taking 9841 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9842 * into account. 9843 */ 9844 ipv6multicast: 9845 ip2dbg(("ip_wput_v6: multicast\n")); 9846 9847 /* 9848 * Hold the conn_lock till we refhold the ill of interest that is 9849 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9850 * while holding any locks, postpone the refrele until after the 9851 * conn_lock is dropped. 9852 */ 9853 if (connp != NULL) { 9854 mutex_enter(&connp->conn_lock); 9855 conn_lock_held = B_TRUE; 9856 } else { 9857 conn_lock_held = B_FALSE; 9858 } 9859 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9860 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9861 ASSERT(ill != NULL); 9862 } else if (ill != NULL) { 9863 /* 9864 * 2. If q is an ill queue and (link local or multicast 9865 * destination) then use that ill. 9866 * We don't need the ipif initialization here. 
9867 * This useless assert below is just to prevent lint from 9868 * reporting a null body if statement. 9869 */ 9870 ASSERT(ill != NULL); 9871 } else if (connp != NULL) { 9872 /* 9873 * 3. If IPV6_BOUND_IF has been set use that ill. 9874 * 9875 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. 9876 * Otherwise look for the best IRE match for the unspecified 9877 * group to determine the ill. 9878 * 9879 * conn_multicast_ill is used for only IPv6 packets. 9880 * conn_multicast_ipif is used for only IPv4 packets. 9881 * Thus a PF_INET6 socket send both IPv4 and IPv6 9882 * multicast packets using different IP*_MULTICAST_IF 9883 * interfaces. 9884 */ 9885 if (connp->conn_outgoing_ill != NULL) { 9886 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9887 if (err == ILL_LOOKUP_FAILED) { 9888 ip1dbg(("ip_output_v6: multicast" 9889 " conn_outgoing_ill no ipif\n")); 9890 multicast_discard: 9891 ASSERT(saved_ill == NULL); 9892 if (conn_lock_held) 9893 mutex_exit(&connp->conn_lock); 9894 if (ill != NULL) 9895 ill_refrele(ill); 9896 freemsg(first_mp); 9897 if (do_outrequests) 9898 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9899 if (need_decref) 9900 CONN_DEC_REF(connp); 9901 return; 9902 } 9903 ill = connp->conn_outgoing_ill; 9904 } else if (connp->conn_multicast_ill != NULL) { 9905 err = ill_check_and_refhold(connp->conn_multicast_ill); 9906 if (err == ILL_LOOKUP_FAILED) { 9907 ip1dbg(("ip_output_v6: multicast" 9908 " conn_multicast_ill no ipif\n")); 9909 goto multicast_discard; 9910 } 9911 ill = connp->conn_multicast_ill; 9912 } else { 9913 mutex_exit(&connp->conn_lock); 9914 conn_lock_held = B_FALSE; 9915 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 9916 if (ipif == NULL) { 9917 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9918 goto multicast_discard; 9919 } 9920 /* 9921 * We have a ref to this ipif, so we can safely 9922 * access ipif_ill. 
9923 */ 9924 ill = ipif->ipif_ill; 9925 mutex_enter(&ill->ill_lock); 9926 if (!ILL_CAN_LOOKUP(ill)) { 9927 mutex_exit(&ill->ill_lock); 9928 ipif_refrele(ipif); 9929 ill = NULL; 9930 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9931 goto multicast_discard; 9932 } 9933 ill_refhold_locked(ill); 9934 mutex_exit(&ill->ill_lock); 9935 ipif_refrele(ipif); 9936 /* 9937 * Save binding until IPV6_MULTICAST_IF 9938 * changes it 9939 */ 9940 mutex_enter(&connp->conn_lock); 9941 connp->conn_multicast_ill = ill; 9942 mutex_exit(&connp->conn_lock); 9943 } 9944 } 9945 if (conn_lock_held) 9946 mutex_exit(&connp->conn_lock); 9947 9948 if (saved_ill != NULL) 9949 ill_refrele(saved_ill); 9950 9951 ASSERT(ill != NULL); 9952 /* 9953 * For multicast loopback interfaces replace the multicast address 9954 * with a unicast address for the ire lookup. 9955 */ 9956 if (IS_LOOPBACK(ill)) 9957 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9958 9959 mibptr = ill->ill_ip_mib; 9960 if (do_outrequests) { 9961 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9962 do_outrequests = B_FALSE; 9963 } 9964 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 9965 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 9966 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 9967 9968 /* 9969 * As we may lose the conn by the time we reach ip_wput_ire_v6 9970 * we copy conn_multicast_loop and conn_dontroute on to an 9971 * ipsec_out. In case if this datagram goes out secure, 9972 * we need the ill_index also. Copy that also into the 9973 * ipsec_out. 
9974 */ 9975 if (mctl_present) { 9976 io = (ipsec_out_t *)first_mp->b_rptr; 9977 ASSERT(first_mp->b_datap->db_type == M_CTL); 9978 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9979 } else { 9980 ASSERT(mp == first_mp); 9981 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 9982 NULL) { 9983 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9984 freemsg(mp); 9985 if (ill != NULL) 9986 ill_refrele(ill); 9987 if (need_decref) 9988 CONN_DEC_REF(connp); 9989 return; 9990 } 9991 io = (ipsec_out_t *)first_mp->b_rptr; 9992 /* This is not a secure packet */ 9993 io->ipsec_out_secure = B_FALSE; 9994 io->ipsec_out_use_global_policy = B_TRUE; 9995 io->ipsec_out_zoneid = 9996 (zoneid != ALL_ZONES ? zoneid : GLOBAL_ZONEID); 9997 first_mp->b_cont = mp; 9998 mctl_present = B_TRUE; 9999 } 10000 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10001 io->ipsec_out_unspec_src = unspec_src; 10002 if (connp != NULL) 10003 io->ipsec_out_dontroute = connp->conn_dontroute; 10004 10005 send_from_ill: 10006 ASSERT(ill != NULL); 10007 ASSERT(mibptr == ill->ill_ip_mib); 10008 10009 if (do_outrequests) { 10010 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 10011 do_outrequests = B_FALSE; 10012 } 10013 10014 /* 10015 * Because nce_xmit() calls ip_output_v6() and NCEs are always tied to 10016 * an underlying interface, IS_UNDER_IPMP() may be true even when 10017 * building IREs that will be used for data traffic. As such, use the 10018 * packet's source address to determine whether the traffic is test 10019 * traffic, and set MATCH_IRE_MARK_TESTHIDDEN if so. 10020 * 10021 * Separately, we also need to mark probe packets so that ND can 10022 * process them specially; see the comments in nce_queue_mp_common(). 
10023 */ 10024 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10025 ipif_lookup_testaddr_v6(ill, &ip6h->ip6_src, NULL)) { 10026 if (ip6i == NULL) { 10027 if ((mp = ip_add_info_v6(mp, NULL, v6dstp)) == NULL) { 10028 if (mctl_present) 10029 freeb(first_mp); 10030 goto discard; 10031 } 10032 10033 if (mctl_present) 10034 first_mp->b_cont = mp; 10035 else 10036 first_mp = mp; 10037 10038 /* ndp_resolver() expects a pulled-up message */ 10039 if (MBLKL(mp) == sizeof (ip6i_t) && 10040 pullupmsg(mp, -1) == 0) { 10041 ip1dbg(("ip_output_v6: pullupmsg failed\n")); 10042 discard: BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10043 ill_refrele(ill); 10044 if (need_decref) 10045 CONN_DEC_REF(connp); 10046 return; 10047 } 10048 ip6i = (ip6i_t *)mp->b_rptr; 10049 ip6h = (ip6_t *)&ip6i[1]; 10050 v6dstp = &ip6h->ip6_dst; 10051 mp->b_rptr = (uchar_t *)ip6h; /* rewound below */ 10052 } 10053 ip6i->ip6i_flags |= IP6I_IPMP_PROBE; 10054 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 10055 } 10056 10057 if (io != NULL) 10058 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10059 10060 /* 10061 * When a specific ill is specified (using IPV6_PKTINFO, 10062 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10063 * on routing entries (ftable and ctable) that have a matching 10064 * ire->ire_ipif->ipif_ill. Thus this can only be used 10065 * for destinations that are on-link for the specific ill 10066 * and that can appear on multiple links. Thus it is useful 10067 * for multicast destinations, link-local destinations, and 10068 * at some point perhaps for site-local destinations (if the 10069 * node sits at a site boundary). 10070 * We create the cache entries in the regular ctable since 10071 * it can not "confuse" things for other destinations. 10072 * table. 10073 * 10074 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10075 * It is used only when ire_cache_lookup is used above. 
10076 */ 10077 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10078 zoneid, msg_getlabel(mp), match_flags, ipst); 10079 if (ire != NULL) { 10080 /* 10081 * Check if the ire has the RTF_MULTIRT flag, inherited 10082 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10083 */ 10084 if (ire->ire_flags & RTF_MULTIRT) { 10085 /* 10086 * Force hop limit of multirouted packets if required. 10087 * The hop limit of such packets is bounded by the 10088 * ip_multirt_ttl ndd variable. 10089 * NDP packets must have a hop limit of 255; don't 10090 * change the hop limit in that case. 10091 */ 10092 if ((ipst->ips_ip_multirt_ttl > 0) && 10093 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10094 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10095 if (ip_debug > 3) { 10096 ip2dbg(("ip_wput_v6: forcing multirt " 10097 "hop limit to %d (was %d) ", 10098 ipst->ips_ip_multirt_ttl, 10099 ip6h->ip6_hops)); 10100 pr_addr_dbg("v6dst %s\n", AF_INET6, 10101 &ire->ire_addr_v6); 10102 } 10103 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10104 } 10105 10106 /* 10107 * We look at this point if there are pending 10108 * unresolved routes. ire_multirt_need_resolve_v6() 10109 * checks in O(n) that all IRE_OFFSUBNET ire 10110 * entries for the packet's destination and 10111 * flagged RTF_MULTIRT are currently resolved. 10112 * If some remain unresolved, we make a copy 10113 * of the current message. It will be used 10114 * to initiate additional route resolutions. 
10115 */ 10116 multirt_need_resolve = 10117 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10118 msg_getlabel(first_mp), ipst); 10119 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10120 "multirt_need_resolve %d, first_mp %p\n", 10121 (void *)ire, multirt_need_resolve, 10122 (void *)first_mp)); 10123 if (multirt_need_resolve) { 10124 copy_mp = copymsg(first_mp); 10125 if (copy_mp != NULL) { 10126 MULTIRT_DEBUG_TAG(copy_mp); 10127 } 10128 } 10129 } 10130 10131 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10132 ill->ill_name, (void *)ire, 10133 ill->ill_phyint->phyint_ifindex)); 10134 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10135 connp, caller, ip6i_flags, zoneid); 10136 ire_refrele(ire); 10137 if (need_decref) { 10138 CONN_DEC_REF(connp); 10139 connp = NULL; 10140 } 10141 10142 /* 10143 * Try to resolve another multiroute if 10144 * ire_multirt_need_resolve_v6() deemed it necessary. 10145 * copy_mp will be consumed (sent or freed) by 10146 * ip_newroute_[ipif_]v6(). 10147 */ 10148 if (copy_mp != NULL) { 10149 if (mctl_present) { 10150 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10151 } else { 10152 ip6h = (ip6_t *)copy_mp->b_rptr; 10153 } 10154 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10155 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10156 zoneid, ipst); 10157 if (ipif == NULL) { 10158 ip1dbg(("ip_wput_v6: No ipif for " 10159 "multicast\n")); 10160 MULTIRT_DEBUG_UNTAG(copy_mp); 10161 freemsg(copy_mp); 10162 return; 10163 } 10164 ip_newroute_ipif_v6(q, copy_mp, ipif, 10165 &ip6h->ip6_dst, &ip6h->ip6_src, unspec_src, 10166 zoneid); 10167 ipif_refrele(ipif); 10168 } else { 10169 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10170 &ip6h->ip6_src, ill, zoneid, ipst); 10171 } 10172 } 10173 ill_refrele(ill); 10174 return; 10175 } 10176 if (need_decref) { 10177 CONN_DEC_REF(connp); 10178 connp = NULL; 10179 } 10180 10181 /* Update rptr if there was an ip6i_t header. 
*/ 10182 if (ip6i != NULL) 10183 mp->b_rptr -= sizeof (ip6i_t); 10184 if (unspec_src) { 10185 if (ip6i == NULL) { 10186 /* 10187 * Add ip6i_t header to carry unspec_src 10188 * until the packet comes back in ip_wput_v6. 10189 */ 10190 if (mctl_present) { 10191 first_mp->b_cont = 10192 ip_add_info_v6(mp, NULL, v6dstp); 10193 mp = first_mp->b_cont; 10194 if (mp == NULL) 10195 freeb(first_mp); 10196 } else { 10197 first_mp = mp = ip_add_info_v6(mp, NULL, 10198 v6dstp); 10199 } 10200 if (mp == NULL) { 10201 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10202 ill_refrele(ill); 10203 return; 10204 } 10205 ip6i = (ip6i_t *)mp->b_rptr; 10206 if ((mp->b_wptr - (uchar_t *)ip6i) == 10207 sizeof (ip6i_t)) { 10208 /* 10209 * ndp_resolver called from ip_newroute_v6 10210 * expects a pulled up message. 10211 */ 10212 if (!pullupmsg(mp, -1)) { 10213 ip1dbg(("ip_wput_v6: pullupmsg" 10214 " failed\n")); 10215 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10216 freemsg(first_mp); 10217 return; 10218 } 10219 ip6i = (ip6i_t *)mp->b_rptr; 10220 } 10221 ip6h = (ip6_t *)&ip6i[1]; 10222 v6dstp = &ip6h->ip6_dst; 10223 } 10224 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10225 if (mctl_present) { 10226 ASSERT(io != NULL); 10227 io->ipsec_out_unspec_src = unspec_src; 10228 } 10229 } 10230 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10231 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, v6dstp, 10232 &ip6h->ip6_src, unspec_src, zoneid); 10233 } else { 10234 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10235 zoneid, ipst); 10236 } 10237 ill_refrele(ill); 10238 return; 10239 10240 notv6: 10241 /* FIXME?: assume the caller calls the right version of ip_output? */ 10242 if (q->q_next == NULL) { 10243 connp = Q_TO_CONN(q); 10244 10245 /* 10246 * We can change conn_send for all types of conn, even 10247 * though only TCP uses it right now. 10248 * FIXME: sctp could use conn_send but doesn't currently. 
10249 */ 10250 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10251 } 10252 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10253 (void) ip_output(arg, first_mp, arg2, caller); 10254 if (ill != NULL) 10255 ill_refrele(ill); 10256 } 10257 10258
/*
 * ip_wput_v6() forwards a message from write queue q to ip_output_v6(),
 * identifying itself with the IP_WPUT caller code.
 *
 * If this is a conn_t queue (CONN_Q(q) is true), then we pass in the conn
 * obtained via Q_TO_CONN(q). This includes the zoneid.
 * Otherwise, this is a message for an ill_t queue, in which case we pass
 * GLOBAL_ZONEID since those are all part of the global zone.
 */
10265 void 10266 ip_wput_v6(queue_t *q, mblk_t *mp) 10267 { 10268 if (CONN_Q(q)) 10269 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10270 else 10271 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10272 } 10273 10274
/*
 * NULL send-to queue - packet is to be delivered locally.
 *
 * Arguments:
 *	q		queue handed on to the fanout routines.
 *	ill		ill used for the hooks, the MIB counters and as both
 *			ill arguments of the fanout calls.
 *	ip6h		caller's pointer to the IPv6 header.
 *			NOTE(review): presumably points into the data mblk
 *			of first_mp -- confirm against callers.
 *	first_mp	either the data mblk itself, or an M_CTL holding an
 *			ipsec_out_t with the data mblk in b_cont.
 *	ire		its packet count and ire_last_used_time are updated;
 *			ire->ire_zoneid is the zone passed to the fanouts.
 *	fanout_flags	caller's flags; protocol specific IP_FF_* flags are
 *			OR-ed in before each fanout call.
 *	zoneid		consulted only when computing the zones handed to
 *			ipobs_hook().
 *
 * After the loopback-in firewall hooks (and ipobs_hook() when
 * ips_ipobs_enabled is set) have run, the packet is handed to the fanout
 * routine selected by its next header: ip_fanout_tcp_v6(),
 * ip_fanout_udp_v6(), ip_fanout_sctp() or ip_fanout_proto_v6(). ICMPv6
 * packets are first copied to icmp_inbound_v6() and then also fall through
 * to ip_fanout_proto_v6(), unless ips_ipv6_drop_inbound_icmpv6 is set, in
 * which case the packet is freed.
 *
 * On the error paths visible here (ip_hdr_length_nexthdr_v6() failure,
 * pullupmsg() failure) ipIfStatsOutDiscards is bumped and first_mp is freed.
 * This function does not release any reference on ire or ill.
 */
10277 void 10278 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10279 ire_t *ire, int fanout_flags, zoneid_t zoneid) 10280 { 10281 uint32_t ports; 10282 mblk_t *mp = first_mp, *first_mp1; 10283 boolean_t mctl_present; 10284 uint8_t nexthdr; 10285 uint16_t hdr_length; 10286 ipsec_out_t *io; 10287 mib2_ipIfStatsEntry_t *mibptr; 10288 ilm_t *ilm; 10289 uint_t nexthdr_offset; 10290 ip_stack_t *ipst = ill->ill_ipst; 10291
/*
 * A leading M_CTL is treated as an ipsec_out_t. When the packet is not
 * secure the M_CTL is freed and processing continues on the data mblk alone;
 * otherwise the M_CTL is kept and passed through ipsec_out_to_in().
 */
10292 if (DB_TYPE(mp) == M_CTL) { 10293 io = (ipsec_out_t *)mp->b_rptr; 10294 if (!io->ipsec_out_secure) { 10295 mp = mp->b_cont; 10296 freeb(first_mp); 10297 first_mp = mp; 10298 mctl_present = B_FALSE; 10299 } else { 10300 mctl_present = B_TRUE; 10301 mp = first_mp->b_cont; 10302 ipsec_out_to_in(first_mp); 10303 } 10304 } else { 10305 mctl_present = B_FALSE; 10306 } 10307 10308 /* 10309 * Remove reachability confirmation bit from version field 10310 * before passing the packet on to any firewall hooks or 10311 * looping back the packet. 
10312 */ 10313 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10314 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10315 10316 DTRACE_PROBE4(ip6__loopback__in__start, 10317 ill_t *, ill, ill_t *, NULL, 10318 ip6_t *, ip6h, mblk_t *, first_mp); 10319 10320 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10321 ipst->ips_ipv6firewall_loopback_in, 10322 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10323 10324 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10325
/*
 * NOTE(review): first_mp is presumably cleared by FW_HOOKS6 when a hook
 * consumes or drops the packet -- confirm against the macro definition.
 */
10326 if (first_mp == NULL) 10327 return; 10328 10329 if (ipst->ips_ipobs_enabled) { 10330 zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; 10331 zoneid_t stackzoneid = netstackid_to_zoneid( 10332 ipst->ips_netstack->netstack_stackid); 10333 10334 szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; 10335 /* 10336 * ::1 is special, as we cannot lookup its zoneid by 10337 * address. For this case, restrict the lookup to the 10338 * source zone. 10339 */ 10340 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) 10341 lookup_zoneid = zoneid; 10342 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 10343 lookup_zoneid); 10344 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 10345 IPV6_VERSION, 0, ipst); 10346 } 10347 10348 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10349 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10350 int, 1); 10351 10352 nexthdr = ip6h->ip6_nxt; 10353 mibptr = ill->ill_ip_mib; 10354 10355 /* Fastpath */ 10356 switch (nexthdr) { 10357 case IPPROTO_TCP: 10358 case IPPROTO_UDP: 10359 case IPPROTO_ICMPV6: 10360 case IPPROTO_SCTP: 10361 hdr_length = IPV6_HDR_LEN; 10362 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10363 (uchar_t *)ip6h); 10364 break; 10365 default: { 10366 uint8_t *nexthdrp; 10367 10368 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10369 &hdr_length, &nexthdrp)) { 10370 /* Malformed packet */ 10371 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10372 freemsg(first_mp); 10373 return; 10374 } 10375 nexthdr = *nexthdrp; 10376 nexthdr_offset = nexthdrp - 
(uint8_t *)ip6h; 10377 break; 10378 } 10379 } 10380 10381 UPDATE_OB_PKT_COUNT(ire); 10382 ire->ire_last_used_time = lbolt; 10383 10384 switch (nexthdr) { 10385 case IPPROTO_TCP: 10386 if (DB_TYPE(mp) == M_DATA) { 10387 /* 10388 * M_DATA mblk, so init mblk (chain) for 10389 * no struio(). 10390 */ 10391 mblk_t *mp1 = mp; 10392 10393 do { 10394 mp1->b_datap->db_struioflag = 0; 10395 } while ((mp1 = mp1->b_cont) != NULL); 10396 }
/*
 * NOTE(review): ports is loaded here but is not among the arguments passed
 * to ip_fanout_tcp_v6() below.
 */
10397 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10398 TCP_PORTS_OFFSET); 10399 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10400 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10401 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10402 hdr_length, mctl_present, ire->ire_zoneid); 10403 return; 10404 10405 case IPPROTO_UDP: 10406 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10407 UDP_PORTS_OFFSET); 10408 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10409 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10410 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10411 return; 10412 10413 case IPPROTO_SCTP: 10414 { 10415 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10416 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10417 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10418 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10419 return; 10420 } 10421 case IPPROTO_ICMPV6: { 10422 icmp6_t *icmp6; 10423 10424 /* check for full IPv6+ICMPv6 header */ 10425 if ((mp->b_wptr - mp->b_rptr) < 10426 (hdr_length + ICMP6_MINLEN)) { 10427 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10428 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10429 " failed\n")); 10430 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10431 freemsg(first_mp); 10432 return; 10433 } 10434 ip6h = (ip6_t *)mp->b_rptr; 10435 } 10436 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10437 10438 /* Update output mib stats */ 10439 icmp_update_out_mib_v6(ill, icmp6); 10440 10441 /* Check variable for testing applications */ 10442 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10443 
freemsg(first_mp); 10444 return; 10445 } 10446 /* 10447 * Assume that there is always at least one conn for 10448 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10449 * where there is no conn. 10450 */ 10451 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10452 !IS_LOOPBACK(ill)) { 10453 ilm_walker_t ilw; 10454 10455 /* 10456 * In the multicast case, applications may have 10457 * joined the group from different zones, so we 10458 * need to deliver the packet to each of them. 10459 * Loop through the multicast memberships 10460 * structures (ilm) on the receive ill and send 10461 * a copy of the packet up each matching one. 10462 * However, we don't do this for multicasts sent 10463 * on the loopback interface (PHYI_LOOPBACK flag 10464 * set) as they must stay in the sender's zone. 10465 */ 10466 ilm = ilm_walker_start(&ilw, ill); 10467 for (; ilm != NULL; 10468 ilm = ilm_walker_step(&ilw, ilm)) { 10469 if (!IN6_ARE_ADDR_EQUAL( 10470 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10471 continue; 10472 if ((fanout_flags & 10473 IP_FF_NO_MCAST_LOOP) && 10474 ilm->ilm_zoneid == ire->ire_zoneid) 10475 continue; 10476 if (!ipif_lookup_zoneid( 10477 ilw.ilw_walk_ill, ilm->ilm_zoneid, 10478 IPIF_UP, NULL)) 10479 continue; 10480 10481 first_mp1 = ip_copymsg(first_mp); 10482 if (first_mp1 == NULL) 10483 continue; 10484 icmp_inbound_v6(q, first_mp1, 10485 ilw.ilw_walk_ill, ill, hdr_length, 10486 mctl_present, IP6_NO_IPPOLICY, 10487 ilm->ilm_zoneid, NULL); 10488 } 10489 ilm_walker_finish(&ilw); 10490 } else {
/*
 * Not multicast, or the ill is a loopback ill: a single copy goes to
 * icmp_inbound_v6() for ire->ire_zoneid. A failed copy is skipped without
 * bumping any counter.
 */
10491 first_mp1 = ip_copymsg(first_mp); 10492 if (first_mp1 != NULL) 10493 icmp_inbound_v6(q, first_mp1, ill, ill, 10494 hdr_length, mctl_present, 10495 IP6_NO_IPPOLICY, ire->ire_zoneid, 10496 NULL); 10497 } 10498 } 10499 /* FALLTHRU */ 10500 default: { 10501 /* 10502 * Handle protocols with which IPv6 is less intimate. 10503 */ 10504 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10505 10506 /* 10507 * Enable sending ICMP for "Unknown" nexthdr 10508 * case. i.e. 
where we did not FALLTHRU from 10509 * IPPROTO_ICMPV6 processing case above. 10510 */ 10511 if (nexthdr != IPPROTO_ICMPV6) 10512 fanout_flags |= IP_FF_SEND_ICMP; 10513 /* 10514 * Note: There can be more than one stream bound 10515 * to a particular protocol. When this is the case, 10516 * each one gets a copy of any incoming packets. 10517 */ 10518 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10519 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10520 mctl_present, ire->ire_zoneid); 10521 return; 10522 } 10523 } 10524 } 10525 10526 /* 10527 * Send packet using IRE. 10528 * Checksumming is controlled by cksum_request: 10529 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10530 * 1 => Skip TCP/UDP/SCTP checksum 10531 * Otherwise => checksum_request contains insert offset for checksum 10532 * 10533 * Assumes that the following set of headers appear in the first 10534 * mblk: 10535 * ip6_t 10536 * Any extension headers 10537 * TCP/UDP/SCTP header (if present) 10538 * The routine can handle an ICMPv6 header that is not in the first mblk. 10539 * 10540 * NOTE : This function does not ire_refrele the ire passed in as the 10541 * argument unlike ip_wput_ire where the REFRELE is done. 10542 * Refer to ip_wput_ire for more on this. 10543 */ 10544 static void 10545 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10546 int cksum_request, conn_t *connp, int caller, int flags, zoneid_t zoneid) 10547 { 10548 ip6_t *ip6h; 10549 uint8_t nexthdr; 10550 uint16_t hdr_length; 10551 uint_t reachable = 0x0; 10552 ill_t *ill; 10553 mib2_ipIfStatsEntry_t *mibptr; 10554 mblk_t *first_mp; 10555 boolean_t mctl_present; 10556 ipsec_out_t *io; 10557 boolean_t conn_dontroute; /* conn value for multicast */ 10558 boolean_t conn_multicast_loop; /* conn value for multicast */ 10559 boolean_t multicast_forward; /* Should we forward ? 
*/ 10560 int max_frag; 10561 ip_stack_t *ipst = ire->ire_ipst; 10562 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10563 10564 ill = ire_to_ill(ire); 10565 first_mp = mp; 10566 multicast_forward = B_FALSE; 10567 10568 if (mp->b_datap->db_type != M_CTL) { 10569 ip6h = (ip6_t *)first_mp->b_rptr; 10570 } else { 10571 io = (ipsec_out_t *)first_mp->b_rptr; 10572 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10573 /* 10574 * Grab the zone id now because the M_CTL can be discarded by 10575 * ip_wput_ire_parse_ipsec_out() below. 10576 */ 10577 ASSERT(zoneid == io->ipsec_out_zoneid); 10578 ASSERT(zoneid != ALL_ZONES); 10579 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10580 /* 10581 * For the multicast case, ipsec_out carries conn_dontroute and 10582 * conn_multicast_loop as conn may not be available here. We 10583 * need this for multicast loopback and forwarding which is done 10584 * later in the code. 10585 */ 10586 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10587 conn_dontroute = io->ipsec_out_dontroute; 10588 conn_multicast_loop = io->ipsec_out_multicast_loop; 10589 /* 10590 * If conn_dontroute is not set or conn_multicast_loop 10591 * is set, we need to do forwarding/loopback. For 10592 * datagrams from ip_wput_multicast, conn_dontroute is 10593 * set to B_TRUE and conn_multicast_loop is set to 10594 * B_FALSE so that we neither do forwarding nor 10595 * loopback. 10596 */ 10597 if (!conn_dontroute || conn_multicast_loop) 10598 multicast_forward = B_TRUE; 10599 } 10600 } 10601 10602 /* 10603 * If the sender didn't supply the hop limit and there is a default 10604 * unicast hop limit associated with the output interface, we use 10605 * that if the packet is unicast. Interface specific unicast hop 10606 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10607 */ 10608 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10609 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10610 ip6h->ip6_hops = ill->ill_max_hops; 10611 } 10612 10613 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10614 ire->ire_zoneid != ALL_ZONES) { 10615 /* 10616 * When a zone sends a packet to another zone, we try to deliver 10617 * the packet under the same conditions as if the destination 10618 * was a real node on the network. To do so, we look for a 10619 * matching route in the forwarding table. 10620 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10621 * ip_newroute_v6() does. 10622 * Note that IRE_LOCAL are special, since they are used 10623 * when the zoneid doesn't match in some cases. This means that 10624 * we need to handle ipha_src differently since ire_src_addr 10625 * belongs to the receiving zone instead of the sending zone. 10626 * When ip_restrict_interzone_loopback is set, then 10627 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10628 * for loopback between zones when the logical "Ethernet" would 10629 * have looped them back. 
10630 */ 10631 ire_t *src_ire; 10632 10633 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10634 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10635 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10636 if (src_ire != NULL && 10637 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10638 (!ipst->ips_ip_restrict_interzone_loopback || 10639 ire_local_same_lan(ire, src_ire))) { 10640 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10641 !unspec_src) { 10642 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10643 } 10644 ire_refrele(src_ire); 10645 } else { 10646 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10647 if (src_ire != NULL) { 10648 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10649 ire_refrele(src_ire); 10650 freemsg(first_mp); 10651 return; 10652 } 10653 ire_refrele(src_ire); 10654 } 10655 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10656 /* Failed */ 10657 freemsg(first_mp); 10658 return; 10659 } 10660 icmp_unreachable_v6(q, first_mp, 10661 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10662 zoneid, ipst); 10663 return; 10664 } 10665 } 10666 10667 if (mp->b_datap->db_type == M_CTL || 10668 ipss->ipsec_outbound_v6_policy_present) { 10669 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10670 connp, unspec_src, zoneid); 10671 if (mp == NULL) { 10672 return; 10673 } 10674 } 10675 10676 first_mp = mp; 10677 if (mp->b_datap->db_type == M_CTL) { 10678 io = (ipsec_out_t *)mp->b_rptr; 10679 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10680 mp = mp->b_cont; 10681 mctl_present = B_TRUE; 10682 } else { 10683 mctl_present = B_FALSE; 10684 } 10685 10686 ip6h = (ip6_t *)mp->b_rptr; 10687 nexthdr = ip6h->ip6_nxt; 10688 mibptr = ill->ill_ip_mib; 10689 10690 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10691 ipif_t *ipif; 10692 10693 /* 10694 * Select the source address using ipif_select_source_v6. 
10695 */ 10696 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, B_FALSE, 10697 IPV6_PREFER_SRC_DEFAULT, zoneid); 10698 if (ipif == NULL) { 10699 if (ip_debug > 2) { 10700 /* ip1dbg */ 10701 pr_addr_dbg("ip_wput_ire_v6: no src for " 10702 "dst %s\n", AF_INET6, &ip6h->ip6_dst); 10703 printf("through interface %s\n", ill->ill_name); 10704 } 10705 freemsg(first_mp); 10706 return; 10707 } 10708 ip6h->ip6_src = ipif->ipif_v6src_addr; 10709 ipif_refrele(ipif); 10710 } 10711 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10712 if ((connp != NULL && connp->conn_multicast_loop) || 10713 !IS_LOOPBACK(ill)) { 10714 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 10715 ALL_ZONES) != NULL) { 10716 mblk_t *nmp; 10717 int fanout_flags = 0; 10718 10719 if (connp != NULL && 10720 !connp->conn_multicast_loop) { 10721 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10722 } 10723 ip1dbg(("ip_wput_ire_v6: " 10724 "Loopback multicast\n")); 10725 nmp = ip_copymsg(first_mp); 10726 if (nmp != NULL) { 10727 ip6_t *nip6h; 10728 mblk_t *mp_ip6h; 10729 10730 if (mctl_present) { 10731 nip6h = (ip6_t *) 10732 nmp->b_cont->b_rptr; 10733 mp_ip6h = nmp->b_cont; 10734 } else { 10735 nip6h = (ip6_t *)nmp->b_rptr; 10736 mp_ip6h = nmp; 10737 } 10738 10739 DTRACE_PROBE4( 10740 ip6__loopback__out__start, 10741 ill_t *, NULL, 10742 ill_t *, ill, 10743 ip6_t *, nip6h, 10744 mblk_t *, nmp); 10745 10746 FW_HOOKS6( 10747 ipst->ips_ip6_loopback_out_event, 10748 ipst->ips_ipv6firewall_loopback_out, 10749 NULL, ill, nip6h, nmp, mp_ip6h, 10750 0, ipst); 10751 10752 DTRACE_PROBE1( 10753 ip6__loopback__out__end, 10754 mblk_t *, nmp); 10755 10756 /* 10757 * DTrace this as ip:::send. A blocked 10758 * packet will fire the send probe, but 10759 * not the receive probe. 
10760 */ 10761 DTRACE_IP7(send, mblk_t *, nmp, 10762 conn_t *, NULL, void_ip_t *, nip6h, 10763 __dtrace_ipsr_ill_t *, ill, 10764 ipha_t *, NULL, ip6_t *, nip6h, 10765 int, 1); 10766 10767 if (nmp != NULL) { 10768 /* 10769 * Deliver locally and to 10770 * every local zone, except 10771 * the sending zone when 10772 * IPV6_MULTICAST_LOOP is 10773 * disabled. 10774 */ 10775 ip_wput_local_v6(RD(q), ill, 10776 nip6h, nmp, ire, 10777 fanout_flags, zoneid); 10778 } 10779 } else { 10780 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10781 ip1dbg(("ip_wput_ire_v6: " 10782 "copymsg failed\n")); 10783 } 10784 } 10785 } 10786 if (ip6h->ip6_hops == 0 || 10787 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10788 IS_LOOPBACK(ill)) { 10789 /* 10790 * Local multicast or just loopback on loopback 10791 * interface. 10792 */ 10793 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10794 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10795 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10796 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10797 freemsg(first_mp); 10798 return; 10799 } 10800 } 10801 10802 if (ire->ire_stq != NULL) { 10803 uint32_t sum; 10804 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10805 ill_phyint->phyint_ifindex; 10806 queue_t *dev_q = ire->ire_stq->q_next; 10807 10808 /* 10809 * non-NULL send-to queue - packet is to be sent 10810 * out an interface. 10811 */ 10812 10813 /* Driver is flow-controlling? */ 10814 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10815 DEV_Q_FLOW_BLOCKED(dev_q)) { 10816 /* 10817 * Queue packet if we have an conn to give back 10818 * pressure. We can't queue packets intended for 10819 * hardware acceleration since we've tossed that 10820 * state already. If the packet is being fed back 10821 * from ire_send_v6, we don't know the position in 10822 * the queue to enqueue the packet and we discard 10823 * the packet. 
10824 */ 10825 if (ipst->ips_ip_output_queue && connp != NULL && 10826 !mctl_present && caller != IRE_SEND) { 10827 if (caller == IP_WSRV) { 10828 idl_tx_list_t *idl_txl; 10829 10830 idl_txl = &ipst->ips_idl_tx_list[0]; 10831 connp->conn_did_putbq = 1; 10832 (void) putbq(connp->conn_wq, mp); 10833 conn_drain_insert(connp, idl_txl); 10834 /* 10835 * caller == IP_WSRV implies we are 10836 * the service thread, and the 10837 * queue is already noenabled. 10838 * The check for canput and 10839 * the putbq is not atomic. 10840 * So we need to check again. 10841 */ 10842 if (canput(dev_q)) 10843 connp->conn_did_putbq = 0; 10844 } else { 10845 (void) putq(connp->conn_wq, mp); 10846 } 10847 return; 10848 } 10849 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10850 freemsg(first_mp); 10851 return; 10852 } 10853 10854 /* 10855 * Look for reachability confirmations from the transport. 10856 */ 10857 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10858 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10859 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10860 if (mctl_present) 10861 io->ipsec_out_reachable = B_TRUE; 10862 } 10863 /* Fastpath */ 10864 switch (nexthdr) { 10865 case IPPROTO_TCP: 10866 case IPPROTO_UDP: 10867 case IPPROTO_ICMPV6: 10868 case IPPROTO_SCTP: 10869 hdr_length = IPV6_HDR_LEN; 10870 break; 10871 default: { 10872 uint8_t *nexthdrp; 10873 10874 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10875 &hdr_length, &nexthdrp)) { 10876 /* Malformed packet */ 10877 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10878 freemsg(first_mp); 10879 return; 10880 } 10881 nexthdr = *nexthdrp; 10882 break; 10883 } 10884 } 10885 10886 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10887 uint16_t *up; 10888 uint16_t *insp; 10889 10890 /* 10891 * The packet header is processed once for all, even 10892 * in the multirouting case. We disable hardware 10893 * checksum if the packet is multirouted, as it will be 10894 * replicated via several interfaces, and not all of 10895 * them may have this capability. 
10896 */ 10897 if (cksum_request == 1 && 10898 !(ire->ire_flags & RTF_MULTIRT)) { 10899 /* Skip the transport checksum */ 10900 goto cksum_done; 10901 } 10902 /* 10903 * Do user-configured raw checksum. 10904 * Compute checksum and insert at offset "cksum_request" 10905 */ 10906 10907 /* check for enough headers for checksum */ 10908 cksum_request += hdr_length; /* offset from rptr */ 10909 if ((mp->b_wptr - mp->b_rptr) < 10910 (cksum_request + sizeof (int16_t))) { 10911 if (!pullupmsg(mp, 10912 cksum_request + sizeof (int16_t))) { 10913 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10914 " failed\n")); 10915 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10916 freemsg(first_mp); 10917 return; 10918 } 10919 ip6h = (ip6_t *)mp->b_rptr; 10920 } 10921 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10922 ASSERT(((uintptr_t)insp & 0x1) == 0); 10923 up = (uint16_t *)&ip6h->ip6_src; 10924 /* 10925 * icmp has placed length and routing 10926 * header adjustment in *insp. 10927 */ 10928 sum = htons(nexthdr) + 10929 up[0] + up[1] + up[2] + up[3] + 10930 up[4] + up[5] + up[6] + up[7] + 10931 up[8] + up[9] + up[10] + up[11] + 10932 up[12] + up[13] + up[14] + up[15]; 10933 sum = (sum & 0xffff) + (sum >> 16); 10934 *insp = IP_CSUM(mp, hdr_length, sum); 10935 } else if (nexthdr == IPPROTO_TCP) { 10936 uint16_t *up; 10937 10938 /* 10939 * Check for full IPv6 header + enough TCP header 10940 * to get at the checksum field. 
10941 */ 10942 if ((mp->b_wptr - mp->b_rptr) < 10943 (hdr_length + TCP_CHECKSUM_OFFSET + 10944 TCP_CHECKSUM_SIZE)) { 10945 if (!pullupmsg(mp, hdr_length + 10946 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10947 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10948 " failed\n")); 10949 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10950 freemsg(first_mp); 10951 return; 10952 } 10953 ip6h = (ip6_t *)mp->b_rptr; 10954 } 10955 10956 up = (uint16_t *)&ip6h->ip6_src; 10957 /* 10958 * Note: The TCP module has stored the length value 10959 * into the tcp checksum field, so we don't 10960 * need to explicitly sum it in here. 10961 */ 10962 sum = up[0] + up[1] + up[2] + up[3] + 10963 up[4] + up[5] + up[6] + up[7] + 10964 up[8] + up[9] + up[10] + up[11] + 10965 up[12] + up[13] + up[14] + up[15]; 10966 10967 /* Fold the initial sum */ 10968 sum = (sum & 0xffff) + (sum >> 16); 10969 10970 up = (uint16_t *)(((uchar_t *)ip6h) + 10971 hdr_length + TCP_CHECKSUM_OFFSET); 10972 10973 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10974 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10975 ire->ire_max_frag, mctl_present, sum); 10976 10977 /* Software checksum? 
*/ 10978 if (DB_CKSUMFLAGS(mp) == 0) { 10979 IP6_STAT(ipst, ip6_out_sw_cksum); 10980 IP6_STAT_UPDATE(ipst, 10981 ip6_tcp_out_sw_cksum_bytes, 10982 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10983 hdr_length); 10984 } 10985 } else if (nexthdr == IPPROTO_UDP) { 10986 uint16_t *up; 10987 10988 /* 10989 * check for full IPv6 header + enough UDP header 10990 * to get at the UDP checksum field 10991 */ 10992 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10993 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10994 if (!pullupmsg(mp, hdr_length + 10995 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10996 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10997 " failed\n")); 10998 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10999 freemsg(first_mp); 11000 return; 11001 } 11002 ip6h = (ip6_t *)mp->b_rptr; 11003 } 11004 up = (uint16_t *)&ip6h->ip6_src; 11005 /* 11006 * Note: The UDP module has stored the length value 11007 * into the udp checksum field, so we don't 11008 * need to explicitly sum it in here. 11009 */ 11010 sum = up[0] + up[1] + up[2] + up[3] + 11011 up[4] + up[5] + up[6] + up[7] + 11012 up[8] + up[9] + up[10] + up[11] + 11013 up[12] + up[13] + up[14] + up[15]; 11014 11015 /* Fold the initial sum */ 11016 sum = (sum & 0xffff) + (sum >> 16); 11017 11018 up = (uint16_t *)(((uchar_t *)ip6h) + 11019 hdr_length + UDP_CHECKSUM_OFFSET); 11020 11021 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11022 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11023 ire->ire_max_frag, mctl_present, sum); 11024 11025 /* Software checksum? 
*/ 11026 if (DB_CKSUMFLAGS(mp) == 0) { 11027 IP6_STAT(ipst, ip6_out_sw_cksum); 11028 IP6_STAT_UPDATE(ipst, 11029 ip6_udp_out_sw_cksum_bytes, 11030 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11031 hdr_length); 11032 } 11033 } else if (nexthdr == IPPROTO_ICMPV6) { 11034 uint16_t *up; 11035 icmp6_t *icmp6; 11036 11037 /* check for full IPv6+ICMPv6 header */ 11038 if ((mp->b_wptr - mp->b_rptr) < 11039 (hdr_length + ICMP6_MINLEN)) { 11040 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11041 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11042 " failed\n")); 11043 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11044 freemsg(first_mp); 11045 return; 11046 } 11047 ip6h = (ip6_t *)mp->b_rptr; 11048 } 11049 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11050 up = (uint16_t *)&ip6h->ip6_src; 11051 /* 11052 * icmp has placed length and routing 11053 * header adjustment in icmp6_cksum. 11054 */ 11055 sum = htons(IPPROTO_ICMPV6) + 11056 up[0] + up[1] + up[2] + up[3] + 11057 up[4] + up[5] + up[6] + up[7] + 11058 up[8] + up[9] + up[10] + up[11] + 11059 up[12] + up[13] + up[14] + up[15]; 11060 sum = (sum & 0xffff) + (sum >> 16); 11061 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11062 11063 /* Update output mib stats */ 11064 icmp_update_out_mib_v6(ill, icmp6); 11065 } else if (nexthdr == IPPROTO_SCTP) { 11066 sctp_hdr_t *sctph; 11067 11068 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11069 if (!pullupmsg(mp, hdr_length + 11070 sizeof (*sctph))) { 11071 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11072 " failed\n")); 11073 BUMP_MIB(ill->ill_ip_mib, 11074 ipIfStatsOutDiscards); 11075 freemsg(mp); 11076 return; 11077 } 11078 ip6h = (ip6_t *)mp->b_rptr; 11079 } 11080 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11081 sctph->sh_chksum = 0; 11082 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11083 } 11084 11085 cksum_done: 11086 /* 11087 * We force the insertion of a fragment header using the 11088 * IPH_FRAG_HDR flag in two cases: 11089 * - after reception of an ICMPv6 "packet too 
big" message 11090 * with a MTU < 1280 (cf. RFC 2460 section 5) 11091 * - for multirouted IPv6 packets, so that the receiver can 11092 * discard duplicates according to their fragment identifier 11093 * 11094 * Two flags modifed from the API can modify this behavior. 11095 * The first is IPV6_USE_MIN_MTU. With this API the user 11096 * can specify how to manage PMTUD for unicast and multicast. 11097 * 11098 * IPV6_DONTFRAG disallows fragmentation. 11099 */ 11100 max_frag = ire->ire_max_frag; 11101 switch (IP6I_USE_MIN_MTU_API(flags)) { 11102 case IPV6_USE_MIN_MTU_DEFAULT: 11103 case IPV6_USE_MIN_MTU_UNICAST: 11104 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11105 max_frag = IPV6_MIN_MTU; 11106 } 11107 break; 11108 11109 case IPV6_USE_MIN_MTU_NEVER: 11110 max_frag = IPV6_MIN_MTU; 11111 break; 11112 } 11113 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11114 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11115 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11116 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11117 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11118 return; 11119 } 11120 11121 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11122 (mp->b_cont ? 
msgdsize(mp) : 11123 mp->b_wptr - (uchar_t *)ip6h)) { 11124 ip0dbg(("Packet length mismatch: %d, %ld\n", 11125 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11126 msgdsize(mp))); 11127 freemsg(first_mp); 11128 return; 11129 } 11130 /* Do IPSEC processing first */ 11131 if (mctl_present) { 11132 ipsec_out_process(q, first_mp, ire, ill_index); 11133 return; 11134 } 11135 ASSERT(mp->b_prev == NULL); 11136 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11137 ntohs(ip6h->ip6_plen) + 11138 IPV6_HDR_LEN, max_frag)); 11139 ASSERT(mp == first_mp); 11140 /* Initiate IPPF processing */ 11141 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11142 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11143 if (mp == NULL) { 11144 return; 11145 } 11146 } 11147 ip_wput_frag_v6(mp, ire, reachable, connp, 11148 caller, max_frag); 11149 return; 11150 } 11151 /* Do IPSEC processing first */ 11152 if (mctl_present) { 11153 int extra_len = ipsec_out_extra_length(first_mp); 11154 11155 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11156 max_frag && connp != NULL && 11157 (flags & IP6I_DONTFRAG)) { 11158 /* 11159 * IPsec headers will push the packet over the 11160 * MTU limit. Issue an ICMPv6 Packet Too Big 11161 * message for this packet if the upper-layer 11162 * that issued this packet will be able to 11163 * react to the icmp_pkt2big_v6() that we'll 11164 * generate. 11165 */ 11166 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11167 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11168 return; 11169 } 11170 ipsec_out_process(q, first_mp, ire, ill_index); 11171 return; 11172 } 11173 /* 11174 * XXX multicast: add ip_mforward_v6() here. 11175 * Check conn_dontroute 11176 */ 11177 #ifdef lint 11178 /* 11179 * XXX The only purpose of this statement is to avoid lint 11180 * errors. See the above "XXX multicast". When that gets 11181 * fixed, remove this whole #ifdef lint section. 11182 */ 11183 ip3dbg(("multicast forward is %s.\n", 11184 (multicast_forward ? 
"TRUE" : "FALSE"))); 11185 #endif 11186 11187 UPDATE_OB_PKT_COUNT(ire); 11188 ire->ire_last_used_time = lbolt; 11189 ASSERT(mp == first_mp); 11190 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11191 } else { 11192 /* 11193 * DTrace this as ip:::send. A blocked packet will fire the 11194 * send probe, but not the receive probe. 11195 */ 11196 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11197 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11198 NULL, ip6_t *, ip6h, int, 1); 11199 DTRACE_PROBE4(ip6__loopback__out__start, 11200 ill_t *, NULL, ill_t *, ill, 11201 ip6_t *, ip6h, mblk_t *, first_mp); 11202 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11203 ipst->ips_ipv6firewall_loopback_out, 11204 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11205 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11206 if (first_mp != NULL) { 11207 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, 11208 zoneid); 11209 } 11210 } 11211 } 11212 11213 /* 11214 * Outbound IPv6 fragmentation routine using MDT. 11215 */ 11216 static void 11217 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11218 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11219 { 11220 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11221 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11222 mblk_t *hdr_mp, *md_mp = NULL; 11223 int i1; 11224 multidata_t *mmd; 11225 unsigned char *hdr_ptr, *pld_ptr; 11226 ip_pdescinfo_t pdi; 11227 uint32_t ident; 11228 size_t len; 11229 uint16_t offset; 11230 queue_t *stq = ire->ire_stq; 11231 ill_t *ill = (ill_t *)stq->q_ptr; 11232 ip_stack_t *ipst = ill->ill_ipst; 11233 11234 ASSERT(DB_TYPE(mp) == M_DATA); 11235 ASSERT(MBLKL(mp) > unfragmentable_len); 11236 11237 /* 11238 * Move read ptr past unfragmentable portion, we don't want this part 11239 * of the data in our fragments. 11240 */ 11241 mp->b_rptr += unfragmentable_len; 11242 11243 /* Calculate how many packets we will send out */ 11244 i1 = (mp->b_cont == NULL) ? 
MBLKL(mp) : msgsize(mp); 11245 pkts = (i1 + max_chunk - 1) / max_chunk; 11246 ASSERT(pkts > 1); 11247 11248 /* Allocate a message block which will hold all the IP Headers. */ 11249 wroff = ipst->ips_ip_wroff_extra; 11250 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11251 11252 i1 = pkts * hdr_chunk_len; 11253 /* 11254 * Create the header buffer, Multidata and destination address 11255 * and SAP attribute that should be associated with it. 11256 */ 11257 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11258 ((hdr_mp->b_wptr += i1), 11259 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11260 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11261 freemsg(mp); 11262 if (md_mp == NULL) { 11263 freemsg(hdr_mp); 11264 } else { 11265 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11266 freemsg(md_mp); 11267 } 11268 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11269 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11270 return; 11271 } 11272 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11273 11274 /* 11275 * Add a payload buffer to the Multidata; this operation must not 11276 * fail, or otherwise our logic in this routine is broken. There 11277 * is no memory allocation done by the routine, so any returned 11278 * failure simply tells us that we've done something wrong. 11279 * 11280 * A failure tells us that either we're adding the same payload 11281 * buffer more than once, or we're trying to add more buffers than 11282 * allowed. None of the above cases should happen, and we panic 11283 * because either there's horrible heap corruption, and/or 11284 * programming mistake. 11285 */ 11286 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11287 goto pbuf_panic; 11288 } 11289 11290 hdr_ptr = hdr_mp->b_rptr; 11291 pld_ptr = mp->b_rptr; 11292 11293 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11294 11295 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11296 11297 /* 11298 * len is the total length of the fragmentable data in this 11299 * datagram. 
For each fragment sent, we will decrement len 11300 * by the amount of fragmentable data sent in that fragment 11301 * until len reaches zero. 11302 */ 11303 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11304 11305 offset = 0; 11306 prev_nexthdr_offset += wroff; 11307 11308 while (len != 0) { 11309 size_t mlen; 11310 ip6_t *fip6h; 11311 ip6_frag_t *fraghdr; 11312 int error; 11313 11314 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11315 mlen = MIN(len, max_chunk); 11316 len -= mlen; 11317 11318 fip6h = (ip6_t *)(hdr_ptr + wroff); 11319 ASSERT(OK_32PTR(fip6h)); 11320 bcopy(ip6h, fip6h, unfragmentable_len); 11321 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11322 11323 fip6h->ip6_plen = htons((uint16_t)(mlen + 11324 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11325 11326 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11327 unfragmentable_len); 11328 fraghdr->ip6f_nxt = nexthdr; 11329 fraghdr->ip6f_reserved = 0; 11330 fraghdr->ip6f_offlg = htons(offset) | 11331 ((len != 0) ? IP6F_MORE_FRAG : 0); 11332 fraghdr->ip6f_ident = ident; 11333 11334 /* 11335 * Record offset and size of header and data of the next packet 11336 * in the multidata message. 11337 */ 11338 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11339 unfragmentable_len + sizeof (ip6_frag_t), 0); 11340 PDESC_PLD_INIT(&pdi); 11341 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11342 ASSERT(i1 > 0); 11343 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11344 if (i1 == mlen) { 11345 pld_ptr += mlen; 11346 } else { 11347 i1 = mlen - i1; 11348 mp = mp->b_cont; 11349 ASSERT(mp != NULL); 11350 ASSERT(MBLKL(mp) >= i1); 11351 /* 11352 * Attach the next payload message block to the 11353 * multidata message. 
11354 */ 11355 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11356 goto pbuf_panic; 11357 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11358 pld_ptr = mp->b_rptr + i1; 11359 } 11360 11361 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11362 KM_NOSLEEP)) == NULL) { 11363 /* 11364 * Any failure other than ENOMEM indicates that we 11365 * have passed in invalid pdesc info or parameters 11366 * to mmd_addpdesc, which must not happen. 11367 * 11368 * EINVAL is a result of failure on boundary checks 11369 * against the pdesc info contents. It should not 11370 * happen, and we panic because either there's 11371 * horrible heap corruption, and/or programming 11372 * mistake. 11373 */ 11374 if (error != ENOMEM) { 11375 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11376 "pdesc logic error detected for " 11377 "mmd %p pinfo %p (%d)\n", 11378 (void *)mmd, (void *)&pdi, error); 11379 /* NOTREACHED */ 11380 } 11381 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11382 /* Free unattached payload message blocks as well */ 11383 md_mp->b_cont = mp->b_cont; 11384 goto free_mmd; 11385 } 11386 11387 /* Advance fragment offset. */ 11388 offset += mlen; 11389 11390 /* Advance to location for next header in the buffer. */ 11391 hdr_ptr += hdr_chunk_len; 11392 11393 /* Did we reach the next payload message block? */ 11394 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11395 mp = mp->b_cont; 11396 /* 11397 * Attach the next message block with payload 11398 * data to the multidata message. 
11399 */ 11400 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11401 goto pbuf_panic; 11402 pld_ptr = mp->b_rptr; 11403 } 11404 } 11405 11406 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11407 ASSERT(mp->b_wptr == pld_ptr); 11408 11409 /* Update IP statistics */ 11410 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11411 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11412 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11413 /* 11414 * The ipv6 header len is accounted for in unfragmentable_len so 11415 * when calculating the fragmentation overhead just add the frag 11416 * header len. 11417 */ 11418 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11419 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11420 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11421 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11422 11423 ire->ire_ob_pkt_count += pkts; 11424 if (ire->ire_ipif != NULL) 11425 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11426 11427 ire->ire_last_used_time = lbolt; 11428 /* Send it down */ 11429 putnext(stq, md_mp); 11430 return; 11431 11432 pbuf_panic: 11433 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11434 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11435 pbuf_idx); 11436 /* NOTREACHED */ 11437 } 11438 11439 /* 11440 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11441 * We have not optimized this in terms of number of mblks 11442 * allocated. For instance, for each fragment sent we always allocate a 11443 * mblk to hold the IPv6 header and fragment header. 11444 * 11445 * Assumes that all the extension headers are contained in the first mblk. 11446 * 11447 * The fragment header is inserted after an hop-by-hop options header 11448 * and after [an optional destinations header followed by] a routing header. 11449 * 11450 * NOTE : This function does not ire_refrele the ire passed in as 11451 * the argument. 
11452 */ 11453 void 11454 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11455 int caller, int max_frag) 11456 { 11457 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11458 ip6_t *fip6h; 11459 mblk_t *hmp; 11460 mblk_t *hmp0; 11461 mblk_t *dmp; 11462 ip6_frag_t *fraghdr; 11463 size_t unfragmentable_len; 11464 size_t len; 11465 size_t mlen; 11466 size_t max_chunk; 11467 uint32_t ident; 11468 uint16_t off_flags; 11469 uint16_t offset = 0; 11470 ill_t *ill; 11471 uint8_t nexthdr; 11472 uint_t prev_nexthdr_offset; 11473 uint8_t *ptr; 11474 ip_stack_t *ipst = ire->ire_ipst; 11475 11476 ASSERT(ire->ire_type == IRE_CACHE); 11477 ill = (ill_t *)ire->ire_stq->q_ptr; 11478 11479 if (max_frag <= 0) { 11480 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11481 freemsg(mp); 11482 return; 11483 } 11484 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11485 11486 /* 11487 * Determine the length of the unfragmentable portion of this 11488 * datagram. This consists of the IPv6 header, a potential 11489 * hop-by-hop options header, a potential pre-routing-header 11490 * destination options header, and a potential routing header. 
11491 */ 11492 nexthdr = ip6h->ip6_nxt; 11493 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11494 ptr = (uint8_t *)&ip6h[1]; 11495 11496 if (nexthdr == IPPROTO_HOPOPTS) { 11497 ip6_hbh_t *hbh_hdr; 11498 uint_t hdr_len; 11499 11500 hbh_hdr = (ip6_hbh_t *)ptr; 11501 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11502 nexthdr = hbh_hdr->ip6h_nxt; 11503 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11504 - (uint8_t *)ip6h; 11505 ptr += hdr_len; 11506 } 11507 if (nexthdr == IPPROTO_DSTOPTS) { 11508 ip6_dest_t *dest_hdr; 11509 uint_t hdr_len; 11510 11511 dest_hdr = (ip6_dest_t *)ptr; 11512 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11513 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11514 nexthdr = dest_hdr->ip6d_nxt; 11515 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11516 - (uint8_t *)ip6h; 11517 ptr += hdr_len; 11518 } 11519 } 11520 if (nexthdr == IPPROTO_ROUTING) { 11521 ip6_rthdr_t *rthdr; 11522 uint_t hdr_len; 11523 11524 rthdr = (ip6_rthdr_t *)ptr; 11525 nexthdr = rthdr->ip6r_nxt; 11526 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11527 - (uint8_t *)ip6h; 11528 hdr_len = 8 * (rthdr->ip6r_len + 1); 11529 ptr += hdr_len; 11530 } 11531 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11532 11533 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11534 sizeof (ip6_frag_t)) & ~7; 11535 11536 /* Check if we can use MDT to send out the frags. */ 11537 ASSERT(!IRE_IS_LOCAL(ire)); 11538 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11539 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11540 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11541 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11542 nexthdr, prev_nexthdr_offset); 11543 return; 11544 } 11545 11546 /* 11547 * Allocate an mblk with enough room for the link-layer 11548 * header, the unfragmentable part of the datagram, and the 11549 * fragment header. 
This (or a copy) will be used as the 11550 * first mblk for each fragment we send. 11551 */ 11552 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 11553 ipst->ips_ip_wroff_extra, mp); 11554 if (hmp == NULL) { 11555 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11556 freemsg(mp); 11557 return; 11558 } 11559 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11560 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11561 11562 fip6h = (ip6_t *)hmp->b_rptr; 11563 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11564 11565 bcopy(ip6h, fip6h, unfragmentable_len); 11566 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11567 11568 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11569 11570 fraghdr->ip6f_nxt = nexthdr; 11571 fraghdr->ip6f_reserved = 0; 11572 fraghdr->ip6f_offlg = 0; 11573 fraghdr->ip6f_ident = htonl(ident); 11574 11575 /* 11576 * len is the total length of the fragmentable data in this 11577 * datagram. For each fragment sent, we will decrement len 11578 * by the amount of fragmentable data sent in that fragment 11579 * until len reaches zero. 11580 */ 11581 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11582 11583 /* 11584 * Move read ptr past unfragmentable portion, we don't want this part 11585 * of the data in our fragments. 
11586 */ 11587 mp->b_rptr += unfragmentable_len; 11588 11589 while (len != 0) { 11590 mlen = MIN(len, max_chunk); 11591 len -= mlen; 11592 if (len != 0) { 11593 /* Not last */ 11594 hmp0 = copyb(hmp); 11595 if (hmp0 == NULL) { 11596 freeb(hmp); 11597 freemsg(mp); 11598 BUMP_MIB(ill->ill_ip_mib, 11599 ipIfStatsOutFragFails); 11600 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11601 return; 11602 } 11603 off_flags = IP6F_MORE_FRAG; 11604 } else { 11605 /* Last fragment */ 11606 hmp0 = hmp; 11607 hmp = NULL; 11608 off_flags = 0; 11609 } 11610 fip6h = (ip6_t *)(hmp0->b_rptr); 11611 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11612 11613 fip6h->ip6_plen = htons((uint16_t)(mlen + 11614 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11615 /* 11616 * Note: Optimization alert. 11617 * In IPv6 (and IPv4) protocol header, Fragment Offset 11618 * ("offset") is 13 bits wide and in 8-octet units. 11619 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11620 * it occupies the most significant 13 bits. 11621 * (least significant 13 bits in IPv4). 11622 * We do not do any shifts here. Not shifting is same effect 11623 * as taking offset value in octet units, dividing by 8 and 11624 * then shifting 3 bits left to line it up in place in proper 11625 * place protocol header. 
11626 */ 11627 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11628 11629 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11630 /* mp has already been freed by ip_carve_mp() */ 11631 if (hmp != NULL) 11632 freeb(hmp); 11633 freeb(hmp0); 11634 ip1dbg(("ip_carve_mp: failed\n")); 11635 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11636 return; 11637 } 11638 hmp0->b_cont = dmp; 11639 /* Get the priority marking, if any */ 11640 hmp0->b_band = dmp->b_band; 11641 UPDATE_OB_PKT_COUNT(ire); 11642 ire->ire_last_used_time = lbolt; 11643 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11644 caller, NULL); 11645 reachable = 0; /* No need to redo state machine in loop */ 11646 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11647 offset += mlen; 11648 } 11649 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11650 } 11651 11652 /* 11653 * Determine if the ill and multicast aspects of that packets 11654 * "matches" the conn. 11655 */ 11656 boolean_t 11657 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11658 zoneid_t zoneid) 11659 { 11660 ill_t *bound_ill; 11661 boolean_t wantpacket; 11662 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11663 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11664 11665 /* 11666 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11667 * unicast and multicast reception to conn_incoming_ill. 11668 * conn_wantpacket_v6 is called both for unicast and 11669 * multicast. 11670 */ 11671 bound_ill = connp->conn_incoming_ill; 11672 if (bound_ill != NULL) { 11673 if (IS_IPMP(bound_ill)) { 11674 if (bound_ill->ill_grp != ill->ill_grp) 11675 return (B_FALSE); 11676 } else { 11677 if (bound_ill != ill) 11678 return (B_FALSE); 11679 } 11680 } 11681 11682 if (connp->conn_multi_router) 11683 return (B_TRUE); 11684 11685 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11686 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11687 /* 11688 * Unicast case: we match the conn only if it's in the specified 11689 * zone. 
11690 */ 11691 return (IPCL_ZONE_MATCH(connp, zoneid)); 11692 } 11693 11694 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11695 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11696 /* 11697 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11698 * disabled, therefore we don't dispatch the multicast packet to 11699 * the sending zone. 11700 */ 11701 return (B_FALSE); 11702 } 11703 11704 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11705 zoneid != ALL_ZONES) { 11706 /* 11707 * Multicast packet on the loopback interface: we only match 11708 * conns who joined the group in the specified zone. 11709 */ 11710 return (B_FALSE); 11711 } 11712 11713 mutex_enter(&connp->conn_lock); 11714 wantpacket = 11715 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11716 mutex_exit(&connp->conn_lock); 11717 11718 return (wantpacket); 11719 } 11720 11721 11722 /* 11723 * Transmit a packet and update any NUD state based on the flags 11724 * XXX need to "recover" any ip6i_t when doing putq! 11725 * 11726 * NOTE : This function does not ire_refrele the ire passed in as the 11727 * argument. 
11728 */ 11729 void 11730 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11731 int caller, ipsec_out_t *io) 11732 { 11733 mblk_t *mp1; 11734 nce_t *nce = ire->ire_nce; 11735 ill_t *ill; 11736 ill_t *out_ill; 11737 uint64_t delta; 11738 ip6_t *ip6h; 11739 queue_t *stq = ire->ire_stq; 11740 ire_t *ire1 = NULL; 11741 ire_t *save_ire = ire; 11742 boolean_t multirt_send = B_FALSE; 11743 mblk_t *next_mp = NULL; 11744 ip_stack_t *ipst = ire->ire_ipst; 11745 boolean_t fp_prepend = B_FALSE; 11746 uint32_t hlen; 11747 11748 ip6h = (ip6_t *)mp->b_rptr; 11749 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11750 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11751 ASSERT(nce != NULL); 11752 ASSERT(mp->b_datap->db_type == M_DATA); 11753 ASSERT(stq != NULL); 11754 11755 ill = ire_to_ill(ire); 11756 if (!ill) { 11757 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11758 freemsg(mp); 11759 return; 11760 } 11761 11762 /* 11763 * If a packet is to be sent out an interface that is a 6to4 11764 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11765 * destination, must be checked to have a 6to4 prefix 11766 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11767 * address configured on the sending interface. Otherwise, 11768 * the packet was delivered to this interface in error and the 11769 * packet must be dropped. 
11770 */ 11771 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11772 ipif_t *ipif = ill->ill_ipif; 11773 11774 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11775 &ip6h->ip6_dst)) { 11776 if (ip_debug > 2) { 11777 /* ip1dbg */ 11778 pr_addr_dbg("ip_xmit_v6: attempting to " 11779 "send 6to4 addressed IPv6 " 11780 "destination (%s) out the wrong " 11781 "interface.\n", AF_INET6, 11782 &ip6h->ip6_dst); 11783 } 11784 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 11785 freemsg(mp); 11786 return; 11787 } 11788 } 11789 11790 /* Flow-control check has been done in ip_wput_ire_v6 */ 11791 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11792 caller == IP_WSRV || canput(stq->q_next)) { 11793 uint32_t ill_index; 11794 11795 /* 11796 * In most cases, the emission loop below is entered only 11797 * once. Only in the case where the ire holds the 11798 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11799 * flagged ires in the bucket, and send the packet 11800 * through all crossed RTF_MULTIRT routes. 11801 */ 11802 if (ire->ire_flags & RTF_MULTIRT) { 11803 /* 11804 * Multirouting case. The bucket where ire is stored 11805 * probably holds other RTF_MULTIRT flagged ires 11806 * to the destination. In this call to ip_xmit_v6, 11807 * we attempt to send the packet through all 11808 * those ires. Thus, we first ensure that ire is the 11809 * first RTF_MULTIRT ire in the bucket, 11810 * before walking the ire list. 11811 */ 11812 ire_t *first_ire; 11813 irb_t *irb = ire->ire_bucket; 11814 ASSERT(irb != NULL); 11815 multirt_send = B_TRUE; 11816 11817 /* Make sure we do not omit any multiroute ire. 
*/ 11818 IRB_REFHOLD(irb); 11819 for (first_ire = irb->irb_ire; 11820 first_ire != NULL; 11821 first_ire = first_ire->ire_next) { 11822 if ((first_ire->ire_flags & RTF_MULTIRT) && 11823 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11824 &ire->ire_addr_v6)) && 11825 !(first_ire->ire_marks & 11826 (IRE_MARK_CONDEMNED | IRE_MARK_TESTHIDDEN))) 11827 break; 11828 } 11829 11830 if ((first_ire != NULL) && (first_ire != ire)) { 11831 IRE_REFHOLD(first_ire); 11832 /* ire will be released by the caller */ 11833 ire = first_ire; 11834 nce = ire->ire_nce; 11835 stq = ire->ire_stq; 11836 ill = ire_to_ill(ire); 11837 } 11838 IRB_REFRELE(irb); 11839 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11840 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11841 ILL_MDT_USABLE(ill)) { 11842 /* 11843 * This tcp connection was marked as MDT-capable, but 11844 * it has been turned off due changes in the interface. 11845 * Now that the interface support is back, turn it on 11846 * by notifying tcp. We don't directly modify tcp_mdt, 11847 * since we leave all the details to the tcp code that 11848 * knows better. 11849 */ 11850 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11851 11852 if (mdimp == NULL) { 11853 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11854 "connp %p (ENOMEM)\n", (void *)connp)); 11855 } else { 11856 CONN_INC_REF(connp); 11857 SQUEUE_ENTER_ONE(connp->conn_sqp, mdimp, 11858 tcp_input, connp, SQ_FILL, 11859 SQTAG_TCP_INPUT_MCTL); 11860 } 11861 } 11862 11863 do { 11864 mblk_t *mp_ip6h; 11865 11866 if (multirt_send) { 11867 irb_t *irb; 11868 /* 11869 * We are in a multiple send case, need to get 11870 * the next ire and make a duplicate of the 11871 * packet. ire1 holds here the next ire to 11872 * process in the bucket. If multirouting is 11873 * expected, any non-RTF_MULTIRT ire that has 11874 * the right destination address is ignored. 
11875 */ 11876 irb = ire->ire_bucket; 11877 ASSERT(irb != NULL); 11878 11879 IRB_REFHOLD(irb); 11880 for (ire1 = ire->ire_next; 11881 ire1 != NULL; 11882 ire1 = ire1->ire_next) { 11883 if (!(ire1->ire_flags & RTF_MULTIRT)) 11884 continue; 11885 if (!IN6_ARE_ADDR_EQUAL( 11886 &ire1->ire_addr_v6, 11887 &ire->ire_addr_v6)) 11888 continue; 11889 if (ire1->ire_marks & 11890 IRE_MARK_CONDEMNED) 11891 continue; 11892 11893 /* Got one */ 11894 if (ire1 != save_ire) { 11895 IRE_REFHOLD(ire1); 11896 } 11897 break; 11898 } 11899 IRB_REFRELE(irb); 11900 11901 if (ire1 != NULL) { 11902 next_mp = copyb(mp); 11903 if ((next_mp == NULL) || 11904 ((mp->b_cont != NULL) && 11905 ((next_mp->b_cont = 11906 dupmsg(mp->b_cont)) == NULL))) { 11907 freemsg(next_mp); 11908 next_mp = NULL; 11909 ire_refrele(ire1); 11910 ire1 = NULL; 11911 } 11912 } 11913 11914 /* Last multiroute ire; don't loop anymore. */ 11915 if (ire1 == NULL) { 11916 multirt_send = B_FALSE; 11917 } 11918 } 11919 11920 ill_index = 11921 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11922 11923 /* Initiate IPPF processing */ 11924 if (IP6_OUT_IPP(flags, ipst)) { 11925 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11926 if (mp == NULL) { 11927 BUMP_MIB(ill->ill_ip_mib, 11928 ipIfStatsOutDiscards); 11929 if (next_mp != NULL) 11930 freemsg(next_mp); 11931 if (ire != save_ire) { 11932 ire_refrele(ire); 11933 } 11934 return; 11935 } 11936 ip6h = (ip6_t *)mp->b_rptr; 11937 } 11938 mp_ip6h = mp; 11939 11940 /* 11941 * Check for fastpath, we need to hold nce_lock to 11942 * prevent fastpath update from chaining nce_fp_mp. 
11943 */ 11944 11945 ASSERT(nce->nce_ipversion != IPV4_VERSION); 11946 mutex_enter(&nce->nce_lock); 11947 if ((mp1 = nce->nce_fp_mp) != NULL) { 11948 uchar_t *rptr; 11949 11950 hlen = MBLKL(mp1); 11951 rptr = mp->b_rptr - hlen; 11952 /* 11953 * make sure there is room for the fastpath 11954 * datalink header 11955 */ 11956 if (rptr < mp->b_datap->db_base) { 11957 mp1 = copyb(mp1); 11958 mutex_exit(&nce->nce_lock); 11959 if (mp1 == NULL) { 11960 BUMP_MIB(ill->ill_ip_mib, 11961 ipIfStatsOutDiscards); 11962 freemsg(mp); 11963 if (next_mp != NULL) 11964 freemsg(next_mp); 11965 if (ire != save_ire) { 11966 ire_refrele(ire); 11967 } 11968 return; 11969 } 11970 mp1->b_cont = mp; 11971 11972 /* Get the priority marking, if any */ 11973 mp1->b_band = mp->b_band; 11974 mp = mp1; 11975 } else { 11976 mp->b_rptr = rptr; 11977 /* 11978 * fastpath - pre-pend datalink 11979 * header 11980 */ 11981 bcopy(mp1->b_rptr, rptr, hlen); 11982 mutex_exit(&nce->nce_lock); 11983 fp_prepend = B_TRUE; 11984 } 11985 } else { 11986 /* 11987 * Get the DL_UNITDATA_REQ. 11988 */ 11989 mp1 = nce->nce_res_mp; 11990 if (mp1 == NULL) { 11991 mutex_exit(&nce->nce_lock); 11992 ip1dbg(("ip_xmit_v6: No resolution " 11993 "block ire = %p\n", (void *)ire)); 11994 freemsg(mp); 11995 if (next_mp != NULL) 11996 freemsg(next_mp); 11997 if (ire != save_ire) { 11998 ire_refrele(ire); 11999 } 12000 return; 12001 } 12002 /* 12003 * Prepend the DL_UNITDATA_REQ. 
12004 */ 12005 mp1 = copyb(mp1); 12006 mutex_exit(&nce->nce_lock); 12007 if (mp1 == NULL) { 12008 BUMP_MIB(ill->ill_ip_mib, 12009 ipIfStatsOutDiscards); 12010 freemsg(mp); 12011 if (next_mp != NULL) 12012 freemsg(next_mp); 12013 if (ire != save_ire) { 12014 ire_refrele(ire); 12015 } 12016 return; 12017 } 12018 mp1->b_cont = mp; 12019 12020 /* Get the priority marking, if any */ 12021 mp1->b_band = mp->b_band; 12022 mp = mp1; 12023 } 12024 12025 out_ill = (ill_t *)stq->q_ptr; 12026 12027 DTRACE_PROBE4(ip6__physical__out__start, 12028 ill_t *, NULL, ill_t *, out_ill, 12029 ip6_t *, ip6h, mblk_t *, mp); 12030 12031 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12032 ipst->ips_ipv6firewall_physical_out, 12033 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 12034 12035 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12036 12037 if (mp == NULL) { 12038 if (multirt_send) { 12039 ASSERT(ire1 != NULL); 12040 if (ire != save_ire) { 12041 ire_refrele(ire); 12042 } 12043 /* 12044 * Proceed with the next RTF_MULTIRT 12045 * ire, also set up the send-to queue 12046 * accordingly. 12047 */ 12048 ire = ire1; 12049 ire1 = NULL; 12050 stq = ire->ire_stq; 12051 nce = ire->ire_nce; 12052 ill = ire_to_ill(ire); 12053 mp = next_mp; 12054 next_mp = NULL; 12055 continue; 12056 } else { 12057 ASSERT(next_mp == NULL); 12058 ASSERT(ire1 == NULL); 12059 break; 12060 } 12061 } 12062 12063 if (ipst->ips_ipobs_enabled) { 12064 zoneid_t szone; 12065 12066 szone = ip_get_zoneid_v6(&ip6h->ip6_src, 12067 mp_ip6h, out_ill, ipst, ALL_ZONES); 12068 ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, 12069 ALL_ZONES, out_ill, IPV6_VERSION, 12070 fp_prepend ? hlen : 0, ipst); 12071 } 12072 12073 /* 12074 * Update ire and MIB counters; for save_ire, this has 12075 * been done by the caller. 
12076 */ 12077 if (ire != save_ire) { 12078 UPDATE_OB_PKT_COUNT(ire); 12079 ire->ire_last_used_time = lbolt; 12080 12081 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12082 BUMP_MIB(ill->ill_ip_mib, 12083 ipIfStatsHCOutMcastPkts); 12084 UPDATE_MIB(ill->ill_ip_mib, 12085 ipIfStatsHCOutMcastOctets, 12086 ntohs(ip6h->ip6_plen) + 12087 IPV6_HDR_LEN); 12088 } 12089 } 12090 12091 /* 12092 * Send it down. XXX Do we want to flow control AH/ESP 12093 * packets that carry TCP payloads? We don't flow 12094 * control TCP packets, but we should also not 12095 * flow-control TCP packets that have been protected. 12096 * We don't have an easy way to find out if an AH/ESP 12097 * packet was originally TCP or not currently. 12098 */ 12099 if (io == NULL) { 12100 BUMP_MIB(ill->ill_ip_mib, 12101 ipIfStatsHCOutTransmits); 12102 UPDATE_MIB(ill->ill_ip_mib, 12103 ipIfStatsHCOutOctets, 12104 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12105 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 12106 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 12107 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 12108 int, 0); 12109 12110 putnext(stq, mp); 12111 } else { 12112 /* 12113 * Safety Pup says: make sure this is 12114 * going to the right interface! 
12115 */ 12116 if (io->ipsec_out_capab_ill_index != 12117 ill_index) { 12118 /* IPsec kstats: bump lose counter */ 12119 freemsg(mp1); 12120 } else { 12121 BUMP_MIB(ill->ill_ip_mib, 12122 ipIfStatsHCOutTransmits); 12123 UPDATE_MIB(ill->ill_ip_mib, 12124 ipIfStatsHCOutOctets, 12125 ntohs(ip6h->ip6_plen) + 12126 IPV6_HDR_LEN); 12127 DTRACE_IP7(send, mblk_t *, mp, 12128 conn_t *, NULL, void_ip_t *, ip6h, 12129 __dtrace_ipsr_ill_t *, out_ill, 12130 ipha_t *, NULL, ip6_t *, ip6h, int, 12131 0); 12132 ipsec_hw_putnext(stq, mp); 12133 } 12134 } 12135 12136 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12137 if (ire != save_ire) { 12138 ire_refrele(ire); 12139 } 12140 if (multirt_send) { 12141 ASSERT(ire1 != NULL); 12142 /* 12143 * Proceed with the next RTF_MULTIRT 12144 * ire, also set up the send-to queue 12145 * accordingly. 12146 */ 12147 ire = ire1; 12148 ire1 = NULL; 12149 stq = ire->ire_stq; 12150 nce = ire->ire_nce; 12151 ill = ire_to_ill(ire); 12152 mp = next_mp; 12153 next_mp = NULL; 12154 continue; 12155 } 12156 ASSERT(next_mp == NULL); 12157 ASSERT(ire1 == NULL); 12158 return; 12159 } 12160 12161 ASSERT(nce->nce_state != ND_INCOMPLETE); 12162 12163 /* 12164 * Check for upper layer advice 12165 */ 12166 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12167 /* 12168 * It should be o.k. to check the state without 12169 * a lock here, at most we lose an advice. 
12170 */ 12171 nce->nce_last = TICK_TO_MSEC(lbolt64); 12172 if (nce->nce_state != ND_REACHABLE) { 12173 12174 mutex_enter(&nce->nce_lock); 12175 nce->nce_state = ND_REACHABLE; 12176 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12177 mutex_exit(&nce->nce_lock); 12178 (void) untimeout(nce->nce_timeout_id); 12179 if (ip_debug > 2) { 12180 /* ip1dbg */ 12181 pr_addr_dbg("ip_xmit_v6: state" 12182 " for %s changed to" 12183 " REACHABLE\n", AF_INET6, 12184 &ire->ire_addr_v6); 12185 } 12186 } 12187 if (ire != save_ire) { 12188 ire_refrele(ire); 12189 } 12190 if (multirt_send) { 12191 ASSERT(ire1 != NULL); 12192 /* 12193 * Proceed with the next RTF_MULTIRT 12194 * ire, also set up the send-to queue 12195 * accordingly. 12196 */ 12197 ire = ire1; 12198 ire1 = NULL; 12199 stq = ire->ire_stq; 12200 nce = ire->ire_nce; 12201 ill = ire_to_ill(ire); 12202 mp = next_mp; 12203 next_mp = NULL; 12204 continue; 12205 } 12206 ASSERT(next_mp == NULL); 12207 ASSERT(ire1 == NULL); 12208 return; 12209 } 12210 12211 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12212 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12213 " ill_reachable_time = %d \n", delta, 12214 ill->ill_reachable_time)); 12215 if (delta > (uint64_t)ill->ill_reachable_time) { 12216 nce = ire->ire_nce; 12217 mutex_enter(&nce->nce_lock); 12218 switch (nce->nce_state) { 12219 case ND_REACHABLE: 12220 case ND_STALE: 12221 /* 12222 * ND_REACHABLE is identical to 12223 * ND_STALE in this specific case. If 12224 * reachable time has expired for this 12225 * neighbor (delta is greater than 12226 * reachable time), conceptually, the 12227 * neighbor cache is no longer in 12228 * REACHABLE state, but already in 12229 * STALE state. So the correct 12230 * transition here is to ND_DELAY. 
12231 */ 12232 nce->nce_state = ND_DELAY; 12233 mutex_exit(&nce->nce_lock); 12234 NDP_RESTART_TIMER(nce, 12235 ipst->ips_delay_first_probe_time); 12236 if (ip_debug > 3) { 12237 /* ip2dbg */ 12238 pr_addr_dbg("ip_xmit_v6: state" 12239 " for %s changed to" 12240 " DELAY\n", AF_INET6, 12241 &ire->ire_addr_v6); 12242 } 12243 break; 12244 case ND_DELAY: 12245 case ND_PROBE: 12246 mutex_exit(&nce->nce_lock); 12247 /* Timers have already started */ 12248 break; 12249 case ND_UNREACHABLE: 12250 /* 12251 * ndp timer has detected that this nce 12252 * is unreachable and initiated deleting 12253 * this nce and all its associated IREs. 12254 * This is a race where we found the 12255 * ire before it was deleted and have 12256 * just sent out a packet using this 12257 * unreachable nce. 12258 */ 12259 mutex_exit(&nce->nce_lock); 12260 break; 12261 default: 12262 ASSERT(0); 12263 } 12264 } 12265 12266 if (multirt_send) { 12267 ASSERT(ire1 != NULL); 12268 /* 12269 * Proceed with the next RTF_MULTIRT ire, 12270 * Also set up the send-to queue accordingly. 12271 */ 12272 if (ire != save_ire) { 12273 ire_refrele(ire); 12274 } 12275 ire = ire1; 12276 ire1 = NULL; 12277 stq = ire->ire_stq; 12278 nce = ire->ire_nce; 12279 ill = ire_to_ill(ire); 12280 mp = next_mp; 12281 next_mp = NULL; 12282 } 12283 } while (multirt_send); 12284 /* 12285 * In the multirouting case, release the last ire used for 12286 * emission. save_ire will be released by the caller. 12287 */ 12288 if (ire != save_ire) { 12289 ire_refrele(ire); 12290 } 12291 } else { 12292 /* 12293 * Can't apply backpressure, just discard the packet. 12294 */ 12295 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12296 freemsg(mp); 12297 return; 12298 } 12299 } 12300 12301 /* 12302 * pr_addr_dbg function provides the needed buffer space to call 12303 * inet_ntop() function's 3rd argument. This function should be 12304 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12305 * stack buffer space in it's own stack frame. 
This function uses 12306 * a buffer from it's own stack and prints the information. 12307 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12308 * 12309 * Note: This function can call inet_ntop() once. 12310 */ 12311 void 12312 pr_addr_dbg(char *fmt1, int af, const void *addr) 12313 { 12314 char buf[INET6_ADDRSTRLEN]; 12315 12316 if (fmt1 == NULL) { 12317 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12318 return; 12319 } 12320 12321 /* 12322 * This does not compare debug level and just prints 12323 * out. Thus it is the responsibility of the caller 12324 * to check the appropriate debug-level before calling 12325 * this function. 12326 */ 12327 if (ip_debug > 0) { 12328 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12329 } 12330 12331 12332 } 12333 12334 12335 /* 12336 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12337 * if needed and extension headers) that will be needed based on the 12338 * ip6_pkt_t structure passed by the caller. 12339 * 12340 * The returned length does not include the length of the upper level 12341 * protocol (ULP) header. 
12342 */ 12343 int 12344 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12345 { 12346 int len; 12347 12348 len = IPV6_HDR_LEN; 12349 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12350 len += sizeof (ip6i_t); 12351 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12352 ASSERT(ipp->ipp_hopoptslen != 0); 12353 len += ipp->ipp_hopoptslen; 12354 } 12355 if (ipp->ipp_fields & IPPF_RTHDR) { 12356 ASSERT(ipp->ipp_rthdrlen != 0); 12357 len += ipp->ipp_rthdrlen; 12358 } 12359 /* 12360 * En-route destination options 12361 * Only do them if there's a routing header as well 12362 */ 12363 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12364 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12365 ASSERT(ipp->ipp_rtdstoptslen != 0); 12366 len += ipp->ipp_rtdstoptslen; 12367 } 12368 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12369 ASSERT(ipp->ipp_dstoptslen != 0); 12370 len += ipp->ipp_dstoptslen; 12371 } 12372 return (len); 12373 } 12374 12375 /* 12376 * All-purpose routine to build a header chain of an IPv6 header 12377 * followed by any required extension headers and a proto header, 12378 * preceeded (where necessary) by an ip6i_t private header. 12379 * 12380 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12381 * will be filled in appropriately. 12382 * Thus the caller must fill in the rest of the IPv6 header, such as 12383 * traffic class/flowid, source address (if not set here), hoplimit (if not 12384 * set here) and destination address. 12385 * 12386 * The extension headers and ip6i_t header will all be fully filled in. 12387 */ 12388 void 12389 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12390 ip6_pkt_t *ipp, uint8_t protocol) 12391 { 12392 uint8_t *nxthdr_ptr; 12393 uint8_t *cp; 12394 ip6i_t *ip6i; 12395 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12396 12397 /* 12398 * If sending private ip6i_t header down (checksum info, nexthop, 12399 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12400 * then fill it in. (The checksum info will be filled in by icmp). 
12401 */ 12402 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12403 ip6i = (ip6i_t *)ip6h; 12404 ip6h = (ip6_t *)&ip6i[1]; 12405 12406 ip6i->ip6i_flags = 0; 12407 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12408 if (ipp->ipp_fields & IPPF_IFINDEX || 12409 ipp->ipp_fields & IPPF_SCOPE_ID) { 12410 ASSERT(ipp->ipp_ifindex != 0); 12411 ip6i->ip6i_flags |= IP6I_IFINDEX; 12412 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12413 } 12414 if (ipp->ipp_fields & IPPF_ADDR) { 12415 /* 12416 * Enable per-packet source address verification if 12417 * IPV6_PKTINFO specified the source address. 12418 * ip6_src is set in the transport's _wput function. 12419 */ 12420 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12421 &ipp->ipp_addr)); 12422 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12423 } 12424 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12425 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12426 /* 12427 * We need to set this flag so that IP doesn't 12428 * rewrite the IPv6 header's hoplimit with the 12429 * current default value. 12430 */ 12431 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12432 } 12433 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12434 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12435 &ipp->ipp_nexthop)); 12436 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12437 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12438 } 12439 /* 12440 * tell IP this is an ip6i_t private header 12441 */ 12442 ip6i->ip6i_nxt = IPPROTO_RAW; 12443 } 12444 /* Initialize IPv6 header */ 12445 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12446 if (ipp->ipp_fields & IPPF_TCLASS) { 12447 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12448 (ipp->ipp_tclass << 20); 12449 } 12450 if (ipp->ipp_fields & IPPF_ADDR) 12451 ip6h->ip6_src = ipp->ipp_addr; 12452 12453 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12454 cp = (uint8_t *)&ip6h[1]; 12455 /* 12456 * Here's where we have to start stringing together 12457 * any extension headers in the right order: 12458 * Hop-by-hop, destination, routing, and final destination opts. 
12459 */ 12460 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12461 /* Hop-by-hop options */ 12462 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12463 12464 *nxthdr_ptr = IPPROTO_HOPOPTS; 12465 nxthdr_ptr = &hbh->ip6h_nxt; 12466 12467 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12468 cp += ipp->ipp_hopoptslen; 12469 } 12470 /* 12471 * En-route destination options 12472 * Only do them if there's a routing header as well 12473 */ 12474 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12475 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12476 ip6_dest_t *dst = (ip6_dest_t *)cp; 12477 12478 *nxthdr_ptr = IPPROTO_DSTOPTS; 12479 nxthdr_ptr = &dst->ip6d_nxt; 12480 12481 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12482 cp += ipp->ipp_rtdstoptslen; 12483 } 12484 /* 12485 * Routing header next 12486 */ 12487 if (ipp->ipp_fields & IPPF_RTHDR) { 12488 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12489 12490 *nxthdr_ptr = IPPROTO_ROUTING; 12491 nxthdr_ptr = &rt->ip6r_nxt; 12492 12493 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12494 cp += ipp->ipp_rthdrlen; 12495 } 12496 /* 12497 * Do ultimate destination options 12498 */ 12499 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12500 ip6_dest_t *dest = (ip6_dest_t *)cp; 12501 12502 *nxthdr_ptr = IPPROTO_DSTOPTS; 12503 nxthdr_ptr = &dest->ip6d_nxt; 12504 12505 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12506 cp += ipp->ipp_dstoptslen; 12507 } 12508 /* 12509 * Now set the last header pointer to the proto passed in 12510 */ 12511 *nxthdr_ptr = protocol; 12512 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12513 } 12514 12515 /* 12516 * Return a pointer to the routing header extension header 12517 * in the IPv6 header(s) chain passed in. 
12518 * If none found, return NULL 12519 * Assumes that all extension headers are in same mblk as the v6 header 12520 */ 12521 ip6_rthdr_t * 12522 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12523 { 12524 ip6_dest_t *desthdr; 12525 ip6_frag_t *fraghdr; 12526 uint_t hdrlen; 12527 uint8_t nexthdr; 12528 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12529 12530 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12531 return ((ip6_rthdr_t *)ptr); 12532 12533 /* 12534 * The routing header will precede all extension headers 12535 * other than the hop-by-hop and destination options 12536 * extension headers, so if we see anything other than those, 12537 * we're done and didn't find it. 12538 * We could see a destination options header alone but no 12539 * routing header, in which case we'll return NULL as soon as 12540 * we see anything after that. 12541 * Hop-by-hop and destination option headers are identical, 12542 * so we can use either one we want as a template. 12543 */ 12544 nexthdr = ip6h->ip6_nxt; 12545 while (ptr < endptr) { 12546 /* Is there enough left for len + nexthdr? */ 12547 if (ptr + MIN_EHDR_LEN > endptr) 12548 return (NULL); 12549 12550 switch (nexthdr) { 12551 case IPPROTO_HOPOPTS: 12552 case IPPROTO_DSTOPTS: 12553 /* Assumes the headers are identical for hbh and dst */ 12554 desthdr = (ip6_dest_t *)ptr; 12555 hdrlen = 8 * (desthdr->ip6d_len + 1); 12556 nexthdr = desthdr->ip6d_nxt; 12557 break; 12558 12559 case IPPROTO_ROUTING: 12560 return ((ip6_rthdr_t *)ptr); 12561 12562 case IPPROTO_FRAGMENT: 12563 fraghdr = (ip6_frag_t *)ptr; 12564 hdrlen = sizeof (ip6_frag_t); 12565 nexthdr = fraghdr->ip6f_nxt; 12566 break; 12567 12568 default: 12569 return (NULL); 12570 } 12571 ptr += hdrlen; 12572 } 12573 return (NULL); 12574 } 12575 12576 /* 12577 * Called for source-routed packets originating on this node. 
12578 * Manipulates the original routing header by moving every entry up 12579 * one slot, placing the first entry in the v6 header's v6_dst field, 12580 * and placing the ultimate destination in the routing header's last 12581 * slot. 12582 * 12583 * Returns the checksum diference between the ultimate destination 12584 * (last hop in the routing header when the packet is sent) and 12585 * the first hop (ip6_dst when the packet is sent) 12586 */ 12587 /* ARGSUSED2 */ 12588 uint32_t 12589 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12590 { 12591 uint_t numaddr; 12592 uint_t i; 12593 in6_addr_t *addrptr; 12594 in6_addr_t tmp; 12595 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12596 uint32_t cksm; 12597 uint32_t addrsum = 0; 12598 uint16_t *ptr; 12599 12600 /* 12601 * Perform any processing needed for source routing. 12602 * We know that all extension headers will be in the same mblk 12603 * as the IPv6 header. 12604 */ 12605 12606 /* 12607 * If no segments left in header, or the header length field is zero, 12608 * don't move hop addresses around; 12609 * Checksum difference is zero. 12610 */ 12611 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12612 return (0); 12613 12614 ptr = (uint16_t *)&ip6h->ip6_dst; 12615 cksm = 0; 12616 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12617 cksm += ptr[i]; 12618 } 12619 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12620 12621 /* 12622 * Here's where the fun begins - we have to 12623 * move all addresses up one spot, take the 12624 * first hop and make it our first ip6_dst, 12625 * and place the ultimate destination in the 12626 * newly-opened last slot. 
12627 */ 12628 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12629 numaddr = rthdr->ip6r0_len / 2; 12630 tmp = *addrptr; 12631 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12632 *addrptr = addrptr[1]; 12633 } 12634 *addrptr = ip6h->ip6_dst; 12635 ip6h->ip6_dst = tmp; 12636 12637 /* 12638 * From the checksummed ultimate destination subtract the checksummed 12639 * current ip6_dst (the first hop address). Return that number. 12640 * (In the v4 case, the second part of this is done in each routine 12641 * that calls ip_massage_options(). We do it all in this one place 12642 * for v6). 12643 */ 12644 ptr = (uint16_t *)&ip6h->ip6_dst; 12645 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12646 addrsum += ptr[i]; 12647 } 12648 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12649 if ((int)cksm < 0) 12650 cksm--; 12651 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12652 12653 return (cksm); 12654 } 12655 12656 /* 12657 * Propagate a multicast group membership operation (join/leave) (*fn) on 12658 * all interfaces crossed by the related multirt routes. 12659 * The call is considered successful if the operation succeeds 12660 * on at least one interface. 12661 * The function is called if the destination address in the packet to send 12662 * is multirouted. 
12663 */ 12664 int 12665 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12666 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12667 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12668 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12669 { 12670 ire_t *ire_gw; 12671 irb_t *irb; 12672 int index, error = 0; 12673 opt_restart_t *or; 12674 ip_stack_t *ipst = ire->ire_ipst; 12675 12676 irb = ire->ire_bucket; 12677 ASSERT(irb != NULL); 12678 12679 ASSERT(DB_TYPE(first_mp) == M_CTL); 12680 or = (opt_restart_t *)first_mp->b_rptr; 12681 12682 IRB_REFHOLD(irb); 12683 for (; ire != NULL; ire = ire->ire_next) { 12684 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12685 continue; 12686 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12687 continue; 12688 12689 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12690 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12691 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12692 /* No resolver exists for the gateway; skip this ire. */ 12693 if (ire_gw == NULL) 12694 continue; 12695 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12696 /* 12697 * A resolver exists: we can get the interface on which we have 12698 * to apply the operation. 12699 */ 12700 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12701 first_mp); 12702 if (error == 0) 12703 or->or_private = CGTP_MCAST_SUCCESS; 12704 12705 if (ip_debug > 0) { 12706 ulong_t off; 12707 char *ksym; 12708 12709 ksym = kobj_getsymname((uintptr_t)fn, &off); 12710 ip2dbg(("ip_multirt_apply_membership_v6: " 12711 "called %s, multirt group 0x%08x via itf 0x%08x, " 12712 "error %d [success %u]\n", 12713 ksym ? 
ksym : "?", 12714 ntohl(V4_PART_OF_V6((*v6grp))), 12715 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12716 error, or->or_private)); 12717 } 12718 12719 ire_refrele(ire_gw); 12720 if (error == EINPROGRESS) { 12721 IRB_REFRELE(irb); 12722 return (error); 12723 } 12724 } 12725 IRB_REFRELE(irb); 12726 /* 12727 * Consider the call as successful if we succeeded on at least 12728 * one interface. Otherwise, return the last encountered error. 12729 */ 12730 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12731 } 12732 12733 void 12734 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12735 { 12736 kstat_t *ksp; 12737 12738 ip6_stat_t template = { 12739 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12740 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12741 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12742 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12743 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12744 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12745 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12746 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12747 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12748 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12749 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12750 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12751 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12752 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12753 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 12754 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 12755 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 12756 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 12757 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 12758 }; 12759 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 12760 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 12761 KSTAT_FLAG_VIRTUAL, stackid); 12762 12763 if (ksp == NULL) 12764 return (NULL); 12765 12766 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 12767 ksp->ks_data = (void *)ip6_statisticsp; 12768 ksp->ks_private = (void *)(uintptr_t)stackid; 12769 12770 kstat_install(ksp); 12771 return (ksp); 12772 } 12773 12774 void 12775 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 12776 { 12777 if (ksp != NULL) { 12778 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 12779 kstat_delete_netstack(ksp, stackid); 12780 } 12781 } 12782 12783 /* 12784 * The following two functions set and get the value for the 12785 * IPV6_SRC_PREFERENCES socket option. 12786 */ 12787 int 12788 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12789 { 12790 /* 12791 * We only support preferences that are covered by 12792 * IPV6_PREFER_SRC_MASK. 12793 */ 12794 if (prefs & ~IPV6_PREFER_SRC_MASK) 12795 return (EINVAL); 12796 12797 /* 12798 * Look for conflicting preferences or default preferences. If 12799 * both bits of a related pair are clear, the application wants the 12800 * system's default value for that pair. Both bits in a pair can't 12801 * be set. 
12802 */ 12803 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12804 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12805 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12806 IPV6_PREFER_SRC_MIPMASK) { 12807 return (EINVAL); 12808 } 12809 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12810 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12811 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12812 IPV6_PREFER_SRC_TMPMASK) { 12813 return (EINVAL); 12814 } 12815 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12816 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12817 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12818 IPV6_PREFER_SRC_CGAMASK) { 12819 return (EINVAL); 12820 } 12821 12822 connp->conn_src_preferences = prefs; 12823 return (0); 12824 } 12825 12826 size_t 12827 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12828 { 12829 *val = connp->conn_src_preferences; 12830 return (sizeof (connp->conn_src_preferences)); 12831 } 12832 12833 int 12834 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti) 12835 { 12836 ire_t *ire; 12837 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12838 12839 /* 12840 * Verify the source address and ifindex. Privileged users can use 12841 * any source address. For ancillary data the source address is 12842 * checked in ip_wput_v6. 
12843 */ 12844 if (pkti->ipi6_ifindex != 0) { 12845 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 12846 if (!phyint_exists(pkti->ipi6_ifindex, ipst)) { 12847 rw_exit(&ipst->ips_ill_g_lock); 12848 return (ENXIO); 12849 } 12850 rw_exit(&ipst->ips_ill_g_lock); 12851 } 12852 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12853 secpolicy_net_rawaccess(cr) != 0) { 12854 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12855 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12856 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 12857 if (ire != NULL) 12858 ire_refrele(ire); 12859 else 12860 return (ENXIO); 12861 } 12862 return (0); 12863 } 12864 12865 /* 12866 * Get the size of the IP options (including the IP headers size) 12867 * without including the AH header's size. If till_ah is B_FALSE, 12868 * and if AH header is present, dest options beyond AH header will 12869 * also be included in the returned size. 12870 */ 12871 int 12872 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12873 { 12874 ip6_t *ip6h; 12875 uint8_t nexthdr; 12876 uint8_t *whereptr; 12877 ip6_hbh_t *hbhhdr; 12878 ip6_dest_t *dsthdr; 12879 ip6_rthdr_t *rthdr; 12880 int ehdrlen; 12881 int size; 12882 ah_t *ah; 12883 12884 ip6h = (ip6_t *)mp->b_rptr; 12885 size = IPV6_HDR_LEN; 12886 nexthdr = ip6h->ip6_nxt; 12887 whereptr = (uint8_t *)&ip6h[1]; 12888 for (;;) { 12889 /* Assume IP has already stripped it */ 12890 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12891 switch (nexthdr) { 12892 case IPPROTO_HOPOPTS: 12893 hbhhdr = (ip6_hbh_t *)whereptr; 12894 nexthdr = hbhhdr->ip6h_nxt; 12895 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12896 break; 12897 case IPPROTO_DSTOPTS: 12898 dsthdr = (ip6_dest_t *)whereptr; 12899 nexthdr = dsthdr->ip6d_nxt; 12900 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12901 break; 12902 case IPPROTO_ROUTING: 12903 rthdr = (ip6_rthdr_t *)whereptr; 12904 nexthdr = rthdr->ip6r_nxt; 12905 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12906 break; 12907 default : 12908 if (till_ah) { 12909 
ASSERT(nexthdr == IPPROTO_AH); 12910 return (size); 12911 } 12912 /* 12913 * If we don't have a AH header to traverse, 12914 * return now. This happens normally for 12915 * outbound datagrams where we have not inserted 12916 * the AH header. 12917 */ 12918 if (nexthdr != IPPROTO_AH) { 12919 return (size); 12920 } 12921 12922 /* 12923 * We don't include the AH header's size 12924 * to be symmetrical with other cases where 12925 * we either don't have a AH header (outbound) 12926 * or peek into the AH header yet (inbound and 12927 * not pulled up yet). 12928 */ 12929 ah = (ah_t *)whereptr; 12930 nexthdr = ah->ah_nexthdr; 12931 ehdrlen = (ah->ah_length << 2) + 8; 12932 12933 if (nexthdr == IPPROTO_DSTOPTS) { 12934 if (whereptr + ehdrlen >= mp->b_wptr) { 12935 /* 12936 * The destination options header 12937 * is not part of the first mblk. 12938 */ 12939 whereptr = mp->b_cont->b_rptr; 12940 } else { 12941 whereptr += ehdrlen; 12942 } 12943 12944 dsthdr = (ip6_dest_t *)whereptr; 12945 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12946 size += ehdrlen; 12947 } 12948 return (size); 12949 } 12950 whereptr += ehdrlen; 12951 size += ehdrlen; 12952 } 12953 } 12954 12955 /* 12956 * Utility routine that checks if `v6srcp' is a valid address on underlying 12957 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 12958 * associated with `v6srcp' on success. NOTE: if this is not called from 12959 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 12960 * group during or after this lookup. 
12961 */ 12962 static boolean_t 12963 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 12964 { 12965 ipif_t *ipif; 12966 12967 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 12968 if (ipif != NULL) { 12969 if (ipifp != NULL) 12970 *ipifp = ipif; 12971 else 12972 ipif_refrele(ipif); 12973 return (B_TRUE); 12974 } 12975 12976 if (ip_debug > 2) { 12977 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 12978 "src %s\n", AF_INET6, v6srcp); 12979 } 12980 return (B_FALSE); 12981 } 12982