1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 #include <inet/sadb.h> 92 #include <inet/ipsec_impl.h> 93 #include <inet/tun.h> 94 #include <inet/sctp_ip.h> 95 #include <sys/pattr.h> 96 #include <inet/ipclassifier.h> 97 #include <inet/ipsecah.h> 98 #include <inet/rawip_impl.h> 99 #include <inet/rts_impl.h> 100 #include <sys/squeue_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include 
<rpc/pmap_prot.h> 107 108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern int ip_squeue_flag; 112 113 /* 114 * Naming conventions: 115 * These rules should be judiciously applied 116 * if there is a need to identify something as IPv6 versus IPv4 117 * IPv6 functions will end with _v6 in the ip module. 118 * IPv6 functions will end with _ipv6 in the transport modules. 119 * IPv6 macros: 120 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 121 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 122 * And then there are ..V4_PART_OF_V6. 123 * The intent is that macros in the ip module end with _V6. 124 * IPv6 global variables will start with ipv6_ 125 * IPv6 structures will start with ipv6 126 * IPv6 defined constants should start with IPV6_ 127 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 128 */ 129 130 /* 131 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 132 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 133 * from IANA. This mechanism will remain in effect until an official 134 * number is obtained. 
135 */ 136 uchar_t ip6opt_ls; 137 138 const in6_addr_t ipv6_all_ones = 139 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 140 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 141 142 #ifdef _BIG_ENDIAN 143 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 144 #else /* _BIG_ENDIAN */ 145 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 146 #endif /* _BIG_ENDIAN */ 147 148 #ifdef _BIG_ENDIAN 149 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 150 #else /* _BIG_ENDIAN */ 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 152 #endif /* _BIG_ENDIAN */ 153 154 #ifdef _BIG_ENDIAN 155 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 156 #else /* _BIG_ENDIAN */ 157 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 158 #endif /* _BIG_ENDIAN */ 159 160 #ifdef _BIG_ENDIAN 161 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 162 #else /* _BIG_ENDIAN */ 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 164 #endif /* _BIG_ENDIAN */ 165 166 #ifdef _BIG_ENDIAN 167 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 168 #else /* _BIG_ENDIAN */ 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 170 #endif /* _BIG_ENDIAN */ 171 172 #ifdef _BIG_ENDIAN 173 const in6_addr_t ipv6_solicited_node_mcast = 174 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 175 #else /* _BIG_ENDIAN */ 176 const in6_addr_t ipv6_solicited_node_mcast = 177 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 178 #endif /* _BIG_ENDIAN */ 179 180 /* Leave room for ip_newroute to tack on the src and target addresses */ 181 #define OK_RESOLVER_MP_V6(mp) \ 182 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 183 184 #define IP6_MBLK_OK 0 185 #define IP6_MBLK_HDR_ERR 1 186 #define IP6_MBLK_LEN_ERR 2 187 188 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *, ill_t *, 189 
boolean_t, zoneid_t); 190 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 191 const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 192 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 193 static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *, 194 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 195 boolean_t, boolean_t, cred_t *); 196 static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *, 197 iulp_t *, ip_stack_t *); 198 static void ip_bind_post_handling_v6(conn_t *, mblk_t *, boolean_t, 199 boolean_t, ip_stack_t *); 200 static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t, 201 const in6_addr_t *, uint16_t, boolean_t); 202 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 203 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 204 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 205 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 206 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 207 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 208 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 209 uint8_t *, uint_t, uint8_t, ip_stack_t *); 210 static mblk_t *ip_rput_frag_v6(ill_t *, ill_t *, mblk_t *, ip6_t *, 211 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 212 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 213 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 214 conn_t *, int, int, zoneid_t); 215 static boolean_t ipif_lookup_testaddr_v6(ill_t *, const in6_addr_t *, 216 ipif_t **); 217 218 /* 219 * A template for an IPv6 AR_ENTRY_QUERY 220 */ 221 static areq_t ipv6_areq_template = { 222 AR_ENTRY_QUERY, /* cmd */ 223 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 224 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 225 IP6_DL_SAP, /* protocol, from arps perspective */ 226 sizeof (areq_t), /* target addr offset */ 227 IPV6_ADDR_LEN, /* target addr_length 
*/ 228 0, /* flags */ 229 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 230 IPV6_ADDR_LEN, /* sender addr length */ 231 6, /* xmit_count */ 232 1000, /* (re)xmit_interval in milliseconds */ 233 4 /* max # of requests to buffer */ 234 /* anything else filled in by the code */ 235 }; 236 237 /* 238 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 239 * The message has already been checksummed and if needed, 240 * a copy has been made to be sent any interested ICMP client (conn) 241 * Note that this is different than icmp_inbound() which does the fanout 242 * to conn's as well as local processing of the ICMP packets. 243 * 244 * All error messages are passed to the matching transport stream. 245 * 246 * Zones notes: 247 * The packet is only processed in the context of the specified zone: typically 248 * only this zone will reply to an echo request. This means that the caller must 249 * call icmp_inbound_v6() for each relevant zone. 250 */ 251 static void 252 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 253 uint_t hdr_length, boolean_t mctl_present, uint_t flags, zoneid_t zoneid, 254 mblk_t *dl_mp) 255 { 256 icmp6_t *icmp6; 257 ip6_t *ip6h; 258 boolean_t interested; 259 in6_addr_t origsrc; 260 mblk_t *first_mp; 261 ipsec_in_t *ii; 262 ip_stack_t *ipst = ill->ill_ipst; 263 264 ASSERT(ill != NULL); 265 first_mp = mp; 266 if (mctl_present) { 267 mp = first_mp->b_cont; 268 ASSERT(mp != NULL); 269 270 ii = (ipsec_in_t *)first_mp->b_rptr; 271 ASSERT(ii->ipsec_in_type == IPSEC_IN); 272 } 273 274 ip6h = (ip6_t *)mp->b_rptr; 275 276 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 277 278 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 279 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 280 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 281 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 282 freemsg(first_mp); 283 return; 284 } 285 ip6h = (ip6_t *)mp->b_rptr; 286 } 287 if (ipst->ips_icmp_accept_clear_messages 
== 0) { 288 first_mp = ipsec_check_global_policy(first_mp, NULL, 289 NULL, ip6h, mctl_present, ipst->ips_netstack); 290 if (first_mp == NULL) 291 return; 292 } 293 294 /* 295 * On a labeled system, we have to check whether the zone itself is 296 * permitted to receive raw traffic. 297 */ 298 if (is_system_labeled()) { 299 if (zoneid == ALL_ZONES) 300 zoneid = tsol_packet_to_zoneid(mp); 301 if (!tsol_can_accept_raw(mp, B_FALSE)) { 302 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 303 zoneid)); 304 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 305 freemsg(first_mp); 306 return; 307 } 308 } 309 310 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 311 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 312 icmp6->icmp6_code)); 313 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 314 315 /* Initiate IPPF processing here */ 316 if (IP6_IN_IPP(flags, ipst)) { 317 318 /* 319 * If the ifindex changes due to SIOCSLIFINDEX 320 * packet may return to IP on the wrong ill. 
321 */ 322 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 323 if (mp == NULL) { 324 if (mctl_present) { 325 freeb(first_mp); 326 } 327 return; 328 } 329 } 330 331 switch (icmp6->icmp6_type) { 332 case ICMP6_DST_UNREACH: 333 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 334 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 335 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 336 break; 337 338 case ICMP6_TIME_EXCEEDED: 339 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 340 break; 341 342 case ICMP6_PARAM_PROB: 343 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 344 break; 345 346 case ICMP6_PACKET_TOO_BIG: 347 icmp_inbound_too_big_v6(q, first_mp, ill, inill, mctl_present, 348 zoneid); 349 return; 350 case ICMP6_ECHO_REQUEST: 351 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 352 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 353 !ipst->ips_ipv6_resp_echo_mcast) 354 break; 355 356 /* 357 * We must have exclusive use of the mblk to convert it to 358 * a response. 359 * If not, we copy it. 360 */ 361 if (mp->b_datap->db_ref > 1) { 362 mblk_t *mp1; 363 364 mp1 = copymsg(mp); 365 freemsg(mp); 366 if (mp1 == NULL) { 367 BUMP_MIB(ill->ill_icmp6_mib, 368 ipv6IfIcmpInErrors); 369 if (mctl_present) 370 freeb(first_mp); 371 return; 372 } 373 mp = mp1; 374 ip6h = (ip6_t *)mp->b_rptr; 375 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 376 if (mctl_present) 377 first_mp->b_cont = mp; 378 else 379 first_mp = mp; 380 } 381 382 /* 383 * Turn the echo into an echo reply. 384 * Remove any extension headers (do not reverse a source route) 385 * and clear the flow id (keep traffic class for now). 
386 */ 387 if (hdr_length != IPV6_HDR_LEN) { 388 int i; 389 390 for (i = 0; i < IPV6_HDR_LEN; i++) 391 mp->b_rptr[hdr_length - i - 1] = 392 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 393 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 394 ip6h = (ip6_t *)mp->b_rptr; 395 ip6h->ip6_nxt = IPPROTO_ICMPV6; 396 hdr_length = IPV6_HDR_LEN; 397 } 398 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 399 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 400 401 ip6h->ip6_plen = 402 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 403 origsrc = ip6h->ip6_src; 404 /* 405 * Reverse the source and destination addresses. 406 * If the return address is a multicast, zero out the source 407 * (ip_wput_v6 will set an address). 408 */ 409 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 410 ip6h->ip6_src = ipv6_all_zeros; 411 ip6h->ip6_dst = origsrc; 412 } else { 413 ip6h->ip6_src = ip6h->ip6_dst; 414 ip6h->ip6_dst = origsrc; 415 } 416 417 /* set the hop limit */ 418 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 419 420 /* 421 * Prepare for checksum by putting icmp length in the icmp 422 * checksum field. The checksum is calculated in ip_wput_v6. 423 */ 424 icmp6->icmp6_cksum = ip6h->ip6_plen; 425 426 if (!mctl_present) { 427 /* 428 * This packet should go out the same way as it 429 * came in i.e in clear. To make sure that global 430 * policy will not be applied to this in ip_wput, 431 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
432 */ 433 ASSERT(first_mp == mp); 434 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 435 if (first_mp == NULL) { 436 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 437 freemsg(mp); 438 return; 439 } 440 ii = (ipsec_in_t *)first_mp->b_rptr; 441 442 /* This is not a secure packet */ 443 ii->ipsec_in_secure = B_FALSE; 444 first_mp->b_cont = mp; 445 } 446 ii->ipsec_in_zoneid = zoneid; 447 ASSERT(zoneid != ALL_ZONES); 448 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 449 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 450 return; 451 } 452 put(WR(q), first_mp); 453 return; 454 455 case ICMP6_ECHO_REPLY: 456 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 457 break; 458 459 case ND_ROUTER_SOLICIT: 460 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 461 break; 462 463 case ND_ROUTER_ADVERT: 464 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 465 break; 466 467 case ND_NEIGHBOR_SOLICIT: 468 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 469 if (mctl_present) 470 freeb(first_mp); 471 /* XXX may wish to pass first_mp up to ndp_input someday. */ 472 ndp_input(inill, mp, dl_mp); 473 return; 474 475 case ND_NEIGHBOR_ADVERT: 476 BUMP_MIB(ill->ill_icmp6_mib, 477 ipv6IfIcmpInNeighborAdvertisements); 478 if (mctl_present) 479 freeb(first_mp); 480 /* XXX may wish to pass first_mp up to ndp_input someday. */ 481 ndp_input(inill, mp, dl_mp); 482 return; 483 484 case ND_REDIRECT: { 485 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 486 487 if (ipst->ips_ipv6_ignore_redirect) 488 break; 489 490 /* 491 * As there is no upper client to deliver, we don't 492 * need the first_mp any more. 493 */ 494 if (mctl_present) 495 freeb(first_mp); 496 if (!pullupmsg(mp, -1)) { 497 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 498 break; 499 } 500 icmp_redirect_v6(q, mp, ill); 501 return; 502 } 503 504 /* 505 * The next three icmp messages will be handled by MLD. 
506 * Pass all valid MLD packets up to any process(es) 507 * listening on a raw ICMP socket. MLD messages are 508 * freed by mld_input function. 509 */ 510 case MLD_LISTENER_QUERY: 511 case MLD_LISTENER_REPORT: 512 case MLD_LISTENER_REDUCTION: 513 if (mctl_present) 514 freeb(first_mp); 515 mld_input(q, mp, ill); 516 return; 517 default: 518 break; 519 } 520 if (interested) { 521 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 522 inill, mctl_present, zoneid); 523 } else { 524 freemsg(first_mp); 525 } 526 } 527 528 /* 529 * Process received IPv6 ICMP Packet too big. 530 * After updating any IRE it does the fanout to any matching transport streams. 531 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 532 */ 533 /* ARGSUSED */ 534 static void 535 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 536 boolean_t mctl_present, zoneid_t zoneid) 537 { 538 ip6_t *ip6h; 539 ip6_t *inner_ip6h; 540 icmp6_t *icmp6; 541 uint16_t hdr_length; 542 uint32_t mtu; 543 ire_t *ire, *first_ire; 544 mblk_t *first_mp; 545 ip_stack_t *ipst = ill->ill_ipst; 546 547 first_mp = mp; 548 if (mctl_present) 549 mp = first_mp->b_cont; 550 /* 551 * We must have exclusive use of the mblk to update the MTU 552 * in the packet. 553 * If not, we copy it. 554 * 555 * If there's an M_CTL present, we know that allocated first_mp 556 * earlier in this function, so we know first_mp has refcnt of one. 
557 */ 558 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 559 if (mp->b_datap->db_ref > 1) { 560 mblk_t *mp1; 561 562 mp1 = copymsg(mp); 563 freemsg(mp); 564 if (mp1 == NULL) { 565 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 566 if (mctl_present) 567 freeb(first_mp); 568 return; 569 } 570 mp = mp1; 571 if (mctl_present) 572 first_mp->b_cont = mp; 573 else 574 first_mp = mp; 575 } 576 ip6h = (ip6_t *)mp->b_rptr; 577 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 578 hdr_length = ip_hdr_length_v6(mp, ip6h); 579 else 580 hdr_length = IPV6_HDR_LEN; 581 582 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 583 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 584 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 585 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 586 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 587 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 588 freemsg(first_mp); 589 return; 590 } 591 ip6h = (ip6_t *)mp->b_rptr; 592 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 593 inner_ip6h = (ip6_t *)&icmp6[1]; 594 } 595 596 /* 597 * For link local destinations matching simply on IRE type is not 598 * sufficient. Same link local addresses for different ILL's is 599 * possible. 
600 */ 601 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 602 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 603 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 604 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 605 606 if (first_ire == NULL) { 607 if (ip_debug > 2) { 608 /* ip1dbg */ 609 pr_addr_dbg("icmp_inbound_too_big_v6:" 610 "no ire for dst %s\n", AF_INET6, 611 &inner_ip6h->ip6_dst); 612 } 613 freemsg(first_mp); 614 return; 615 } 616 617 mtu = ntohl(icmp6->icmp6_mtu); 618 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 619 for (ire = first_ire; ire != NULL && 620 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 621 ire = ire->ire_next) { 622 mutex_enter(&ire->ire_lock); 623 if (mtu < IPV6_MIN_MTU) { 624 ip1dbg(("Received mtu less than IPv6 " 625 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 626 mtu = IPV6_MIN_MTU; 627 /* 628 * If an mtu less than IPv6 min mtu is received, 629 * we must include a fragment header in 630 * subsequent packets. 631 */ 632 ire->ire_frag_flag |= IPH_FRAG_HDR; 633 } 634 ip1dbg(("Received mtu from router: %d\n", mtu)); 635 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 636 if (ire->ire_max_frag == mtu) { 637 /* Decreased it */ 638 ire->ire_marks |= IRE_MARK_PMTU; 639 } 640 /* Record the new max frag size for the ULP. */ 641 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 642 /* 643 * If we need a fragment header in every packet 644 * (above case or multirouting), make sure the 645 * ULP takes it into account when computing the 646 * payload size. 
647 */ 648 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 649 sizeof (ip6_frag_t)); 650 } else { 651 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 652 } 653 mutex_exit(&ire->ire_lock); 654 } 655 rw_exit(&first_ire->ire_bucket->irb_lock); 656 ire_refrele(first_ire); 657 } else { 658 irb_t *irb = NULL; 659 /* 660 * for non-link local destinations we match only on the IRE type 661 */ 662 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 663 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 664 ipst); 665 if (ire == NULL) { 666 if (ip_debug > 2) { 667 /* ip1dbg */ 668 pr_addr_dbg("icmp_inbound_too_big_v6:" 669 "no ire for dst %s\n", 670 AF_INET6, &inner_ip6h->ip6_dst); 671 } 672 freemsg(first_mp); 673 return; 674 } 675 irb = ire->ire_bucket; 676 ire_refrele(ire); 677 rw_enter(&irb->irb_lock, RW_READER); 678 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 679 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 680 &inner_ip6h->ip6_dst)) { 681 mtu = ntohl(icmp6->icmp6_mtu); 682 mutex_enter(&ire->ire_lock); 683 if (mtu < IPV6_MIN_MTU) { 684 ip1dbg(("Received mtu less than IPv6" 685 "min mtu %d: %d\n", 686 IPV6_MIN_MTU, mtu)); 687 mtu = IPV6_MIN_MTU; 688 /* 689 * If an mtu less than IPv6 min mtu is 690 * received, we must include a fragment 691 * header in subsequent packets. 692 */ 693 ire->ire_frag_flag |= IPH_FRAG_HDR; 694 } 695 696 ip1dbg(("Received mtu from router: %d\n", mtu)); 697 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 698 if (ire->ire_max_frag == mtu) { 699 /* Decreased it */ 700 ire->ire_marks |= IRE_MARK_PMTU; 701 } 702 /* Record the new max frag size for the ULP. */ 703 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 704 /* 705 * If we need a fragment header in 706 * every packet (above case or 707 * multirouting), make sure the ULP 708 * takes it into account when computing 709 * the payload size. 
710 */ 711 icmp6->icmp6_mtu = 712 htonl(ire->ire_max_frag - 713 sizeof (ip6_frag_t)); 714 } else { 715 icmp6->icmp6_mtu = 716 htonl(ire->ire_max_frag); 717 } 718 mutex_exit(&ire->ire_lock); 719 } 720 } 721 rw_exit(&irb->irb_lock); 722 } 723 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, inill, 724 mctl_present, zoneid); 725 } 726 727 /* 728 * Fanout received ICMPv6 error packets to the transports. 729 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 730 */ 731 void 732 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 733 icmp6_t *icmp6, ill_t *ill, ill_t *inill, boolean_t mctl_present, 734 zoneid_t zoneid) 735 { 736 uint16_t *up; /* Pointer to ports in ULP header */ 737 uint32_t ports; /* reversed ports for fanout */ 738 ip6_t rip6h; /* With reversed addresses */ 739 uint16_t hdr_length; 740 uint8_t *nexthdrp; 741 uint8_t nexthdr; 742 mblk_t *first_mp; 743 ipsec_in_t *ii; 744 tcpha_t *tcpha; 745 conn_t *connp; 746 ip_stack_t *ipst = ill->ill_ipst; 747 748 first_mp = mp; 749 if (mctl_present) { 750 mp = first_mp->b_cont; 751 ASSERT(mp != NULL); 752 753 ii = (ipsec_in_t *)first_mp->b_rptr; 754 ASSERT(ii->ipsec_in_type == IPSEC_IN); 755 } else { 756 ii = NULL; 757 } 758 759 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 760 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 761 762 /* 763 * Need to pullup everything in order to use 764 * ip_hdr_length_nexthdr_v6() 765 */ 766 if (mp->b_cont != NULL) { 767 if (!pullupmsg(mp, -1)) { 768 ip1dbg(("icmp_inbound_error_fanout_v6: " 769 "pullupmsg failed\n")); 770 goto drop_pkt; 771 } 772 ip6h = (ip6_t *)mp->b_rptr; 773 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 774 } 775 776 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 777 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 778 goto drop_pkt; 779 780 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 781 goto drop_pkt; 782 nexthdr = *nexthdrp; 783 784 /* Set message type, must be done 
after pullups */ 785 mp->b_datap->db_type = M_CTL; 786 787 /* Try to pass the ICMP message to clients who need it */ 788 switch (nexthdr) { 789 case IPPROTO_UDP: { 790 /* 791 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 792 * UDP header to get the port information. 793 */ 794 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 795 mp->b_wptr) { 796 break; 797 } 798 /* 799 * Attempt to find a client stream based on port. 800 * Note that we do a reverse lookup since the header is 801 * in the form we sent it out. 802 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 803 * and we only set the src and dst addresses and nexthdr. 804 */ 805 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 806 rip6h.ip6_src = ip6h->ip6_dst; 807 rip6h.ip6_dst = ip6h->ip6_src; 808 rip6h.ip6_nxt = nexthdr; 809 ((uint16_t *)&ports)[0] = up[1]; 810 ((uint16_t *)&ports)[1] = up[0]; 811 812 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, inill, 813 IP6_NO_IPPOLICY, mctl_present, zoneid); 814 return; 815 } 816 case IPPROTO_TCP: { 817 /* 818 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 819 * the TCP header to get the port information. 820 */ 821 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 822 mp->b_wptr) { 823 break; 824 } 825 826 /* 827 * Attempt to find a client stream based on port. 828 * Note that we do a reverse lookup since the header is 829 * in the form we sent it out. 830 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 831 * we only set the src and dst addresses and nexthdr. 
832 */ 833 834 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 835 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 836 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 837 if (connp == NULL) { 838 goto drop_pkt; 839 } 840 841 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp, 842 SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR); 843 return; 844 845 } 846 case IPPROTO_SCTP: 847 /* 848 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 849 * the SCTP header to get the port information. 850 */ 851 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 852 mp->b_wptr) { 853 break; 854 } 855 856 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 857 ((uint16_t *)&ports)[0] = up[1]; 858 ((uint16_t *)&ports)[1] = up[0]; 859 ip_fanout_sctp(first_mp, inill, (ipha_t *)ip6h, ports, 0, 860 mctl_present, IP6_NO_IPPOLICY, zoneid); 861 return; 862 case IPPROTO_ESP: 863 case IPPROTO_AH: { 864 int ipsec_rc; 865 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 866 867 /* 868 * We need a IPSEC_IN in the front to fanout to AH/ESP. 869 * We will re-use the IPSEC_IN if it is already present as 870 * AH/ESP will not affect any fields in the IPSEC_IN for 871 * ICMP errors. If there is no IPSEC_IN, allocate a new 872 * one and attach it in the front. 873 */ 874 if (ii != NULL) { 875 /* 876 * ip_fanout_proto_again converts the ICMP errors 877 * that come back from AH/ESP to M_DATA so that 878 * if it is non-AH/ESP and we do a pullupmsg in 879 * this function, it would work. Convert it back 880 * to M_CTL before we send up as this is a ICMP 881 * error. This could have been generated locally or 882 * by some router. Validate the inner IPSEC 883 * headers. 884 * 885 * NOTE : ill_index is used by ip_fanout_proto_again 886 * to locate the ill. 
887 */ 888 ASSERT(ill != NULL); 889 ii->ipsec_in_ill_index = 890 ill->ill_phyint->phyint_ifindex; 891 ii->ipsec_in_rill_index = 892 inill->ill_phyint->phyint_ifindex; 893 first_mp->b_cont->b_datap->db_type = M_CTL; 894 } else { 895 /* 896 * IPSEC_IN is not present. We attach a ipsec_in 897 * message and send up to IPSEC for validating 898 * and removing the IPSEC headers. Clear 899 * ipsec_in_secure so that when we return 900 * from IPSEC, we don't mistakenly think that this 901 * is a secure packet came from the network. 902 * 903 * NOTE : ill_index is used by ip_fanout_proto_again 904 * to locate the ill. 905 */ 906 ASSERT(first_mp == mp); 907 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 908 ASSERT(ill != NULL); 909 if (first_mp == NULL) { 910 freemsg(mp); 911 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 912 return; 913 } 914 ii = (ipsec_in_t *)first_mp->b_rptr; 915 916 /* This is not a secure packet */ 917 ii->ipsec_in_secure = B_FALSE; 918 first_mp->b_cont = mp; 919 mp->b_datap->db_type = M_CTL; 920 ii->ipsec_in_ill_index = 921 ill->ill_phyint->phyint_ifindex; 922 ii->ipsec_in_rill_index = 923 inill->ill_phyint->phyint_ifindex; 924 } 925 926 if (!ipsec_loaded(ipss)) { 927 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 928 return; 929 } 930 931 if (nexthdr == IPPROTO_ESP) 932 ipsec_rc = ipsecesp_icmp_error(first_mp); 933 else 934 ipsec_rc = ipsecah_icmp_error(first_mp); 935 if (ipsec_rc == IPSEC_STATUS_FAILED) 936 return; 937 938 ip_fanout_proto_again(first_mp, ill, inill, NULL); 939 return; 940 } 941 case IPPROTO_ENCAP: 942 case IPPROTO_IPV6: 943 if ((uint8_t *)ip6h + hdr_length + 944 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 945 sizeof (ip6_t)) > mp->b_wptr) { 946 goto drop_pkt; 947 } 948 949 if (nexthdr == IPPROTO_ENCAP || 950 !IN6_ARE_ADDR_EQUAL( 951 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 952 &ip6h->ip6_src) || 953 !IN6_ARE_ADDR_EQUAL( 954 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 955 &ip6h->ip6_dst)) { 956 /* 957 * For tunnels that have used IPsec protection, 958 * we need to adjust the MTU to take into account 959 * the IPsec overhead. 960 */ 961 if (ii != NULL) 962 icmp6->icmp6_mtu = htonl( 963 ntohl(icmp6->icmp6_mtu) - 964 ipsec_in_extra_length(first_mp)); 965 } else { 966 /* 967 * Self-encapsulated case. As in the ipv4 case, 968 * we need to strip the 2nd IP header. Since mp 969 * is already pulled-up, we can simply bcopy 970 * the 3rd header + data over the 2nd header. 971 */ 972 uint16_t unused_len; 973 ip6_t *inner_ip6h = (ip6_t *) 974 ((uchar_t *)ip6h + hdr_length); 975 976 /* 977 * Make sure we don't do recursion more than once. 978 */ 979 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 980 &unused_len, &nexthdrp) || 981 *nexthdrp == IPPROTO_IPV6) { 982 goto drop_pkt; 983 } 984 985 /* 986 * We are about to modify the packet. Make a copy if 987 * someone else has a reference to it. 988 */ 989 if (DB_REF(mp) > 1) { 990 mblk_t *mp1; 991 uint16_t icmp6_offset; 992 993 mp1 = copymsg(mp); 994 if (mp1 == NULL) { 995 goto drop_pkt; 996 } 997 icmp6_offset = (uint16_t) 998 ((uchar_t *)icmp6 - mp->b_rptr); 999 freemsg(mp); 1000 mp = mp1; 1001 1002 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1003 ip6h = (ip6_t *)&icmp6[1]; 1004 inner_ip6h = (ip6_t *) 1005 ((uchar_t *)ip6h + hdr_length); 1006 1007 if (mctl_present) 1008 first_mp->b_cont = mp; 1009 else 1010 first_mp = mp; 1011 } 1012 1013 /* 1014 * Need to set db_type back to M_DATA before 1015 * refeeding mp into this function. 1016 */ 1017 DB_TYPE(mp) = M_DATA; 1018 1019 /* 1020 * Copy the 3rd header + remaining data on top 1021 * of the 2nd header. 
1022 */ 1023 bcopy(inner_ip6h, ip6h, 1024 mp->b_wptr - (uchar_t *)inner_ip6h); 1025 1026 /* 1027 * Subtract length of the 2nd header. 1028 */ 1029 mp->b_wptr -= hdr_length; 1030 1031 /* 1032 * Now recurse, and see what I _really_ should be 1033 * doing here. 1034 */ 1035 icmp_inbound_error_fanout_v6(q, first_mp, 1036 (ip6_t *)mp->b_rptr, icmp6, ill, inill, 1037 mctl_present, zoneid); 1038 return; 1039 } 1040 /* FALLTHRU */ 1041 default: 1042 /* 1043 * The rip6h header is only used for the lookup and we 1044 * only set the src and dst addresses and nexthdr. 1045 */ 1046 rip6h.ip6_src = ip6h->ip6_dst; 1047 rip6h.ip6_dst = ip6h->ip6_src; 1048 rip6h.ip6_nxt = nexthdr; 1049 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, inill, nexthdr, 0, 1050 IP6_NO_IPPOLICY, mctl_present, zoneid); 1051 return; 1052 } 1053 /* NOTREACHED */ 1054 drop_pkt: 1055 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1056 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1057 freemsg(first_mp); 1058 } 1059 1060 /* 1061 * Process received IPv6 ICMP Redirect messages. 
1062 */ 1063 /* ARGSUSED */ 1064 static void 1065 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1066 { 1067 ip6_t *ip6h; 1068 uint16_t hdr_length; 1069 nd_redirect_t *rd; 1070 ire_t *ire; 1071 ire_t *prev_ire; 1072 ire_t *redir_ire; 1073 in6_addr_t *src, *dst, *gateway; 1074 nd_opt_hdr_t *opt; 1075 nce_t *nce; 1076 int nce_flags = 0; 1077 int err = 0; 1078 boolean_t redirect_to_router = B_FALSE; 1079 int len; 1080 int optlen; 1081 iulp_t ulp_info = { 0 }; 1082 ill_t *prev_ire_ill; 1083 ipif_t *ipif; 1084 ip_stack_t *ipst = ill->ill_ipst; 1085 1086 ip6h = (ip6_t *)mp->b_rptr; 1087 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1088 hdr_length = ip_hdr_length_v6(mp, ip6h); 1089 else 1090 hdr_length = IPV6_HDR_LEN; 1091 1092 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1093 len = mp->b_wptr - mp->b_rptr - hdr_length; 1094 src = &ip6h->ip6_src; 1095 dst = &rd->nd_rd_dst; 1096 gateway = &rd->nd_rd_target; 1097 1098 /* Verify if it is a valid redirect */ 1099 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1100 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1101 (rd->nd_rd_code != 0) || 1102 (len < sizeof (nd_redirect_t)) || 1103 (IN6_IS_ADDR_V4MAPPED(dst)) || 1104 (IN6_IS_ADDR_MULTICAST(dst))) { 1105 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1106 freemsg(mp); 1107 return; 1108 } 1109 1110 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1111 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1112 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1113 freemsg(mp); 1114 return; 1115 } 1116 1117 if (len > sizeof (nd_redirect_t)) { 1118 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1119 len - sizeof (nd_redirect_t))) { 1120 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1121 freemsg(mp); 1122 return; 1123 } 1124 } 1125 1126 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1127 redirect_to_router = B_TRUE; 1128 nce_flags |= NCE_F_ISROUTER; 1129 } 1130 1131 /* ipif will be refreleased afterwards */ 1132 ipif = ipif_get_next_ipif(NULL, ill); 1133 if (ipif == NULL) { 1134 freemsg(mp); 1135 return; 1136 
} 1137 1138 /* 1139 * Verify that the IP source address of the redirect is 1140 * the same as the current first-hop router for the specified 1141 * ICMP destination address. 1142 * Also, Make sure we had a route for the dest in question and 1143 * that route was pointing to the old gateway (the source of the 1144 * redirect packet.) 1145 */ 1146 1147 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, ALL_ZONES, 1148 NULL, MATCH_IRE_GW | MATCH_IRE_ILL | MATCH_IRE_DEFAULT, ipst); 1149 1150 /* 1151 * Check that 1152 * the redirect was not from ourselves 1153 * old gateway is still directly reachable 1154 */ 1155 if (prev_ire == NULL || 1156 prev_ire->ire_type == IRE_LOCAL) { 1157 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1158 ipif_refrele(ipif); 1159 goto fail_redirect; 1160 } 1161 prev_ire_ill = ire_to_ill(prev_ire); 1162 ASSERT(prev_ire_ill != NULL); 1163 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1164 nce_flags |= NCE_F_NONUD; 1165 1166 /* 1167 * Should we use the old ULP info to create the new gateway? From 1168 * a user's perspective, we should inherit the info so that it 1169 * is a "smooth" transition. If we do not do that, then new 1170 * connections going thru the new gateway will have no route metrics, 1171 * which is counter-intuitive to user. From a network point of 1172 * view, this may or may not make sense even though the new gateway 1173 * is still directly connected to us so the route metrics should not 1174 * change much. 1175 * 1176 * But if the old ire_uinfo is not initialized, we do another 1177 * recursive lookup on the dest using the new gateway. There may 1178 * be a route to that. If so, use it to initialize the redirect 1179 * route. 1180 */ 1181 if (prev_ire->ire_uinfo.iulp_set) { 1182 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1183 } else if (redirect_to_router) { 1184 /* 1185 * Only do the following if the redirection is really to 1186 * a router. 
1187 */ 1188 ire_t *tmp_ire; 1189 ire_t *sire; 1190 1191 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1192 ALL_ZONES, 0, NULL, 1193 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1194 ipst); 1195 if (sire != NULL) { 1196 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1197 ASSERT(tmp_ire != NULL); 1198 ire_refrele(tmp_ire); 1199 ire_refrele(sire); 1200 } else if (tmp_ire != NULL) { 1201 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1202 sizeof (iulp_t)); 1203 ire_refrele(tmp_ire); 1204 } 1205 } 1206 1207 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1208 opt = (nd_opt_hdr_t *)&rd[1]; 1209 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1210 if (opt != NULL) { 1211 err = ndp_lookup_then_add_v6(ill, 1212 B_FALSE, /* don't match across illgrp */ 1213 (uchar_t *)&opt[1], /* Link layer address */ 1214 gateway, 1215 &ipv6_all_ones, /* prefix mask */ 1216 &ipv6_all_zeros, /* Mapping mask */ 1217 0, 1218 nce_flags, 1219 ND_STALE, 1220 &nce); 1221 switch (err) { 1222 case 0: 1223 NCE_REFRELE(nce); 1224 break; 1225 case EEXIST: 1226 /* 1227 * Check to see if link layer address has changed and 1228 * process the nce_state accordingly. 1229 */ 1230 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1231 NCE_REFRELE(nce); 1232 break; 1233 default: 1234 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1235 err)); 1236 ipif_refrele(ipif); 1237 goto fail_redirect; 1238 } 1239 } 1240 if (redirect_to_router) { 1241 /* icmp_redirect_ok_v6() must have already verified this */ 1242 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1243 1244 /* 1245 * Create a Route Association. This will allow us to remember 1246 * a router told us to use the particular gateway. 
1247 */ 1248 ire = ire_create_v6( 1249 dst, 1250 &ipv6_all_ones, /* mask */ 1251 &prev_ire->ire_src_addr_v6, /* source addr */ 1252 gateway, /* gateway addr */ 1253 &prev_ire->ire_max_frag, /* max frag */ 1254 NULL, /* no src nce */ 1255 NULL, /* no rfq */ 1256 NULL, /* no stq */ 1257 IRE_HOST, 1258 prev_ire->ire_ipif, 1259 NULL, 1260 0, 1261 0, 1262 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1263 &ulp_info, 1264 NULL, 1265 NULL, 1266 ipst); 1267 } else { 1268 queue_t *stq; 1269 1270 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1271 ? ipif->ipif_rq : ipif->ipif_wq; 1272 1273 /* 1274 * Just create an on link entry, i.e. interface route. 1275 */ 1276 ire = ire_create_v6( 1277 dst, /* gateway == dst */ 1278 &ipv6_all_ones, /* mask */ 1279 &prev_ire->ire_src_addr_v6, /* source addr */ 1280 &ipv6_all_zeros, /* gateway addr */ 1281 &prev_ire->ire_max_frag, /* max frag */ 1282 NULL, /* no src nce */ 1283 NULL, /* ire rfq */ 1284 stq, /* ire stq */ 1285 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1286 prev_ire->ire_ipif, 1287 &ipv6_all_ones, 1288 0, 1289 0, 1290 (RTF_DYNAMIC | RTF_HOST), 1291 &ulp_info, 1292 NULL, 1293 NULL, 1294 ipst); 1295 } 1296 1297 /* Release reference from earlier ipif_get_next_ipif() */ 1298 ipif_refrele(ipif); 1299 1300 if (ire == NULL) 1301 goto fail_redirect; 1302 1303 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1304 1305 /* tell routing sockets that we received a redirect */ 1306 ip_rts_change_v6(RTM_REDIRECT, 1307 &rd->nd_rd_dst, 1308 &rd->nd_rd_target, 1309 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1310 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1311 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1312 1313 /* 1314 * Delete any existing IRE_HOST type ires for this destination. 1315 * This together with the added IRE has the effect of 1316 * modifying an existing redirect. 
1317 */ 1318 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1319 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1320 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), ipst); 1321 1322 ire_refrele(ire); /* Held in ire_add_v6 */ 1323 1324 if (redir_ire != NULL) { 1325 if (redir_ire->ire_flags & RTF_DYNAMIC) 1326 ire_delete(redir_ire); 1327 ire_refrele(redir_ire); 1328 } 1329 } 1330 1331 if (prev_ire->ire_type == IRE_CACHE) 1332 ire_delete(prev_ire); 1333 ire_refrele(prev_ire); 1334 prev_ire = NULL; 1335 1336 fail_redirect: 1337 if (prev_ire != NULL) 1338 ire_refrele(prev_ire); 1339 freemsg(mp); 1340 } 1341 1342 static ill_t * 1343 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1344 { 1345 ill_t *ill; 1346 1347 ASSERT(WR(q) == q); 1348 1349 if (q->q_next != NULL) { 1350 ill = (ill_t *)q->q_ptr; 1351 if (ILL_CAN_LOOKUP(ill)) 1352 ill_refhold(ill); 1353 else 1354 ill = NULL; 1355 } else { 1356 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1357 NULL, NULL, NULL, NULL, NULL, ipst); 1358 } 1359 if (ill == NULL) 1360 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1361 return (ill); 1362 } 1363 1364 /* 1365 * Assigns an appropriate source address to the packet. 1366 * If origdst is one of our IP addresses that use it as the source. 1367 * If the queue is an ill queue then select a source from that ill. 1368 * Otherwise pick a source based on a route lookup back to the origsrc. 1369 * 1370 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1371 */ 1372 static in6_addr_t * 1373 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1374 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1375 { 1376 ill_t *ill; 1377 ire_t *ire; 1378 ipif_t *ipif; 1379 1380 ASSERT(!(wq->q_flag & QREADR)); 1381 if (wq->q_next != NULL) { 1382 ill = (ill_t *)wq->q_ptr; 1383 } else { 1384 ill = NULL; 1385 } 1386 1387 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1388 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1389 ipst); 1390 if (ire != NULL) { 1391 /* Destined to one of our addresses */ 1392 *src = *origdst; 1393 ire_refrele(ire); 1394 return (src); 1395 } 1396 if (ire != NULL) { 1397 ire_refrele(ire); 1398 ire = NULL; 1399 } 1400 if (ill == NULL) { 1401 /* What is the route back to the original source? */ 1402 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1403 NULL, NULL, zoneid, NULL, 1404 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1405 if (ire == NULL) { 1406 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1407 return (NULL); 1408 } 1409 ASSERT(ire->ire_ipif != NULL); 1410 ill = ire->ire_ipif->ipif_ill; 1411 ire_refrele(ire); 1412 } 1413 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1414 IPV6_PREFER_SRC_DEFAULT, zoneid); 1415 if (ipif != NULL) { 1416 *src = ipif->ipif_v6src_addr; 1417 ipif_refrele(ipif); 1418 return (src); 1419 } 1420 /* 1421 * Unusual case - can't find a usable source address to reach the 1422 * original source. Use what in the route to the source. 
1423 */ 1424 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1425 NULL, NULL, zoneid, NULL, 1426 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1427 if (ire == NULL) { 1428 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1429 return (NULL); 1430 } 1431 ASSERT(ire != NULL); 1432 *src = ire->ire_src_addr_v6; 1433 ire_refrele(ire); 1434 return (src); 1435 } 1436 1437 /* 1438 * Build and ship an IPv6 ICMP message using the packet data in mp, 1439 * and the ICMP header pointed to by "stuff". (May be called as 1440 * writer.) 1441 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1442 * verify that an icmp error packet can be sent. 1443 * 1444 * If q is an ill write side queue (which is the case when packets 1445 * arrive from ip_rput) then ip_wput code will ensure that packets to 1446 * link-local destinations are sent out that ill. 1447 * 1448 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1449 * source address (see above function). 1450 */ 1451 static void 1452 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1453 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1454 ip_stack_t *ipst) 1455 { 1456 ip6_t *ip6h; 1457 in6_addr_t v6dst; 1458 size_t len_needed; 1459 size_t msg_len; 1460 mblk_t *mp1; 1461 icmp6_t *icmp6; 1462 ill_t *ill; 1463 in6_addr_t v6src; 1464 mblk_t *ipsec_mp; 1465 ipsec_out_t *io; 1466 1467 ill = ip_queue_to_ill_v6(q, ipst); 1468 if (ill == NULL) { 1469 freemsg(mp); 1470 return; 1471 } 1472 1473 if (mctl_present) { 1474 /* 1475 * If it is : 1476 * 1477 * 1) a IPSEC_OUT, then this is caused by outbound 1478 * datagram originating on this host. IPSEC processing 1479 * may or may not have been done. Refer to comments above 1480 * icmp_inbound_error_fanout for details. 1481 * 1482 * 2) a IPSEC_IN if we are generating a icmp_message 1483 * for an incoming datagram destined for us i.e called 1484 * from ip_fanout_send_icmp. 
1485 */ 1486 ipsec_info_t *in; 1487 1488 ipsec_mp = mp; 1489 mp = ipsec_mp->b_cont; 1490 1491 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1492 ip6h = (ip6_t *)mp->b_rptr; 1493 1494 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1495 in->ipsec_info_type == IPSEC_IN); 1496 1497 if (in->ipsec_info_type == IPSEC_IN) { 1498 /* 1499 * Convert the IPSEC_IN to IPSEC_OUT. 1500 */ 1501 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1502 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1503 ill_refrele(ill); 1504 return; 1505 } 1506 } else { 1507 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1508 io = (ipsec_out_t *)in; 1509 /* 1510 * Clear out ipsec_out_proc_begin, so we do a fresh 1511 * ire lookup. 1512 */ 1513 io->ipsec_out_proc_begin = B_FALSE; 1514 } 1515 } else { 1516 /* 1517 * This is in clear. The icmp message we are building 1518 * here should go out in clear. 1519 */ 1520 ipsec_in_t *ii; 1521 ASSERT(mp->b_datap->db_type == M_DATA); 1522 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1523 if (ipsec_mp == NULL) { 1524 freemsg(mp); 1525 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1526 ill_refrele(ill); 1527 return; 1528 } 1529 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1530 1531 /* This is not a secure packet */ 1532 ii->ipsec_in_secure = B_FALSE; 1533 /* 1534 * For trusted extensions using a shared IP address we can 1535 * send using any zoneid. 1536 */ 1537 if (zoneid == ALL_ZONES) 1538 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1539 else 1540 ii->ipsec_in_zoneid = zoneid; 1541 ipsec_mp->b_cont = mp; 1542 ip6h = (ip6_t *)mp->b_rptr; 1543 /* 1544 * Convert the IPSEC_IN to IPSEC_OUT. 
1545 */ 1546 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1547 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1548 ill_refrele(ill); 1549 return; 1550 } 1551 } 1552 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1553 1554 if (v6src_ptr != NULL) { 1555 v6src = *v6src_ptr; 1556 } else { 1557 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1558 &v6src, zoneid, ipst) == NULL) { 1559 freemsg(ipsec_mp); 1560 ill_refrele(ill); 1561 return; 1562 } 1563 } 1564 v6dst = ip6h->ip6_src; 1565 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1566 msg_len = msgdsize(mp); 1567 if (msg_len > len_needed) { 1568 if (!adjmsg(mp, len_needed - msg_len)) { 1569 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1570 freemsg(ipsec_mp); 1571 ill_refrele(ill); 1572 return; 1573 } 1574 msg_len = len_needed; 1575 } 1576 mp1 = allocb_tmpl(IPV6_HDR_LEN + len, mp); 1577 if (mp1 == NULL) { 1578 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1579 freemsg(ipsec_mp); 1580 ill_refrele(ill); 1581 return; 1582 } 1583 ill_refrele(ill); 1584 mp1->b_cont = mp; 1585 mp = mp1; 1586 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1587 io->ipsec_out_type == IPSEC_OUT); 1588 ipsec_mp->b_cont = mp; 1589 1590 /* 1591 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1592 * node generates be accepted in peace by all on-host destinations. 1593 * If we do NOT assume that all on-host destinations trust 1594 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1595 * (Look for ipsec_out_icmp_loopback). 
1596 */ 1597 io->ipsec_out_icmp_loopback = B_TRUE; 1598 1599 ip6h = (ip6_t *)mp->b_rptr; 1600 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1601 1602 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1603 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1604 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1605 ip6h->ip6_dst = v6dst; 1606 ip6h->ip6_src = v6src; 1607 msg_len += IPV6_HDR_LEN + len; 1608 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1609 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1610 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1611 } 1612 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1613 icmp6 = (icmp6_t *)&ip6h[1]; 1614 bcopy(stuff, (char *)icmp6, len); 1615 /* 1616 * Prepare for checksum by putting icmp length in the icmp 1617 * checksum field. The checksum is calculated in ip_wput_v6. 1618 */ 1619 icmp6->icmp6_cksum = ip6h->ip6_plen; 1620 if (icmp6->icmp6_type == ND_REDIRECT) { 1621 ip6h->ip6_hops = IPV6_MAX_HOPS; 1622 } 1623 /* Send to V6 writeside put routine */ 1624 put(q, ipsec_mp); 1625 } 1626 1627 /* 1628 * Update the output mib when ICMPv6 packets are sent. 
1629 */ 1630 static void 1631 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1632 { 1633 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1634 1635 switch (icmp6->icmp6_type) { 1636 case ICMP6_DST_UNREACH: 1637 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1638 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1639 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1640 break; 1641 1642 case ICMP6_TIME_EXCEEDED: 1643 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1644 break; 1645 1646 case ICMP6_PARAM_PROB: 1647 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1648 break; 1649 1650 case ICMP6_PACKET_TOO_BIG: 1651 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1652 break; 1653 1654 case ICMP6_ECHO_REQUEST: 1655 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1656 break; 1657 1658 case ICMP6_ECHO_REPLY: 1659 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1660 break; 1661 1662 case ND_ROUTER_SOLICIT: 1663 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1664 break; 1665 1666 case ND_ROUTER_ADVERT: 1667 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1668 break; 1669 1670 case ND_NEIGHBOR_SOLICIT: 1671 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1672 break; 1673 1674 case ND_NEIGHBOR_ADVERT: 1675 BUMP_MIB(ill->ill_icmp6_mib, 1676 ipv6IfIcmpOutNeighborAdvertisements); 1677 break; 1678 1679 case ND_REDIRECT: 1680 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1681 break; 1682 1683 case MLD_LISTENER_QUERY: 1684 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1685 break; 1686 1687 case MLD_LISTENER_REPORT: 1688 case MLD_V2_LISTENER_REPORT: 1689 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1690 break; 1691 1692 case MLD_LISTENER_REDUCTION: 1693 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1694 break; 1695 } 1696 } 1697 1698 /* 1699 * Check if it is ok to send an ICMPv6 error packet in 1700 * response to the IP packet in 
mp. 1701 * Free the message and return null if no 1702 * ICMP error packet should be sent. 1703 */ 1704 static mblk_t * 1705 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1706 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1707 { 1708 ip6_t *ip6h; 1709 1710 if (!mp) 1711 return (NULL); 1712 1713 ip6h = (ip6_t *)mp->b_rptr; 1714 1715 /* Check if source address uniquely identifies the host */ 1716 1717 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1718 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1719 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1720 freemsg(mp); 1721 return (NULL); 1722 } 1723 1724 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1725 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1726 icmp6_t *icmp6; 1727 1728 if (mp->b_wptr - mp->b_rptr < len_needed) { 1729 if (!pullupmsg(mp, len_needed)) { 1730 ill_t *ill; 1731 1732 ill = ip_queue_to_ill_v6(q, ipst); 1733 if (ill == NULL) { 1734 BUMP_MIB(&ipst->ips_icmp6_mib, 1735 ipv6IfIcmpInErrors); 1736 } else { 1737 BUMP_MIB(ill->ill_icmp6_mib, 1738 ipv6IfIcmpInErrors); 1739 ill_refrele(ill); 1740 } 1741 freemsg(mp); 1742 return (NULL); 1743 } 1744 ip6h = (ip6_t *)mp->b_rptr; 1745 } 1746 icmp6 = (icmp6_t *)&ip6h[1]; 1747 /* Explicitly do not generate errors in response to redirects */ 1748 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1749 icmp6->icmp6_type == ND_REDIRECT) { 1750 freemsg(mp); 1751 return (NULL); 1752 } 1753 } 1754 /* 1755 * Check that the destination is not multicast and that the packet 1756 * was not sent on link layer broadcast or multicast. (Exception 1757 * is Packet too big message as per the draft - when mcast_ok is set.) 1758 */ 1759 if (!mcast_ok && 1760 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1761 freemsg(mp); 1762 return (NULL); 1763 } 1764 if (icmp_err_rate_limit(ipst)) { 1765 /* 1766 * Only send ICMP error packets every so often. 1767 * This should be done on a per port/source basis, 1768 * but for now this will suffice. 
1769 */ 1770 freemsg(mp); 1771 return (NULL); 1772 } 1773 return (mp); 1774 } 1775 1776 /* 1777 * Generate an ICMPv6 redirect message. 1778 * Include target link layer address option if it exits. 1779 * Always include redirect header. 1780 */ 1781 static void 1782 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1783 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1784 { 1785 nd_redirect_t *rd; 1786 nd_opt_rd_hdr_t *rdh; 1787 uchar_t *buf; 1788 nce_t *nce = NULL; 1789 nd_opt_hdr_t *opt; 1790 int len; 1791 int ll_opt_len = 0; 1792 int max_redir_hdr_data_len; 1793 int pkt_len; 1794 in6_addr_t *srcp; 1795 ip_stack_t *ipst = ill->ill_ipst; 1796 1797 /* 1798 * We are called from ip_rput where we could 1799 * not have attached an IPSEC_IN. 1800 */ 1801 ASSERT(mp->b_datap->db_type == M_DATA); 1802 1803 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1804 if (mp == NULL) 1805 return; 1806 nce = ndp_lookup_v6(ill, B_TRUE, targetp, B_FALSE); 1807 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1808 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1809 ill->ill_phys_addr_length + 7)/8 * 8; 1810 } 1811 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1812 ASSERT(len % 4 == 0); 1813 buf = kmem_alloc(len, KM_NOSLEEP); 1814 if (buf == NULL) { 1815 if (nce != NULL) 1816 NCE_REFRELE(nce); 1817 freemsg(mp); 1818 return; 1819 } 1820 1821 rd = (nd_redirect_t *)buf; 1822 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1823 rd->nd_rd_code = 0; 1824 rd->nd_rd_reserved = 0; 1825 rd->nd_rd_target = *targetp; 1826 rd->nd_rd_dst = *dest; 1827 1828 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1829 if (nce != NULL && ll_opt_len != 0) { 1830 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1831 opt->nd_opt_len = ll_opt_len/8; 1832 bcopy((char *)nce->nce_res_mp->b_rptr + 1833 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1834 ill->ill_phys_addr_length); 1835 } 1836 if (nce != NULL) 1837 NCE_REFRELE(nce); 1838 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + 
ll_opt_len); 1839 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1840 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1841 max_redir_hdr_data_len = 1842 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1843 pkt_len = msgdsize(mp); 1844 /* Make sure mp is 8 byte aligned */ 1845 if (pkt_len > max_redir_hdr_data_len) { 1846 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1847 sizeof (nd_opt_rd_hdr_t))/8; 1848 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1849 } else { 1850 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1851 (void) adjmsg(mp, -(pkt_len % 8)); 1852 } 1853 rdh->nd_opt_rh_reserved1 = 0; 1854 rdh->nd_opt_rh_reserved2 = 0; 1855 /* ipif_v6src_addr contains the link-local source address */ 1856 srcp = &ill->ill_ipif->ipif_v6src_addr; 1857 1858 /* Redirects sent by router, and router is global zone */ 1859 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1860 kmem_free(buf, len); 1861 } 1862 1863 1864 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1865 void 1866 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1867 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1868 ip_stack_t *ipst) 1869 { 1870 icmp6_t icmp6; 1871 boolean_t mctl_present; 1872 mblk_t *first_mp; 1873 1874 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1875 1876 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1877 if (mp == NULL) { 1878 if (mctl_present) 1879 freeb(first_mp); 1880 return; 1881 } 1882 bzero(&icmp6, sizeof (icmp6_t)); 1883 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1884 icmp6.icmp6_code = code; 1885 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1886 zoneid, ipst); 1887 } 1888 1889 /* 1890 * Generate an ICMP unreachable message. 
1891 */ 1892 void 1893 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1894 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1895 ip_stack_t *ipst) 1896 { 1897 icmp6_t icmp6; 1898 boolean_t mctl_present; 1899 mblk_t *first_mp; 1900 1901 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1902 1903 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1904 if (mp == NULL) { 1905 if (mctl_present) 1906 freeb(first_mp); 1907 return; 1908 } 1909 bzero(&icmp6, sizeof (icmp6_t)); 1910 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1911 icmp6.icmp6_code = code; 1912 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1913 zoneid, ipst); 1914 } 1915 1916 /* 1917 * Generate an ICMP pkt too big message. 1918 */ 1919 static void 1920 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1921 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1922 { 1923 icmp6_t icmp6; 1924 mblk_t *first_mp; 1925 boolean_t mctl_present; 1926 1927 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1928 1929 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1930 if (mp == NULL) { 1931 if (mctl_present) 1932 freeb(first_mp); 1933 return; 1934 } 1935 bzero(&icmp6, sizeof (icmp6_t)); 1936 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1937 icmp6.icmp6_code = 0; 1938 icmp6.icmp6_mtu = htonl(mtu); 1939 1940 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1941 zoneid, ipst); 1942 } 1943 1944 /* 1945 * Generate an ICMP parameter problem message. (May be called as writer.) 1946 * 'offset' is the offset from the beginning of the packet in error. 
1947 */ 1948 static void 1949 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 1950 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1951 ip_stack_t *ipst) 1952 { 1953 icmp6_t icmp6; 1954 boolean_t mctl_present; 1955 mblk_t *first_mp; 1956 1957 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1958 1959 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1960 if (mp == NULL) { 1961 if (mctl_present) 1962 freeb(first_mp); 1963 return; 1964 } 1965 bzero((char *)&icmp6, sizeof (icmp6_t)); 1966 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1967 icmp6.icmp6_code = code; 1968 icmp6.icmp6_pptr = htonl(offset); 1969 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1970 zoneid, ipst); 1971 } 1972 1973 /* 1974 * This code will need to take into account the possibility of binding 1975 * to a link local address on a multi-homed host, in which case the 1976 * outgoing interface (from the conn) will need to be used when getting 1977 * an ire for the dst. Going through proper outgoing interface and 1978 * choosing the source address corresponding to the outgoing interface 1979 * is necessary when the destination address is a link-local address and 1980 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 1981 * This can happen when active connection is setup; thus ipp pointer 1982 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 1983 * pointer is passed as ipp pointer. 
1984 */ 1985 mblk_t * 1986 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 1987 { 1988 ssize_t len; 1989 int protocol; 1990 struct T_bind_req *tbr; 1991 sin6_t *sin6; 1992 ipa6_conn_t *ac6; 1993 in6_addr_t *v6srcp; 1994 in6_addr_t *v6dstp; 1995 uint16_t lport; 1996 uint16_t fport; 1997 uchar_t *ucp; 1998 int error = 0; 1999 boolean_t local_bind; 2000 ipa6_conn_x_t *acx6; 2001 boolean_t verify_dst; 2002 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2003 cred_t *cr; 2004 2005 /* 2006 * All Solaris components should pass a db_credp 2007 * for this TPI message, hence we ASSERT. 2008 * But in case there is some other M_PROTO that looks 2009 * like a TPI message sent by some other kernel 2010 * component, we check and return an error. 2011 */ 2012 cr = msg_getcred(mp, NULL); 2013 ASSERT(cr != NULL); 2014 if (cr == NULL) { 2015 error = EINVAL; 2016 goto bad_addr; 2017 } 2018 2019 ASSERT(connp->conn_af_isv6); 2020 len = mp->b_wptr - mp->b_rptr; 2021 if (len < (sizeof (*tbr) + 1)) { 2022 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2023 "ip_bind_v6: bogus msg, len %ld", len); 2024 goto bad_addr; 2025 } 2026 /* Back up and extract the protocol identifier. */ 2027 mp->b_wptr--; 2028 tbr = (struct T_bind_req *)mp->b_rptr; 2029 /* Reset the message type in preparation for shipping it back. */ 2030 mp->b_datap->db_type = M_PCPROTO; 2031 2032 protocol = *mp->b_wptr & 0xFF; 2033 connp->conn_ulp = (uint8_t)protocol; 2034 2035 /* 2036 * Check for a zero length address. This is from a protocol that 2037 * wants to register to receive all packets of its type. 2038 */ 2039 if (tbr->ADDR_length == 0) { 2040 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2041 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2042 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2043 NULL) { 2044 /* 2045 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2046 * Do not allow others to bind to these. 
2047 */ 2048 goto bad_addr; 2049 } 2050 2051 /* 2052 * 2053 * The udp module never sends down a zero-length address, 2054 * and allowing this on a labeled system will break MLP 2055 * functionality. 2056 */ 2057 if (is_system_labeled() && protocol == IPPROTO_UDP) 2058 goto bad_addr; 2059 2060 /* Allow ipsec plumbing */ 2061 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2062 protocol != IPPROTO_ESP) 2063 goto bad_addr; 2064 2065 connp->conn_srcv6 = ipv6_all_zeros; 2066 ipcl_proto_insert_v6(connp, protocol); 2067 2068 tbr->PRIM_type = T_BIND_ACK; 2069 return (mp); 2070 } 2071 2072 /* Extract the address pointer from the message. */ 2073 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2074 tbr->ADDR_length); 2075 if (ucp == NULL) { 2076 ip1dbg(("ip_bind_v6: no address\n")); 2077 goto bad_addr; 2078 } 2079 if (!OK_32PTR(ucp)) { 2080 ip1dbg(("ip_bind_v6: unaligned address\n")); 2081 goto bad_addr; 2082 } 2083 2084 switch (tbr->ADDR_length) { 2085 default: 2086 ip1dbg(("ip_bind_v6: bad address length %d\n", 2087 (int)tbr->ADDR_length)); 2088 goto bad_addr; 2089 2090 case IPV6_ADDR_LEN: 2091 /* Verification of local address only */ 2092 v6srcp = (in6_addr_t *)ucp; 2093 lport = 0; 2094 local_bind = B_TRUE; 2095 break; 2096 2097 case sizeof (sin6_t): 2098 sin6 = (sin6_t *)ucp; 2099 v6srcp = &sin6->sin6_addr; 2100 lport = sin6->sin6_port; 2101 local_bind = B_TRUE; 2102 break; 2103 2104 case sizeof (ipa6_conn_t): 2105 /* 2106 * Verify that both the source and destination addresses 2107 * are valid. 2108 */ 2109 ac6 = (ipa6_conn_t *)ucp; 2110 v6srcp = &ac6->ac6_laddr; 2111 v6dstp = &ac6->ac6_faddr; 2112 fport = ac6->ac6_fport; 2113 /* For raw socket, the local port is not set. */ 2114 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2115 connp->conn_lport; 2116 local_bind = B_FALSE; 2117 /* Always verify destination reachability. 
*/ 2118 verify_dst = B_TRUE; 2119 break; 2120 2121 case sizeof (ipa6_conn_x_t): 2122 /* 2123 * Verify that the source address is valid. 2124 */ 2125 acx6 = (ipa6_conn_x_t *)ucp; 2126 ac6 = &acx6->ac6x_conn; 2127 v6srcp = &ac6->ac6_laddr; 2128 v6dstp = &ac6->ac6_faddr; 2129 fport = ac6->ac6_fport; 2130 lport = ac6->ac6_lport; 2131 local_bind = B_FALSE; 2132 /* 2133 * Client that passed ipa6_conn_x_t to us specifies whether to 2134 * verify destination reachability. 2135 */ 2136 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2137 break; 2138 } 2139 if (local_bind) { 2140 error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol, 2141 v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN); 2142 } else { 2143 error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol, 2144 v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2145 } 2146 2147 if (error == 0) { 2148 /* Send it home. */ 2149 mp->b_datap->db_type = M_PCPROTO; 2150 tbr->PRIM_type = T_BIND_ACK; 2151 return (mp); 2152 } 2153 2154 bad_addr: 2155 ASSERT(error != EINPROGRESS); 2156 if (error > 0) 2157 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2158 else 2159 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2160 return (mp); 2161 } 2162 2163 static void 2164 ip_bind_post_handling_v6(conn_t *connp, mblk_t *mp, 2165 boolean_t version_changed, boolean_t ire_requested, ip_stack_t *ipst) 2166 { 2167 /* Update conn_send and pktversion if v4/v6 changed */ 2168 if (version_changed) { 2169 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2170 } 2171 2172 /* 2173 * Pass the IPSEC headers size in ire_ipsec_overhead. 2174 * We can't do this in ip_bind_insert_ire because the policy 2175 * may not have been inherited at that point in time and hence 2176 * conn_out_enforce_policy may not be set. 
2177 */ 2178 if (ire_requested && connp->conn_out_enforce_policy && 2179 mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE) { 2180 ire_t *ire = (ire_t *)mp->b_rptr; 2181 ASSERT(MBLKL(mp) >= sizeof (ire_t)); 2182 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2183 } 2184 } 2185 2186 /* 2187 * Here address is verified to be a valid local address. 2188 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2189 * address is also considered a valid local address. 2190 * In the case of a multicast address, however, the 2191 * upper protocol is expected to reset the src address 2192 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2193 * no packets are emitted with multicast address as 2194 * source address. 2195 * The addresses valid for bind are: 2196 * (1) - in6addr_any 2197 * (2) - IP address of an UP interface 2198 * (3) - IP address of a DOWN interface 2199 * (4) - a multicast address. In this case 2200 * the conn will only receive packets destined to 2201 * the specified multicast address. Note: the 2202 * application still has to issue an 2203 * IPV6_JOIN_GROUP socket option. 2204 * 2205 * In all the above cases, the bound address must be valid in the current zone. 2206 * When the address is loopback or multicast, there might be many matching IREs 2207 * so bind has to look up based on the zone. 2208 */ 2209 /* 2210 * Verify the local IP address. Does not change the conn_t except 2211 * conn_fully_bound and conn_policy_cached. 
2212 */ 2213 static int 2214 ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2215 const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert) 2216 { 2217 int error = 0; 2218 ire_t *src_ire = NULL; 2219 zoneid_t zoneid; 2220 mblk_t *mp = NULL; 2221 boolean_t ire_requested; 2222 boolean_t ipsec_policy_set; 2223 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2224 2225 if (mpp) 2226 mp = *mpp; 2227 2228 ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2229 ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET); 2230 2231 /* 2232 * If it was previously connected, conn_fully_bound would have 2233 * been set. 2234 */ 2235 connp->conn_fully_bound = B_FALSE; 2236 2237 zoneid = connp->conn_zoneid; 2238 2239 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2240 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2241 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2242 /* 2243 * If an address other than in6addr_any is requested, 2244 * we verify that it is a valid address for bind 2245 * Note: Following code is in if-else-if form for 2246 * readability compared to a condition check. 2247 */ 2248 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2249 /* LINTED - statement has no consequent */ 2250 if (IRE_IS_LOCAL(src_ire)) { 2251 /* 2252 * (2) Bind to address of local UP interface 2253 */ 2254 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2255 ipif_t *multi_ipif = NULL; 2256 ire_t *save_ire; 2257 /* 2258 * (4) bind to multicast address. 2259 * Fake out the IRE returned to upper 2260 * layer to be a broadcast IRE in 2261 * ip_bind_insert_ire_v6(). 2262 * Pass other information that matches 2263 * the ipif (e.g. the source address). 
2264 * conn_multicast_ill is only used for 2265 * IPv6 packets 2266 */ 2267 mutex_enter(&connp->conn_lock); 2268 if (connp->conn_multicast_ill != NULL) { 2269 (void) ipif_lookup_zoneid( 2270 connp->conn_multicast_ill, zoneid, 0, 2271 &multi_ipif); 2272 } else { 2273 /* 2274 * Look for default like 2275 * ip_wput_v6 2276 */ 2277 multi_ipif = ipif_lookup_group_v6( 2278 &ipv6_unspecified_group, zoneid, ipst); 2279 } 2280 mutex_exit(&connp->conn_lock); 2281 save_ire = src_ire; 2282 src_ire = NULL; 2283 if (multi_ipif == NULL || !ire_requested || 2284 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2285 src_ire = save_ire; 2286 error = EADDRNOTAVAIL; 2287 } else { 2288 ASSERT(src_ire != NULL); 2289 if (save_ire != NULL) 2290 ire_refrele(save_ire); 2291 } 2292 if (multi_ipif != NULL) 2293 ipif_refrele(multi_ipif); 2294 } else { 2295 if (!ip_addr_exists_v6(v6src, zoneid, ipst)) { 2296 /* 2297 * Not a valid address for bind 2298 */ 2299 error = EADDRNOTAVAIL; 2300 } 2301 } 2302 2303 if (error != 0) { 2304 /* Red Alert! Attempting to be a bogon! */ 2305 if (ip_debug > 2) { 2306 /* ip1dbg */ 2307 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2308 " address %s\n", AF_INET6, v6src); 2309 } 2310 goto bad_addr; 2311 } 2312 } 2313 2314 /* 2315 * Allow setting new policies. For example, disconnects come 2316 * down as ipa_t bind. As we would have set conn_policy_cached 2317 * to B_TRUE before, we should set it to B_FALSE, so that policy 2318 * can change after the disconnect. 2319 */ 2320 connp->conn_policy_cached = B_FALSE; 2321 2322 /* If not fanout_insert this was just an address verification */ 2323 if (fanout_insert) { 2324 /* 2325 * The addresses have been verified. Time to insert in 2326 * the correct fanout list. 
2327 */ 2328 connp->conn_srcv6 = *v6src; 2329 connp->conn_remv6 = ipv6_all_zeros; 2330 connp->conn_lport = lport; 2331 connp->conn_fport = 0; 2332 error = ipcl_bind_insert_v6(connp, protocol, v6src, lport); 2333 } 2334 if (error == 0) { 2335 if (ire_requested) { 2336 if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL, 2337 ipst)) { 2338 error = -1; 2339 goto bad_addr; 2340 } 2341 mp = *mpp; 2342 } else if (ipsec_policy_set) { 2343 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2344 error = -1; 2345 goto bad_addr; 2346 } 2347 } 2348 } 2349 bad_addr: 2350 if (error != 0) { 2351 if (connp->conn_anon_port) { 2352 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2353 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2354 B_FALSE); 2355 } 2356 connp->conn_mlp_type = mlptSingle; 2357 } 2358 2359 if (src_ire != NULL) 2360 ire_refrele(src_ire); 2361 2362 if (ipsec_policy_set) { 2363 ASSERT(mp != NULL); 2364 freeb(mp); 2365 /* 2366 * As of now assume that nothing else accompanies 2367 * IPSEC_POLICY_SET. 2368 */ 2369 *mpp = NULL; 2370 } 2371 2372 return (error); 2373 } 2374 int 2375 ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2376 const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert) 2377 { 2378 int error; 2379 boolean_t ire_requested; 2380 mblk_t *mp = NULL; 2381 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2382 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2383 2384 /* 2385 * Note that we allow connect to broadcast and multicast 2386 * address when ire_requested is set. Thus the ULP 2387 * has to check for IRE_BROADCAST and multicast. 
2388 */ 2389 if (mpp) 2390 mp = *mpp; 2391 ire_requested = (mp && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2392 2393 ASSERT(connp->conn_af_isv6); 2394 connp->conn_ulp = protocol; 2395 2396 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2397 /* Bind to IPv4 address */ 2398 ipaddr_t v4src; 2399 2400 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2401 2402 error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport, 2403 fanout_insert); 2404 if (error != 0) 2405 goto bad_addr; 2406 connp->conn_pkt_isv6 = B_FALSE; 2407 } else { 2408 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2409 error = 0; 2410 goto bad_addr; 2411 } 2412 error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp, 2413 lport, fanout_insert); 2414 if (error != 0) 2415 goto bad_addr; 2416 connp->conn_pkt_isv6 = B_TRUE; 2417 } 2418 2419 ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, 2420 orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); 2421 return (0); 2422 2423 bad_addr: 2424 if (error < 0) 2425 error = -TBADADDR; 2426 return (error); 2427 } 2428 2429 /* 2430 * Verify that both the source and destination addresses 2431 * are valid. If verify_dst, then destination address must also be reachable, 2432 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2433 * It takes ip6_pkt_t * as one of the arguments to determine correct 2434 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2435 * destination address. Note that parameter ipp is only useful for TCP connect 2436 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2437 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2438 * 2439 */ 2440 int 2441 ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2442 in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst, 2443 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2444 boolean_t verify_dst, cred_t *cr) 2445 { 2446 ire_t *src_ire; 2447 ire_t *dst_ire; 2448 int error = 0; 2449 ire_t *sire = NULL; 2450 ire_t *md_dst_ire = NULL; 2451 ill_t *md_ill = NULL; 2452 ill_t *dst_ill = NULL; 2453 ipif_t *src_ipif = NULL; 2454 zoneid_t zoneid; 2455 boolean_t ill_held = B_FALSE; 2456 mblk_t *mp = NULL; 2457 boolean_t ire_requested = B_FALSE; 2458 boolean_t ipsec_policy_set = B_FALSE; 2459 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2460 ts_label_t *tsl = NULL; 2461 2462 if (mpp) 2463 mp = *mpp; 2464 2465 if (mp != NULL) { 2466 ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2467 ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET); 2468 } 2469 if (cr != NULL) 2470 tsl = crgetlabel(cr); 2471 2472 src_ire = dst_ire = NULL; 2473 /* 2474 * If we never got a disconnect before, clear it now. 2475 */ 2476 connp->conn_fully_bound = B_FALSE; 2477 2478 zoneid = connp->conn_zoneid; 2479 2480 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2481 ipif_t *ipif; 2482 2483 /* 2484 * Use an "emulated" IRE_BROADCAST to tell the transport it 2485 * is a multicast. 2486 * Pass other information that matches 2487 * the ipif (e.g. the source address). 
2488 * 2489 * conn_multicast_ill is only used for IPv6 packets 2490 */ 2491 mutex_enter(&connp->conn_lock); 2492 if (connp->conn_multicast_ill != NULL) { 2493 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2494 zoneid, 0, &ipif); 2495 } else { 2496 /* Look for default like ip_wput_v6 */ 2497 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2498 } 2499 mutex_exit(&connp->conn_lock); 2500 if (ipif == NULL || ire_requested || 2501 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2502 if (ipif != NULL) 2503 ipif_refrele(ipif); 2504 if (ip_debug > 2) { 2505 /* ip1dbg */ 2506 pr_addr_dbg("ip_bind_connected_v6: bad " 2507 "connected multicast %s\n", AF_INET6, 2508 v6dst); 2509 } 2510 error = ENETUNREACH; 2511 goto bad_addr; 2512 } 2513 if (ipif != NULL) 2514 ipif_refrele(ipif); 2515 } else { 2516 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2517 NULL, &sire, zoneid, tsl, 2518 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2519 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2520 ipst); 2521 /* 2522 * We also prevent ire's with src address INADDR_ANY to 2523 * be used, which are created temporarily for 2524 * sending out packets from endpoints that have 2525 * conn_unspec_src set. 2526 */ 2527 if (dst_ire == NULL || 2528 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2529 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2530 /* 2531 * When verifying destination reachability, we always 2532 * complain. 2533 * 2534 * When not verifying destination reachability but we 2535 * found an IRE, i.e. the destination is reachable, 2536 * then the other tests still apply and we complain. 
2537 */ 2538 if (verify_dst || (dst_ire != NULL)) { 2539 if (ip_debug > 2) { 2540 /* ip1dbg */ 2541 pr_addr_dbg("ip_bind_connected_v6: bad" 2542 " connected dst %s\n", AF_INET6, 2543 v6dst); 2544 } 2545 if (dst_ire == NULL || 2546 !(dst_ire->ire_type & IRE_HOST)) { 2547 error = ENETUNREACH; 2548 } else { 2549 error = EHOSTUNREACH; 2550 } 2551 goto bad_addr; 2552 } 2553 } 2554 } 2555 2556 /* 2557 * We now know that routing will allow us to reach the destination. 2558 * Check whether Trusted Solaris policy allows communication with this 2559 * host, and pretend that the destination is unreachable if not. 2560 * 2561 * This is never a problem for TCP, since that transport is known to 2562 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2563 * handling. If the remote is unreachable, it will be detected at that 2564 * point, so there's no reason to check it here. 2565 * 2566 * Note that for sendto (and other datagram-oriented friends), this 2567 * check is done as part of the data path label computation instead. 2568 * The check here is just to make non-TCP connect() report the right 2569 * error. 2570 */ 2571 if (dst_ire != NULL && is_system_labeled() && 2572 !IPCL_IS_TCP(connp) && 2573 tsol_compute_label_v6(cr, v6dst, NULL, 2574 connp->conn_mac_exempt, ipst) != 0) { 2575 error = EHOSTUNREACH; 2576 if (ip_debug > 2) { 2577 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2578 AF_INET6, v6dst); 2579 } 2580 goto bad_addr; 2581 } 2582 2583 /* 2584 * If the app does a connect(), it means that it will most likely 2585 * send more than 1 packet to the destination. It makes sense 2586 * to clear the temporary flag. 2587 */ 2588 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2589 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2590 irb_t *irb = dst_ire->ire_bucket; 2591 2592 rw_enter(&irb->irb_lock, RW_WRITER); 2593 /* 2594 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2595 * the lock in order to guarantee irb_tmp_ire_cnt. 
2596 */ 2597 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2598 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2599 irb->irb_tmp_ire_cnt--; 2600 } 2601 rw_exit(&irb->irb_lock); 2602 } 2603 2604 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2605 2606 /* 2607 * See if we should notify ULP about MDT; we do this whether or not 2608 * ire_requested is TRUE, in order to handle active connects; MDT 2609 * eligibility tests for passive connects are handled separately 2610 * through tcp_adapt_ire(). We do this before the source address 2611 * selection, because dst_ire may change after a call to 2612 * ipif_select_source_v6(). This is a best-effort check, as the 2613 * packet for this connection may not actually go through 2614 * dst_ire->ire_stq, and the exact IRE can only be known after 2615 * calling ip_newroute_v6(). This is why we further check on the 2616 * IRE during Multidata packet transmission in tcp_multisend(). 2617 */ 2618 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2619 dst_ire != NULL && 2620 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2621 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2622 ILL_MDT_CAPABLE(md_ill)) { 2623 md_dst_ire = dst_ire; 2624 IRE_REFHOLD(md_dst_ire); 2625 } 2626 2627 if (dst_ire != NULL && 2628 dst_ire->ire_type == IRE_LOCAL && 2629 dst_ire->ire_zoneid != zoneid && 2630 dst_ire->ire_zoneid != ALL_ZONES) { 2631 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2632 zoneid, 0, NULL, 2633 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2634 MATCH_IRE_RJ_BHOLE, ipst); 2635 if (src_ire == NULL) { 2636 error = EHOSTUNREACH; 2637 goto bad_addr; 2638 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2639 if (!(src_ire->ire_type & IRE_HOST)) 2640 error = ENETUNREACH; 2641 else 2642 error = EHOSTUNREACH; 2643 goto bad_addr; 2644 } 2645 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2646 src_ipif = src_ire->ire_ipif; 2647 ipif_refhold(src_ipif); 2648 *v6src = 
src_ipif->ipif_v6lcl_addr; 2649 } 2650 ire_refrele(src_ire); 2651 src_ire = NULL; 2652 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2653 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2654 *v6src = sire->ire_src_addr_v6; 2655 ire_refrele(dst_ire); 2656 dst_ire = sire; 2657 sire = NULL; 2658 } else if (dst_ire->ire_type == IRE_CACHE && 2659 (dst_ire->ire_flags & RTF_SETSRC)) { 2660 ASSERT(dst_ire->ire_zoneid == zoneid || 2661 dst_ire->ire_zoneid == ALL_ZONES); 2662 *v6src = dst_ire->ire_src_addr_v6; 2663 } else { 2664 /* 2665 * Pick a source address so that a proper inbound load 2666 * spreading would happen. Use dst_ill specified by the 2667 * app. when socket option or scopeid is set. 2668 */ 2669 int err; 2670 2671 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2672 uint_t if_index; 2673 2674 /* 2675 * Scope id or IPV6_PKTINFO 2676 */ 2677 2678 if_index = ipp->ipp_ifindex; 2679 dst_ill = ill_lookup_on_ifindex( 2680 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2681 ipst); 2682 if (dst_ill == NULL) { 2683 ip1dbg(("ip_bind_connected_v6:" 2684 " bad ifindex %d\n", if_index)); 2685 error = EADDRNOTAVAIL; 2686 goto bad_addr; 2687 } 2688 ill_held = B_TRUE; 2689 } else if (connp->conn_outgoing_ill != NULL) { 2690 /* 2691 * For IPV6_BOUND_IF socket option, 2692 * conn_outgoing_ill should be set 2693 * already in TCP or UDP/ICMP. 
2694 */ 2695 dst_ill = conn_get_held_ill(connp, 2696 &connp->conn_outgoing_ill, &err); 2697 if (err == ILL_LOOKUP_FAILED) { 2698 ip1dbg(("ip_bind_connected_v6:" 2699 "no ill for bound_if\n")); 2700 error = EADDRNOTAVAIL; 2701 goto bad_addr; 2702 } 2703 ill_held = B_TRUE; 2704 } else if (dst_ire->ire_stq != NULL) { 2705 /* No need to hold ill here */ 2706 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2707 } else { 2708 /* No need to hold ill here */ 2709 dst_ill = dst_ire->ire_ipif->ipif_ill; 2710 } 2711 if (ip6_asp_can_lookup(ipst)) { 2712 src_ipif = ipif_select_source_v6(dst_ill, 2713 v6dst, B_FALSE, connp->conn_src_preferences, 2714 zoneid); 2715 ip6_asp_table_refrele(ipst); 2716 if (src_ipif == NULL) { 2717 pr_addr_dbg("ip_bind_connected_v6: " 2718 "no usable source address for " 2719 "connection to %s\n", 2720 AF_INET6, v6dst); 2721 error = EADDRNOTAVAIL; 2722 goto bad_addr; 2723 } 2724 *v6src = src_ipif->ipif_v6lcl_addr; 2725 } else { 2726 error = EADDRNOTAVAIL; 2727 goto bad_addr; 2728 } 2729 } 2730 } 2731 2732 /* 2733 * We do ire_route_lookup_v6() here (and not an interface lookup) 2734 * as we assert that v6src should only come from an 2735 * UP interface for hard binding. 2736 */ 2737 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2738 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2739 2740 /* src_ire must be a local|loopback */ 2741 if (!IRE_IS_LOCAL(src_ire)) { 2742 if (ip_debug > 2) { 2743 /* ip1dbg */ 2744 pr_addr_dbg("ip_bind_connected_v6: bad " 2745 "connected src %s\n", AF_INET6, v6src); 2746 } 2747 error = EADDRNOTAVAIL; 2748 goto bad_addr; 2749 } 2750 2751 /* 2752 * If the source address is a loopback address, the 2753 * destination had best be local or multicast. 2754 * The transports that can't handle multicast will reject 2755 * those addresses. 
2756 */ 2757 if (src_ire->ire_type == IRE_LOOPBACK && 2758 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2759 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2760 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2761 error = -1; 2762 goto bad_addr; 2763 } 2764 /* 2765 * Allow setting new policies. For example, disconnects come 2766 * down as ipa_t bind. As we would have set conn_policy_cached 2767 * to B_TRUE before, we should set it to B_FALSE, so that policy 2768 * can change after the disconnect. 2769 */ 2770 connp->conn_policy_cached = B_FALSE; 2771 2772 /* 2773 * The addresses have been verified. Initialize the conn 2774 * before calling the policy as they expect the conns 2775 * initialized. 2776 */ 2777 connp->conn_srcv6 = *v6src; 2778 connp->conn_remv6 = *v6dst; 2779 connp->conn_lport = lport; 2780 connp->conn_fport = fport; 2781 2782 ASSERT(!(ipsec_policy_set && ire_requested)); 2783 if (ire_requested) { 2784 iulp_t *ulp_info = NULL; 2785 2786 /* 2787 * Note that sire will not be NULL if this is an off-link 2788 * connection and there is not cache for that dest yet. 2789 * 2790 * XXX Because of an existing bug, if there are multiple 2791 * default routes, the IRE returned now may not be the actual 2792 * default route used (default routes are chosen in a 2793 * round robin fashion). So if the metrics for different 2794 * default routes are different, we may return the wrong 2795 * metrics. This will not be a problem if the existing 2796 * bug is fixed. 2797 */ 2798 if (sire != NULL) 2799 ulp_info = &(sire->ire_uinfo); 2800 2801 if (!ip_bind_get_ire_v6(mpp, dst_ire, v6dst, ulp_info, 2802 ipst)) { 2803 error = -1; 2804 goto bad_addr; 2805 } 2806 } else if (ipsec_policy_set) { 2807 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2808 error = -1; 2809 goto bad_addr; 2810 } 2811 } 2812 2813 /* 2814 * Cache IPsec policy in this conn. If we have per-socket policy, 2815 * we'll cache that. If we don't, we'll inherit global policy. 
2816 * 2817 * We can't insert until the conn reflects the policy. Note that 2818 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2819 * connections where we don't have a policy. This is to prevent 2820 * global policy lookups in the inbound path. 2821 * 2822 * If we insert before we set conn_policy_cached, 2823 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2824 * because global policy cound be non-empty. We normally call 2825 * ipsec_check_policy() for conn_policy_cached connections only if 2826 * conn_in_enforce_policy is set. But in this case, 2827 * conn_policy_cached can get set anytime since we made the 2828 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2829 * is called, which will make the above assumption false. Thus, we 2830 * need to insert after we set conn_policy_cached. 2831 */ 2832 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2833 goto bad_addr; 2834 2835 /* If not fanout_insert this was just an address verification */ 2836 if (fanout_insert) { 2837 /* 2838 * The addresses have been verified. Time to insert in 2839 * the correct fanout list. 2840 */ 2841 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2842 connp->conn_ports, 2843 IPCL_IS_TCP(connp) ? connp->conn_tcp->tcp_bound_if : 0); 2844 } 2845 if (error == 0) { 2846 connp->conn_fully_bound = B_TRUE; 2847 /* 2848 * Our initial checks for MDT have passed; the IRE is not 2849 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2850 * be supporting MDT. Pass the IRE, IPC and ILL into 2851 * ip_mdinfo_return(), which performs further checks 2852 * against them and upon success, returns the MDT info 2853 * mblk which we will attach to the bind acknowledgment. 
2854 */ 2855 if (md_dst_ire != NULL) { 2856 mblk_t *mdinfo_mp; 2857 2858 ASSERT(md_ill != NULL); 2859 ASSERT(md_ill->ill_mdt_capab != NULL); 2860 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2861 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) { 2862 if (mp == NULL) { 2863 *mpp = mdinfo_mp; 2864 } else { 2865 linkb(mp, mdinfo_mp); 2866 } 2867 } 2868 } 2869 } 2870 bad_addr: 2871 if (ipsec_policy_set) { 2872 ASSERT(mp != NULL); 2873 freeb(mp); 2874 /* 2875 * As of now assume that nothing else accompanies 2876 * IPSEC_POLICY_SET. 2877 */ 2878 *mpp = NULL; 2879 } 2880 refrele_and_quit: 2881 if (src_ire != NULL) 2882 IRE_REFRELE(src_ire); 2883 if (dst_ire != NULL) 2884 IRE_REFRELE(dst_ire); 2885 if (sire != NULL) 2886 IRE_REFRELE(sire); 2887 if (src_ipif != NULL) 2888 ipif_refrele(src_ipif); 2889 if (md_dst_ire != NULL) 2890 IRE_REFRELE(md_dst_ire); 2891 if (ill_held && dst_ill != NULL) 2892 ill_refrele(dst_ill); 2893 return (error); 2894 } 2895 2896 /* ARGSUSED */ 2897 int 2898 ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2899 in6_addr_t *v6srcp, uint16_t lport, const in6_addr_t *v6dstp, 2900 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2901 boolean_t verify_dst, cred_t *cr) 2902 { 2903 int error = 0; 2904 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2905 boolean_t ire_requested; 2906 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2907 2908 /* 2909 * Note that we allow connect to broadcast and multicast 2910 * address when ire_requested is set. Thus the ULP 2911 * has to check for IRE_BROADCAST and multicast. 2912 */ 2913 ASSERT(mpp != NULL); 2914 ire_requested = (*mpp != NULL && DB_TYPE(*mpp) == IRE_DB_REQ_TYPE); 2915 2916 ASSERT(connp->conn_af_isv6); 2917 connp->conn_ulp = protocol; 2918 2919 /* For raw socket, the local port is not set. */ 2920 lport = lport != 0 ? lport : connp->conn_lport; 2921 2922 /* 2923 * Bind to local and remote address. 
Local might be 2924 * unspecified in which case it will be extracted from 2925 * ire_src_addr_v6 2926 */ 2927 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2928 /* Connect to IPv4 address */ 2929 ipaddr_t v4src; 2930 ipaddr_t v4dst; 2931 2932 /* Is the source unspecified or mapped? */ 2933 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2934 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2935 ip1dbg(("ip_proto_bind_connected_v6: " 2936 "dst is mapped, but not the src\n")); 2937 goto bad_addr; 2938 } 2939 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2940 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2941 2942 /* Always verify destination reachability. */ 2943 error = ip_bind_connected_v4(connp, mpp, protocol, &v4src, 2944 lport, v4dst, fport, B_TRUE, B_TRUE, cr); 2945 if (error != 0) 2946 goto bad_addr; 2947 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2948 connp->conn_pkt_isv6 = B_FALSE; 2949 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2950 ip1dbg(("ip_proto_bind_connected_v6: " 2951 "src is mapped, but not the dst\n")); 2952 goto bad_addr; 2953 } else { 2954 error = ip_bind_connected_v6(connp, mpp, protocol, v6srcp, 2955 lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2956 if (error != 0) 2957 goto bad_addr; 2958 connp->conn_pkt_isv6 = B_TRUE; 2959 } 2960 2961 ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, 2962 orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); 2963 2964 /* Send it home. */ 2965 return (0); 2966 2967 bad_addr: 2968 if (error == 0) 2969 error = -TBADADDR; 2970 return (error); 2971 } 2972 2973 /* 2974 * Get the ire in *mpp. Returns false if it fails (due to lack of space). 2975 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 
2976 */ 2977 /* ARGSUSED4 */ 2978 static boolean_t 2979 ip_bind_get_ire_v6(mblk_t **mpp, ire_t *ire, const in6_addr_t *dst, 2980 iulp_t *ulp_info, ip_stack_t *ipst) 2981 { 2982 mblk_t *mp = *mpp; 2983 ire_t *ret_ire; 2984 2985 ASSERT(mp != NULL); 2986 2987 if (ire != NULL) { 2988 /* 2989 * mp initialized above to IRE_DB_REQ_TYPE 2990 * appended mblk. Its <upper protocol>'s 2991 * job to make sure there is room. 2992 */ 2993 if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t)) 2994 return (B_FALSE); 2995 2996 mp->b_datap->db_type = IRE_DB_TYPE; 2997 mp->b_wptr = mp->b_rptr + sizeof (ire_t); 2998 bcopy(ire, mp->b_rptr, sizeof (ire_t)); 2999 ret_ire = (ire_t *)mp->b_rptr; 3000 if (IN6_IS_ADDR_MULTICAST(dst) || 3001 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 3002 ret_ire->ire_type = IRE_BROADCAST; 3003 ret_ire->ire_addr_v6 = *dst; 3004 } 3005 if (ulp_info != NULL) { 3006 bcopy(ulp_info, &(ret_ire->ire_uinfo), 3007 sizeof (iulp_t)); 3008 } 3009 ret_ire->ire_mp = mp; 3010 } else { 3011 /* 3012 * No IRE was found. Remove IRE mblk. 3013 */ 3014 *mpp = mp->b_cont; 3015 freeb(mp); 3016 } 3017 return (B_TRUE); 3018 } 3019 3020 /* 3021 * Add an ip6i_t header to the front of the mblk. 3022 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3023 * Returns NULL if allocation fails (and frees original message). 3024 * Used in outgoing path when going through ip_newroute_*v6(). 3025 * Used in incoming path to pass ifindex to transports. 
3026 */ 3027 mblk_t * 3028 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3029 { 3030 mblk_t *mp1; 3031 ip6i_t *ip6i; 3032 ip6_t *ip6h; 3033 3034 ip6h = (ip6_t *)mp->b_rptr; 3035 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3036 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3037 mp->b_datap->db_ref > 1) { 3038 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3039 if (mp1 == NULL) { 3040 freemsg(mp); 3041 return (NULL); 3042 } 3043 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3044 mp1->b_cont = mp; 3045 mp = mp1; 3046 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3047 } 3048 mp->b_rptr = (uchar_t *)ip6i; 3049 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3050 ip6i->ip6i_nxt = IPPROTO_RAW; 3051 if (ill != NULL) { 3052 ip6i->ip6i_flags = IP6I_IFINDEX; 3053 /* 3054 * If `ill' is in an IPMP group, make sure we use the IPMP 3055 * interface index so that e.g. IPV6_RECVPKTINFO will get the 3056 * IPMP interface index and not an underlying interface index. 3057 */ 3058 if (IS_UNDER_IPMP(ill)) 3059 ip6i->ip6i_ifindex = ipmp_ill_get_ipmp_ifindex(ill); 3060 else 3061 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3062 } else { 3063 ip6i->ip6i_flags = 0; 3064 } 3065 ip6i->ip6i_nexthop = *dst; 3066 return (mp); 3067 } 3068 3069 /* 3070 * Handle protocols with which IP is less intimate. There 3071 * can be more than one stream bound to a particular 3072 * protocol. When this is the case, normally each one gets a copy 3073 * of any incoming packets. 3074 * However, if the packet was tunneled and not multicast we only send to it 3075 * the first match. 3076 * 3077 * Zones notes: 3078 * Packets will be distributed to streams in all zones. This is really only 3079 * useful for ICMPv6 as only applications in the global zone can create raw 3080 * sockets for other protocols. 
3081 */ 3082 static void 3083 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3084 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3085 boolean_t mctl_present, zoneid_t zoneid) 3086 { 3087 queue_t *rq; 3088 mblk_t *mp1, *first_mp1; 3089 in6_addr_t dst = ip6h->ip6_dst; 3090 in6_addr_t src = ip6h->ip6_src; 3091 boolean_t one_only; 3092 mblk_t *first_mp = mp; 3093 boolean_t secure, shared_addr; 3094 conn_t *connp, *first_connp, *next_connp; 3095 connf_t *connfp; 3096 ip_stack_t *ipst = inill->ill_ipst; 3097 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3098 3099 if (mctl_present) { 3100 mp = first_mp->b_cont; 3101 secure = ipsec_in_is_secure(first_mp); 3102 ASSERT(mp != NULL); 3103 } else { 3104 secure = B_FALSE; 3105 } 3106 3107 /* 3108 * If the packet was tunneled and not multicast we only send to it 3109 * the first match. 3110 */ 3111 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3112 !IN6_IS_ADDR_MULTICAST(&dst)); 3113 3114 shared_addr = (zoneid == ALL_ZONES); 3115 if (shared_addr) { 3116 /* 3117 * We don't allow multilevel ports for raw IP, so no need to 3118 * check for that here. 3119 */ 3120 zoneid = tsol_packet_to_zoneid(mp); 3121 } 3122 3123 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3124 mutex_enter(&connfp->connf_lock); 3125 connp = connfp->connf_head; 3126 for (connp = connfp->connf_head; connp != NULL; 3127 connp = connp->conn_next) { 3128 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3129 zoneid) && 3130 (!is_system_labeled() || 3131 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3132 connp))) 3133 break; 3134 } 3135 3136 if (connp == NULL) { 3137 /* 3138 * No one bound to this port. Is 3139 * there a client that wants all 3140 * unclaimed datagrams? 
3141 */ 3142 mutex_exit(&connfp->connf_lock); 3143 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3144 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3145 nexthdr_offset, mctl_present, zoneid, ipst)) { 3146 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3147 } 3148 3149 return; 3150 } 3151 3152 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL); 3153 3154 CONN_INC_REF(connp); 3155 first_connp = connp; 3156 3157 /* 3158 * XXX: Fix the multiple protocol listeners case. We should not 3159 * be walking the conn->next list here. 3160 */ 3161 if (one_only) { 3162 /* 3163 * Only send message to one tunnel driver by immediately 3164 * terminating the loop. 3165 */ 3166 connp = NULL; 3167 } else { 3168 connp = connp->conn_next; 3169 3170 } 3171 for (;;) { 3172 while (connp != NULL) { 3173 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3174 flags, zoneid) && 3175 (!is_system_labeled() || 3176 tsol_receive_local(mp, &dst, IPV6_VERSION, 3177 shared_addr, connp))) 3178 break; 3179 connp = connp->conn_next; 3180 } 3181 3182 /* 3183 * Just copy the data part alone. The mctl part is 3184 * needed just for verifying policy and it is never 3185 * sent up. 3186 */ 3187 if (connp == NULL || 3188 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3189 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3190 /* 3191 * No more intested clients or memory 3192 * allocation failed 3193 */ 3194 connp = first_connp; 3195 break; 3196 } 3197 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 3198 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3199 CONN_INC_REF(connp); 3200 mutex_exit(&connfp->connf_lock); 3201 rq = connp->conn_rq; 3202 /* 3203 * For link-local always add ifindex so that transport can set 3204 * sin6_scope_id. Avoid it for ICMP error fanout. 
3205 */ 3206 if ((connp->conn_ip_recvpktinfo || 3207 IN6_IS_ADDR_LINKLOCAL(&src)) && 3208 (flags & IP_FF_IPINFO)) { 3209 /* Add header */ 3210 mp1 = ip_add_info_v6(mp1, inill, &dst); 3211 } 3212 if (mp1 == NULL) { 3213 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3214 } else if ( 3215 (IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3216 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3217 if (flags & IP_FF_RAWIP) { 3218 BUMP_MIB(ill->ill_ip_mib, 3219 rawipIfStatsInOverflows); 3220 } else { 3221 BUMP_MIB(ill->ill_icmp6_mib, 3222 ipv6IfIcmpInOverflows); 3223 } 3224 3225 freemsg(mp1); 3226 } else { 3227 /* 3228 * Don't enforce here if we're a tunnel - let "tun" do 3229 * it instead. 3230 */ 3231 if (!IPCL_IS_IPTUN(connp) && 3232 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3233 secure)) { 3234 first_mp1 = ipsec_check_inbound_policy( 3235 first_mp1, connp, NULL, ip6h, mctl_present); 3236 } 3237 if (first_mp1 != NULL) { 3238 if (mctl_present) 3239 freeb(first_mp1); 3240 BUMP_MIB(ill->ill_ip_mib, 3241 ipIfStatsHCInDelivers); 3242 (connp->conn_recv)(connp, mp1, NULL); 3243 } 3244 } 3245 mutex_enter(&connfp->connf_lock); 3246 /* Follow the next pointer before releasing the conn. */ 3247 next_connp = connp->conn_next; 3248 CONN_DEC_REF(connp); 3249 connp = next_connp; 3250 } 3251 3252 /* Last one. Send it upstream. */ 3253 mutex_exit(&connfp->connf_lock); 3254 3255 /* Initiate IPPF processing */ 3256 if (IP6_IN_IPP(flags, ipst)) { 3257 uint_t ifindex; 3258 3259 mutex_enter(&ill->ill_lock); 3260 ifindex = ill->ill_phyint->phyint_ifindex; 3261 mutex_exit(&ill->ill_lock); 3262 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3263 if (mp == NULL) { 3264 CONN_DEC_REF(connp); 3265 if (mctl_present) 3266 freeb(first_mp); 3267 return; 3268 } 3269 } 3270 3271 /* 3272 * For link-local always add ifindex so that transport can set 3273 * sin6_scope_id. Avoid it for ICMP error fanout. 
3274 */ 3275 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3276 (flags & IP_FF_IPINFO)) { 3277 /* Add header */ 3278 mp = ip_add_info_v6(mp, inill, &dst); 3279 if (mp == NULL) { 3280 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3281 CONN_DEC_REF(connp); 3282 if (mctl_present) 3283 freeb(first_mp); 3284 return; 3285 } else if (mctl_present) { 3286 first_mp->b_cont = mp; 3287 } else { 3288 first_mp = mp; 3289 } 3290 } 3291 3292 rq = connp->conn_rq; 3293 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3294 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3295 3296 if (flags & IP_FF_RAWIP) { 3297 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3298 } else { 3299 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3300 } 3301 3302 freemsg(first_mp); 3303 } else { 3304 if (IPCL_IS_IPTUN(connp)) { 3305 /* 3306 * Tunneled packet. We enforce policy in the tunnel 3307 * module itself. 3308 * 3309 * Send the WHOLE packet up (incl. IPSEC_IN) without 3310 * a policy check. 3311 */ 3312 putnext(rq, first_mp); 3313 CONN_DEC_REF(connp); 3314 return; 3315 } 3316 /* 3317 * Don't enforce here if we're a tunnel - let "tun" do 3318 * it instead. 3319 */ 3320 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3321 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3322 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3323 NULL, ip6h, mctl_present); 3324 if (first_mp == NULL) { 3325 CONN_DEC_REF(connp); 3326 return; 3327 } 3328 } 3329 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3330 (connp->conn_recv)(connp, mp, NULL); 3331 if (mctl_present) 3332 freeb(first_mp); 3333 } 3334 CONN_DEC_REF(connp); 3335 } 3336 3337 /* 3338 * Send an ICMP error after patching up the packet appropriately. Returns 3339 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3340 */ 3341 int 3342 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3343 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3344 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3345 { 3346 ip6_t *ip6h; 3347 mblk_t *first_mp; 3348 boolean_t secure; 3349 unsigned char db_type; 3350 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3351 3352 first_mp = mp; 3353 if (mctl_present) { 3354 mp = mp->b_cont; 3355 secure = ipsec_in_is_secure(first_mp); 3356 ASSERT(mp != NULL); 3357 } else { 3358 /* 3359 * If this is an ICMP error being reported - which goes 3360 * up as M_CTLs, we need to convert them to M_DATA till 3361 * we finish checking with global policy because 3362 * ipsec_check_global_policy() assumes M_DATA as clear 3363 * and M_CTL as secure. 3364 */ 3365 db_type = mp->b_datap->db_type; 3366 mp->b_datap->db_type = M_DATA; 3367 secure = B_FALSE; 3368 } 3369 /* 3370 * We are generating an icmp error for some inbound packet. 3371 * Called from all ip_fanout_(udp, tcp, proto) functions. 3372 * Before we generate an error, check with global policy 3373 * to see whether this is allowed to enter the system. As 3374 * there is no "conn", we are checking with global policy. 
3375 */ 3376 ip6h = (ip6_t *)mp->b_rptr; 3377 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3378 first_mp = ipsec_check_global_policy(first_mp, NULL, 3379 NULL, ip6h, mctl_present, ipst->ips_netstack); 3380 if (first_mp == NULL) 3381 return (0); 3382 } 3383 3384 if (!mctl_present) 3385 mp->b_datap->db_type = db_type; 3386 3387 if (flags & IP_FF_SEND_ICMP) { 3388 if (flags & IP_FF_HDR_COMPLETE) { 3389 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3390 freemsg(first_mp); 3391 return (1); 3392 } 3393 } 3394 switch (icmp_type) { 3395 case ICMP6_DST_UNREACH: 3396 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3397 B_FALSE, B_FALSE, zoneid, ipst); 3398 break; 3399 case ICMP6_PARAM_PROB: 3400 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3401 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3402 break; 3403 default: 3404 #ifdef DEBUG 3405 panic("ip_fanout_send_icmp_v6: wrong type"); 3406 /*NOTREACHED*/ 3407 #else 3408 freemsg(first_mp); 3409 break; 3410 #endif 3411 } 3412 } else { 3413 freemsg(first_mp); 3414 return (0); 3415 } 3416 3417 return (1); 3418 } 3419 3420 /* 3421 * Fanout for TCP packets 3422 * The caller puts <fport, lport> in the ports parameter. 3423 */ 3424 static void 3425 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3426 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3427 { 3428 mblk_t *first_mp; 3429 boolean_t secure; 3430 conn_t *connp; 3431 tcph_t *tcph; 3432 boolean_t syn_present = B_FALSE; 3433 ip_stack_t *ipst = inill->ill_ipst; 3434 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3435 3436 first_mp = mp; 3437 if (mctl_present) { 3438 mp = first_mp->b_cont; 3439 secure = ipsec_in_is_secure(first_mp); 3440 ASSERT(mp != NULL); 3441 } else { 3442 secure = B_FALSE; 3443 } 3444 3445 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3446 3447 if (connp == NULL || 3448 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3449 /* 3450 * No hard-bound match. 
Send Reset. 3451 */ 3452 dblk_t *dp = mp->b_datap; 3453 uint32_t ill_index; 3454 3455 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3456 3457 /* Initiate IPPf processing, if needed. */ 3458 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3459 (flags & IP6_NO_IPPOLICY)) { 3460 ill_index = ill->ill_phyint->phyint_ifindex; 3461 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3462 if (first_mp == NULL) { 3463 if (connp != NULL) 3464 CONN_DEC_REF(connp); 3465 return; 3466 } 3467 } 3468 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3469 if (connp != NULL) { 3470 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3471 ipst->ips_netstack->netstack_tcp, connp); 3472 CONN_DEC_REF(connp); 3473 } else { 3474 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3475 ipst->ips_netstack->netstack_tcp, NULL); 3476 } 3477 3478 return; 3479 } 3480 3481 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3482 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3483 if (IPCL_IS_TCP(connp)) { 3484 squeue_t *sqp; 3485 3486 /* 3487 * If the queue belongs to a conn, and fused tcp 3488 * loopback is enabled, assign the eager's squeue 3489 * to be that of the active connect's. 3490 */ 3491 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3492 CONN_Q(q) && IPCL_IS_TCP(Q_TO_CONN(q)) && 3493 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3494 !secure && 3495 !IP6_IN_IPP(flags, ipst)) { 3496 ASSERT(Q_TO_CONN(q)->conn_sqp != NULL); 3497 sqp = Q_TO_CONN(q)->conn_sqp; 3498 } else { 3499 sqp = IP_SQUEUE_GET(lbolt); 3500 } 3501 3502 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3503 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3504 3505 /* 3506 * db_cksumstuff is unused in the incoming 3507 * path; Thus store the ifindex here. It will 3508 * be cleared in tcp_conn_create_v6(). 
3509 */ 3510 DB_CKSUMSTUFF(mp) = 3511 (intptr_t)ill->ill_phyint->phyint_ifindex; 3512 syn_present = B_TRUE; 3513 } 3514 } 3515 3516 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3517 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3518 if ((flags & TH_RST) || (flags & TH_URG)) { 3519 CONN_DEC_REF(connp); 3520 freemsg(first_mp); 3521 return; 3522 } 3523 if (flags & TH_ACK) { 3524 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3525 ipst->ips_netstack->netstack_tcp, connp); 3526 CONN_DEC_REF(connp); 3527 return; 3528 } 3529 3530 CONN_DEC_REF(connp); 3531 freemsg(first_mp); 3532 return; 3533 } 3534 3535 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3536 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3537 NULL, ip6h, mctl_present); 3538 if (first_mp == NULL) { 3539 CONN_DEC_REF(connp); 3540 return; 3541 } 3542 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3543 ASSERT(syn_present); 3544 if (mctl_present) { 3545 ASSERT(first_mp != mp); 3546 first_mp->b_datap->db_struioflag |= 3547 STRUIO_POLICY; 3548 } else { 3549 ASSERT(first_mp == mp); 3550 mp->b_datap->db_struioflag &= 3551 ~STRUIO_EAGER; 3552 mp->b_datap->db_struioflag |= 3553 STRUIO_POLICY; 3554 } 3555 } else { 3556 /* 3557 * Discard first_mp early since we're dealing with a 3558 * fully-connected conn_t and tcp doesn't do policy in 3559 * this case. Also, if someone is bound to IPPROTO_TCP 3560 * over raw IP, they don't expect to see a M_CTL. 
3561 */ 3562 if (mctl_present) { 3563 freeb(first_mp); 3564 mctl_present = B_FALSE; 3565 } 3566 first_mp = mp; 3567 } 3568 } 3569 3570 /* Initiate IPPF processing */ 3571 if (IP6_IN_IPP(flags, ipst)) { 3572 uint_t ifindex; 3573 3574 mutex_enter(&ill->ill_lock); 3575 ifindex = ill->ill_phyint->phyint_ifindex; 3576 mutex_exit(&ill->ill_lock); 3577 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3578 if (mp == NULL) { 3579 CONN_DEC_REF(connp); 3580 if (mctl_present) { 3581 freeb(first_mp); 3582 } 3583 return; 3584 } else if (mctl_present) { 3585 /* 3586 * ip_add_info_v6 might return a new mp. 3587 */ 3588 ASSERT(first_mp != mp); 3589 first_mp->b_cont = mp; 3590 } else { 3591 first_mp = mp; 3592 } 3593 } 3594 3595 /* 3596 * For link-local always add ifindex so that TCP can bind to that 3597 * interface. Avoid it for ICMP error fanout. 3598 */ 3599 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3600 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3601 (flags & IP_FF_IPINFO))) { 3602 /* Add header */ 3603 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3604 if (mp == NULL) { 3605 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3606 CONN_DEC_REF(connp); 3607 if (mctl_present) 3608 freeb(first_mp); 3609 return; 3610 } else if (mctl_present) { 3611 ASSERT(first_mp != mp); 3612 first_mp->b_cont = mp; 3613 } else { 3614 first_mp = mp; 3615 } 3616 } 3617 3618 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3619 if (IPCL_IS_TCP(connp)) { 3620 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, connp->conn_recv, 3621 connp, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 3622 } else { 3623 /* SOCK_RAW, IPPROTO_TCP case */ 3624 (connp->conn_recv)(connp, first_mp, NULL); 3625 CONN_DEC_REF(connp); 3626 } 3627 } 3628 3629 /* 3630 * Fanout for UDP packets. 3631 * The caller puts <fport, lport> in the ports parameter. 3632 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3633 * 3634 * If SO_REUSEADDR is set all multicast and broadcast packets 3635 * will be delivered to all streams bound to the same port. 3636 * 3637 * Zones notes: 3638 * Multicast packets will be distributed to streams in all zones. 3639 */ 3640 static void 3641 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3642 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3643 zoneid_t zoneid) 3644 { 3645 uint32_t dstport, srcport; 3646 in6_addr_t dst; 3647 mblk_t *first_mp; 3648 boolean_t secure; 3649 conn_t *connp; 3650 connf_t *connfp; 3651 conn_t *first_conn; 3652 conn_t *next_conn; 3653 mblk_t *mp1, *first_mp1; 3654 in6_addr_t src; 3655 boolean_t shared_addr; 3656 ip_stack_t *ipst = inill->ill_ipst; 3657 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3658 3659 first_mp = mp; 3660 if (mctl_present) { 3661 mp = first_mp->b_cont; 3662 secure = ipsec_in_is_secure(first_mp); 3663 ASSERT(mp != NULL); 3664 } else { 3665 secure = B_FALSE; 3666 } 3667 3668 /* Extract ports in net byte order */ 3669 dstport = htons(ntohl(ports) & 0xFFFF); 3670 srcport = htons(ntohl(ports) >> 16); 3671 dst = ip6h->ip6_dst; 3672 src = ip6h->ip6_src; 3673 3674 shared_addr = (zoneid == ALL_ZONES); 3675 if (shared_addr) { 3676 /* 3677 * No need to handle exclusive-stack zones since ALL_ZONES 3678 * only applies to the shared stack. 3679 */ 3680 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3681 /* 3682 * If no shared MLP is found, tsol_mlp_findzone returns 3683 * ALL_ZONES. In that case, we assume it's SLP, and 3684 * search for the zone based on the packet label. 3685 * That will also return ALL_ZONES on failure, but 3686 * we never allow conn_zoneid to be set to ALL_ZONES. 3687 */ 3688 if (zoneid == ALL_ZONES) 3689 zoneid = tsol_packet_to_zoneid(mp); 3690 } 3691 3692 /* Attempt to find a client stream based on destination port. 
*/ 3693 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3694 mutex_enter(&connfp->connf_lock); 3695 connp = connfp->connf_head; 3696 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3697 /* 3698 * Not multicast. Send to the one (first) client we find. 3699 */ 3700 while (connp != NULL) { 3701 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3702 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3703 conn_wantpacket_v6(connp, ill, ip6h, 3704 flags, zoneid)) { 3705 break; 3706 } 3707 connp = connp->conn_next; 3708 } 3709 if (connp == NULL || connp->conn_upq == NULL) 3710 goto notfound; 3711 3712 if (is_system_labeled() && 3713 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3714 connp)) 3715 goto notfound; 3716 3717 /* Found a client */ 3718 CONN_INC_REF(connp); 3719 mutex_exit(&connfp->connf_lock); 3720 3721 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3722 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3723 freemsg(first_mp); 3724 CONN_DEC_REF(connp); 3725 return; 3726 } 3727 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3728 first_mp = ipsec_check_inbound_policy(first_mp, 3729 connp, NULL, ip6h, mctl_present); 3730 if (first_mp == NULL) { 3731 CONN_DEC_REF(connp); 3732 return; 3733 } 3734 } 3735 /* Initiate IPPF processing */ 3736 if (IP6_IN_IPP(flags, ipst)) { 3737 uint_t ifindex; 3738 3739 mutex_enter(&ill->ill_lock); 3740 ifindex = ill->ill_phyint->phyint_ifindex; 3741 mutex_exit(&ill->ill_lock); 3742 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3743 if (mp == NULL) { 3744 CONN_DEC_REF(connp); 3745 if (mctl_present) 3746 freeb(first_mp); 3747 return; 3748 } 3749 } 3750 /* 3751 * For link-local always add ifindex so that 3752 * transport can set sin6_scope_id. Avoid it for 3753 * ICMP error fanout. 
3754 */ 3755 if ((connp->conn_ip_recvpktinfo || 3756 IN6_IS_ADDR_LINKLOCAL(&src)) && 3757 (flags & IP_FF_IPINFO)) { 3758 /* Add header */ 3759 mp = ip_add_info_v6(mp, inill, &dst); 3760 if (mp == NULL) { 3761 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3762 CONN_DEC_REF(connp); 3763 if (mctl_present) 3764 freeb(first_mp); 3765 return; 3766 } else if (mctl_present) { 3767 first_mp->b_cont = mp; 3768 } else { 3769 first_mp = mp; 3770 } 3771 } 3772 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3773 3774 /* Send it upstream */ 3775 (connp->conn_recv)(connp, mp, NULL); 3776 3777 IP6_STAT(ipst, ip6_udp_fannorm); 3778 CONN_DEC_REF(connp); 3779 if (mctl_present) 3780 freeb(first_mp); 3781 return; 3782 } 3783 3784 while (connp != NULL) { 3785 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3786 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3787 (!is_system_labeled() || 3788 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3789 connp))) 3790 break; 3791 connp = connp->conn_next; 3792 } 3793 3794 if (connp == NULL || connp->conn_upq == NULL) 3795 goto notfound; 3796 3797 first_conn = connp; 3798 3799 CONN_INC_REF(connp); 3800 connp = connp->conn_next; 3801 for (;;) { 3802 while (connp != NULL) { 3803 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3804 src) && conn_wantpacket_v6(connp, ill, ip6h, 3805 flags, zoneid) && 3806 (!is_system_labeled() || 3807 tsol_receive_local(mp, &dst, IPV6_VERSION, 3808 shared_addr, connp))) 3809 break; 3810 connp = connp->conn_next; 3811 } 3812 /* 3813 * Just copy the data part alone. The mctl part is 3814 * needed just for verifying policy and it is never 3815 * sent up. 3816 */ 3817 if (connp == NULL || 3818 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3819 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3820 /* 3821 * No more interested clients or memory 3822 * allocation failed 3823 */ 3824 connp = first_conn; 3825 break; 3826 } 3827 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3828 CONN_INC_REF(connp); 3829 mutex_exit(&connfp->connf_lock); 3830 /* 3831 * For link-local always add ifindex so that transport 3832 * can set sin6_scope_id. Avoid it for ICMP error 3833 * fanout. 3834 */ 3835 if ((connp->conn_ip_recvpktinfo || 3836 IN6_IS_ADDR_LINKLOCAL(&src)) && 3837 (flags & IP_FF_IPINFO)) { 3838 /* Add header */ 3839 mp1 = ip_add_info_v6(mp1, inill, &dst); 3840 } 3841 /* mp1 could have changed */ 3842 if (mctl_present) 3843 first_mp1->b_cont = mp1; 3844 else 3845 first_mp1 = mp1; 3846 if (mp1 == NULL) { 3847 if (mctl_present) 3848 freeb(first_mp1); 3849 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3850 goto next_one; 3851 } 3852 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3853 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3854 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3855 freemsg(first_mp1); 3856 goto next_one; 3857 } 3858 3859 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3860 first_mp1 = ipsec_check_inbound_policy 3861 (first_mp1, connp, NULL, ip6h, 3862 mctl_present); 3863 } 3864 if (first_mp1 != NULL) { 3865 if (mctl_present) 3866 freeb(first_mp1); 3867 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3868 3869 /* Send it upstream */ 3870 (connp->conn_recv)(connp, mp1, NULL); 3871 } 3872 next_one: 3873 mutex_enter(&connfp->connf_lock); 3874 /* Follow the next pointer before releasing the conn. */ 3875 next_conn = connp->conn_next; 3876 IP6_STAT(ipst, ip6_udp_fanmb); 3877 CONN_DEC_REF(connp); 3878 connp = next_conn; 3879 } 3880 3881 /* Last one. Send it upstream. 
*/ 3882 mutex_exit(&connfp->connf_lock); 3883 3884 /* Initiate IPPF processing */ 3885 if (IP6_IN_IPP(flags, ipst)) { 3886 uint_t ifindex; 3887 3888 mutex_enter(&ill->ill_lock); 3889 ifindex = ill->ill_phyint->phyint_ifindex; 3890 mutex_exit(&ill->ill_lock); 3891 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3892 if (mp == NULL) { 3893 CONN_DEC_REF(connp); 3894 if (mctl_present) { 3895 freeb(first_mp); 3896 } 3897 return; 3898 } 3899 } 3900 3901 /* 3902 * For link-local always add ifindex so that transport can set 3903 * sin6_scope_id. Avoid it for ICMP error fanout. 3904 */ 3905 if ((connp->conn_ip_recvpktinfo || 3906 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3907 /* Add header */ 3908 mp = ip_add_info_v6(mp, inill, &dst); 3909 if (mp == NULL) { 3910 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3911 CONN_DEC_REF(connp); 3912 if (mctl_present) 3913 freeb(first_mp); 3914 return; 3915 } else if (mctl_present) { 3916 first_mp->b_cont = mp; 3917 } else { 3918 first_mp = mp; 3919 } 3920 } 3921 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3922 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3923 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3924 freemsg(mp); 3925 } else { 3926 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3927 first_mp = ipsec_check_inbound_policy(first_mp, 3928 connp, NULL, ip6h, mctl_present); 3929 if (first_mp == NULL) { 3930 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3931 CONN_DEC_REF(connp); 3932 return; 3933 } 3934 } 3935 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3936 3937 /* Send it upstream */ 3938 (connp->conn_recv)(connp, mp, NULL); 3939 } 3940 IP6_STAT(ipst, ip6_udp_fanmb); 3941 CONN_DEC_REF(connp); 3942 if (mctl_present) 3943 freeb(first_mp); 3944 return; 3945 3946 notfound: 3947 mutex_exit(&connfp->connf_lock); 3948 /* 3949 * No one bound to this port. Is 3950 * there a client that wants all 3951 * unclaimed datagrams? 
3952 */ 3953 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3954 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3955 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3956 zoneid); 3957 } else { 3958 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3959 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3960 mctl_present, zoneid, ipst)) { 3961 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3962 } 3963 } 3964 } 3965 3966 /* 3967 * int ip_find_hdr_v6() 3968 * 3969 * This routine is used by the upper layer protocols and the IP tunnel 3970 * module to: 3971 * - Set extension header pointers to appropriate locations 3972 * - Determine IPv6 header length and return it 3973 * - Return a pointer to the last nexthdr value 3974 * 3975 * The caller must initialize ipp_fields. 3976 * 3977 * NOTE: If multiple extension headers of the same type are present, 3978 * ip_find_hdr_v6() will set the respective extension header pointers 3979 * to the first one that it encounters in the IPv6 header. It also 3980 * skips fragment headers. This routine deals with malformed packets 3981 * of various sorts in which case the returned length is up to the 3982 * malformed part. 3983 */ 3984 int 3985 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3986 { 3987 uint_t length, ehdrlen; 3988 uint8_t nexthdr; 3989 uint8_t *whereptr, *endptr; 3990 ip6_dest_t *tmpdstopts; 3991 ip6_rthdr_t *tmprthdr; 3992 ip6_hbh_t *tmphopopts; 3993 ip6_frag_t *tmpfraghdr; 3994 3995 length = IPV6_HDR_LEN; 3996 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3997 endptr = mp->b_wptr; 3998 3999 nexthdr = ip6h->ip6_nxt; 4000 while (whereptr < endptr) { 4001 /* Is there enough left for len + nexthdr? 
*/ 4002 if (whereptr + MIN_EHDR_LEN > endptr) 4003 goto done; 4004 4005 switch (nexthdr) { 4006 case IPPROTO_HOPOPTS: 4007 tmphopopts = (ip6_hbh_t *)whereptr; 4008 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 4009 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 4010 goto done; 4011 nexthdr = tmphopopts->ip6h_nxt; 4012 /* return only 1st hbh */ 4013 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4014 ipp->ipp_fields |= IPPF_HOPOPTS; 4015 ipp->ipp_hopopts = tmphopopts; 4016 ipp->ipp_hopoptslen = ehdrlen; 4017 } 4018 break; 4019 case IPPROTO_DSTOPTS: 4020 tmpdstopts = (ip6_dest_t *)whereptr; 4021 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4022 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4023 goto done; 4024 nexthdr = tmpdstopts->ip6d_nxt; 4025 /* 4026 * ipp_dstopts is set to the destination header after a 4027 * routing header. 4028 * Assume it is a post-rthdr destination header 4029 * and adjust when we find an rthdr. 4030 */ 4031 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4032 ipp->ipp_fields |= IPPF_DSTOPTS; 4033 ipp->ipp_dstopts = tmpdstopts; 4034 ipp->ipp_dstoptslen = ehdrlen; 4035 } 4036 break; 4037 case IPPROTO_ROUTING: 4038 tmprthdr = (ip6_rthdr_t *)whereptr; 4039 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4040 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4041 goto done; 4042 nexthdr = tmprthdr->ip6r_nxt; 4043 /* return only 1st rthdr */ 4044 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4045 ipp->ipp_fields |= IPPF_RTHDR; 4046 ipp->ipp_rthdr = tmprthdr; 4047 ipp->ipp_rthdrlen = ehdrlen; 4048 } 4049 /* 4050 * Make any destination header we've seen be a 4051 * pre-rthdr destination header. 
4052 */ 4053 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4054 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4055 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4056 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4057 ipp->ipp_dstopts = NULL; 4058 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4059 ipp->ipp_dstoptslen = 0; 4060 } 4061 break; 4062 case IPPROTO_FRAGMENT: 4063 tmpfraghdr = (ip6_frag_t *)whereptr; 4064 ehdrlen = sizeof (ip6_frag_t); 4065 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4066 goto done; 4067 nexthdr = tmpfraghdr->ip6f_nxt; 4068 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4069 ipp->ipp_fields |= IPPF_FRAGHDR; 4070 ipp->ipp_fraghdr = tmpfraghdr; 4071 ipp->ipp_fraghdrlen = ehdrlen; 4072 } 4073 break; 4074 case IPPROTO_NONE: 4075 default: 4076 goto done; 4077 } 4078 length += ehdrlen; 4079 whereptr += ehdrlen; 4080 } 4081 done: 4082 if (nexthdrp != NULL) 4083 *nexthdrp = nexthdr; 4084 return (length); 4085 } 4086 4087 int 4088 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4089 { 4090 ire_t *ire; 4091 4092 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4093 ire = ire_lookup_local_v6(zoneid, ipst); 4094 if (ire == NULL) { 4095 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4096 return (1); 4097 } 4098 ip6h->ip6_src = ire->ire_addr_v6; 4099 ire_refrele(ire); 4100 } 4101 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4102 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4103 return (0); 4104 } 4105 4106 /* 4107 * Try to determine where and what are the IPv6 header length and 4108 * pointer to nexthdr value for the upper layer protocol (or an 4109 * unknown next hdr). 4110 * 4111 * Parameters returns a pointer to the nexthdr value; 4112 * Must handle malformed packets of various sorts. 4113 * Function returns failure for malformed cases. 
4114 */ 4115 boolean_t 4116 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4117 uint8_t **nexthdrpp) 4118 { 4119 uint16_t length; 4120 uint_t ehdrlen; 4121 uint8_t *nexthdrp; 4122 uint8_t *whereptr; 4123 uint8_t *endptr; 4124 ip6_dest_t *desthdr; 4125 ip6_rthdr_t *rthdr; 4126 ip6_frag_t *fraghdr; 4127 4128 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4129 length = IPV6_HDR_LEN; 4130 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4131 endptr = mp->b_wptr; 4132 4133 nexthdrp = &ip6h->ip6_nxt; 4134 while (whereptr < endptr) { 4135 /* Is there enough left for len + nexthdr? */ 4136 if (whereptr + MIN_EHDR_LEN > endptr) 4137 break; 4138 4139 switch (*nexthdrp) { 4140 case IPPROTO_HOPOPTS: 4141 case IPPROTO_DSTOPTS: 4142 /* Assumes the headers are identical for hbh and dst */ 4143 desthdr = (ip6_dest_t *)whereptr; 4144 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4145 if ((uchar_t *)desthdr + ehdrlen > endptr) 4146 return (B_FALSE); 4147 nexthdrp = &desthdr->ip6d_nxt; 4148 break; 4149 case IPPROTO_ROUTING: 4150 rthdr = (ip6_rthdr_t *)whereptr; 4151 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4152 if ((uchar_t *)rthdr + ehdrlen > endptr) 4153 return (B_FALSE); 4154 nexthdrp = &rthdr->ip6r_nxt; 4155 break; 4156 case IPPROTO_FRAGMENT: 4157 fraghdr = (ip6_frag_t *)whereptr; 4158 ehdrlen = sizeof (ip6_frag_t); 4159 if ((uchar_t *)&fraghdr[1] > endptr) 4160 return (B_FALSE); 4161 nexthdrp = &fraghdr->ip6f_nxt; 4162 break; 4163 case IPPROTO_NONE: 4164 /* No next header means we're finished */ 4165 default: 4166 *hdr_length_ptr = length; 4167 *nexthdrpp = nexthdrp; 4168 return (B_TRUE); 4169 } 4170 length += ehdrlen; 4171 whereptr += ehdrlen; 4172 *hdr_length_ptr = length; 4173 *nexthdrpp = nexthdrp; 4174 } 4175 switch (*nexthdrp) { 4176 case IPPROTO_HOPOPTS: 4177 case IPPROTO_DSTOPTS: 4178 case IPPROTO_ROUTING: 4179 case IPPROTO_FRAGMENT: 4180 /* 4181 * If any know extension headers are still to be processed, 4182 * the 
packet's malformed (or at least all the IP header(s) are 4183 * not in the same mblk - and that should never happen. 4184 */ 4185 return (B_FALSE); 4186 4187 default: 4188 /* 4189 * If we get here, we know that all of the IP headers were in 4190 * the same mblk, even if the ULP header is in the next mblk. 4191 */ 4192 *hdr_length_ptr = length; 4193 *nexthdrpp = nexthdrp; 4194 return (B_TRUE); 4195 } 4196 } 4197 4198 /* 4199 * Return the length of the IPv6 related headers (including extension headers) 4200 * Returns a length even if the packet is malformed. 4201 */ 4202 int 4203 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4204 { 4205 uint16_t hdr_len; 4206 uint8_t *nexthdrp; 4207 4208 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4209 return (hdr_len); 4210 } 4211 4212 /* 4213 * IPv6 - 4214 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4215 * to send out a packet to a destination address for which we do not have 4216 * specific routing information. 4217 * 4218 * Handle non-multicast packets. If ill is non-NULL the match is done 4219 * for that ill. 4220 * 4221 * When a specific ill is specified (using IPV6_PKTINFO, 4222 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4223 * on routing entries (ftable and ctable) that have a matching 4224 * ire->ire_ipif->ipif_ill. Thus this can only be used 4225 * for destinations that are on-link for the specific ill 4226 * and that can appear on multiple links. Thus it is useful 4227 * for multicast destinations, link-local destinations, and 4228 * at some point perhaps for site-local destinations (if the 4229 * node sits at a site boundary). 4230 * We create the cache entries in the regular ctable since 4231 * it can not "confuse" things for other destinations. 4232 * 4233 * NOTE : These are the scopes of some of the variables that point at IRE, 4234 * which needs to be followed while making any future modifications 4235 * to avoid memory leaks. 
4236 * 4237 * - ire and sire are the entries looked up initially by 4238 * ire_ftable_lookup_v6. 4239 * - ipif_ire is used to hold the interface ire associated with 4240 * the new cache ire. But it's scope is limited, so we always REFRELE 4241 * it before branching out to error paths. 4242 * - save_ire is initialized before ire_create, so that ire returned 4243 * by ire_create will not over-write the ire. We REFRELE save_ire 4244 * before breaking out of the switch. 4245 * 4246 * Thus on failures, we have to REFRELE only ire and sire, if they 4247 * are not NULL. 4248 */ 4249 /* ARGSUSED */ 4250 void 4251 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4252 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4253 { 4254 in6_addr_t v6gw; 4255 in6_addr_t dst; 4256 ire_t *ire = NULL; 4257 ipif_t *src_ipif = NULL; 4258 ill_t *dst_ill = NULL; 4259 ire_t *sire = NULL; 4260 ire_t *save_ire; 4261 ip6_t *ip6h; 4262 int err = 0; 4263 mblk_t *first_mp; 4264 ipsec_out_t *io; 4265 ushort_t ire_marks = 0; 4266 int match_flags; 4267 ire_t *first_sire = NULL; 4268 mblk_t *copy_mp = NULL; 4269 mblk_t *xmit_mp = NULL; 4270 in6_addr_t save_dst; 4271 uint32_t multirt_flags = 4272 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4273 boolean_t multirt_is_resolvable; 4274 boolean_t multirt_resolve_next; 4275 boolean_t need_rele = B_FALSE; 4276 boolean_t ip6_asp_table_held = B_FALSE; 4277 tsol_ire_gw_secattr_t *attrp = NULL; 4278 tsol_gcgrp_t *gcgrp = NULL; 4279 tsol_gcgrp_addr_t ga; 4280 4281 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4282 4283 first_mp = mp; 4284 if (mp->b_datap->db_type == M_CTL) { 4285 mp = mp->b_cont; 4286 io = (ipsec_out_t *)first_mp->b_rptr; 4287 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4288 } else { 4289 io = NULL; 4290 } 4291 4292 ip6h = (ip6_t *)mp->b_rptr; 4293 4294 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4295 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4296 goto icmp_err_ret; 4297 } else if 
(IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4298 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4299 goto icmp_err_ret; 4300 } 4301 4302 /* 4303 * If this IRE is created for forwarding or it is not for 4304 * TCP traffic, mark it as temporary. 4305 * 4306 * Is it sufficient just to check the next header?? 4307 */ 4308 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4309 ire_marks |= IRE_MARK_TEMPORARY; 4310 4311 /* 4312 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4313 * chain until it gets the most specific information available. 4314 * For example, we know that there is no IRE_CACHE for this dest, 4315 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4316 * ire_ftable_lookup_v6 will look up the gateway, etc. 4317 */ 4318 4319 if (ill == NULL) { 4320 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4321 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4322 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4323 NULL, &sire, zoneid, 0, msg_getlabel(mp), 4324 match_flags, ipst); 4325 } else { 4326 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4327 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4328 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4329 4330 /* 4331 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4332 * tied to an underlying interface, IS_UNDER_IPMP() may be 4333 * true even when building IREs that will be used for data 4334 * traffic. As such, use the packet's source address to 4335 * determine whether the traffic is test traffic, and set 4336 * MATCH_IRE_MARK_TESTHIDDEN if so. 
4337 */ 4338 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 4339 if (ipif_lookup_testaddr_v6(ill, v6srcp, NULL)) 4340 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 4341 } 4342 4343 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4344 &sire, zoneid, 0, msg_getlabel(mp), match_flags, ipst); 4345 } 4346 4347 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4348 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4349 4350 /* 4351 * We enter a loop that will be run only once in most cases. 4352 * The loop is re-entered in the case where the destination 4353 * can be reached through multiple RTF_MULTIRT-flagged routes. 4354 * The intention is to compute multiple routes to a single 4355 * destination in a single ip_newroute_v6 call. 4356 * The information is contained in sire->ire_flags. 4357 */ 4358 do { 4359 multirt_resolve_next = B_FALSE; 4360 4361 if (dst_ill != NULL) { 4362 ill_refrele(dst_ill); 4363 dst_ill = NULL; 4364 } 4365 if (src_ipif != NULL) { 4366 ipif_refrele(src_ipif); 4367 src_ipif = NULL; 4368 } 4369 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4370 ip3dbg(("ip_newroute_v6: starting new resolution " 4371 "with first_mp %p, tag %d\n", 4372 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4373 4374 /* 4375 * We check if there are trailing unresolved routes for 4376 * the destination contained in sire. 4377 */ 4378 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4379 &sire, multirt_flags, msg_getlabel(mp), ipst); 4380 4381 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4382 "ire %p, sire %p\n", 4383 multirt_is_resolvable, (void *)ire, (void *)sire)); 4384 4385 if (!multirt_is_resolvable) { 4386 /* 4387 * No more multirt routes to resolve; give up 4388 * (all routes resolved or no more resolvable 4389 * routes). 
4390 */ 4391 if (ire != NULL) { 4392 ire_refrele(ire); 4393 ire = NULL; 4394 } 4395 } else { 4396 ASSERT(sire != NULL); 4397 ASSERT(ire != NULL); 4398 /* 4399 * We simply use first_sire as a flag that 4400 * indicates if a resolvable multirt route has 4401 * already been found during the preceding 4402 * loops. If it is not the case, we may have 4403 * to send an ICMP error to report that the 4404 * destination is unreachable. We do not 4405 * IRE_REFHOLD first_sire. 4406 */ 4407 if (first_sire == NULL) { 4408 first_sire = sire; 4409 } 4410 } 4411 } 4412 if ((ire == NULL) || (ire == sire)) { 4413 /* 4414 * either ire == NULL (the destination cannot be 4415 * resolved) or ire == sire (the gateway cannot be 4416 * resolved). At this point, there are no more routes 4417 * to resolve for the destination, thus we exit. 4418 */ 4419 if (ip_debug > 3) { 4420 /* ip2dbg */ 4421 pr_addr_dbg("ip_newroute_v6: " 4422 "can't resolve %s\n", AF_INET6, v6dstp); 4423 } 4424 ip3dbg(("ip_newroute_v6: " 4425 "ire %p, sire %p, first_sire %p\n", 4426 (void *)ire, (void *)sire, (void *)first_sire)); 4427 4428 if (sire != NULL) { 4429 ire_refrele(sire); 4430 sire = NULL; 4431 } 4432 4433 if (first_sire != NULL) { 4434 /* 4435 * At least one multirt route has been found 4436 * in the same ip_newroute() call; there is no 4437 * need to report an ICMP error. 4438 * first_sire was not IRE_REFHOLDed. 4439 */ 4440 MULTIRT_DEBUG_UNTAG(first_mp); 4441 freemsg(first_mp); 4442 return; 4443 } 4444 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4445 RTA_DST, ipst); 4446 goto icmp_err_ret; 4447 } 4448 4449 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4450 4451 /* 4452 * Verify that the returned IRE does not have either the 4453 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4454 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 
4455 */ 4456 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4457 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4458 goto icmp_err_ret; 4459 4460 /* 4461 * Increment the ire_ob_pkt_count field for ire if it is an 4462 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4463 * increment the same for the parent IRE, sire, if it is some 4464 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4465 */ 4466 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4467 UPDATE_OB_PKT_COUNT(ire); 4468 ire->ire_last_used_time = lbolt; 4469 } 4470 4471 if (sire != NULL) { 4472 mutex_enter(&sire->ire_lock); 4473 v6gw = sire->ire_gateway_addr_v6; 4474 mutex_exit(&sire->ire_lock); 4475 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4476 IRE_INTERFACE)) == 0); 4477 UPDATE_OB_PKT_COUNT(sire); 4478 sire->ire_last_used_time = lbolt; 4479 } else { 4480 v6gw = ipv6_all_zeros; 4481 } 4482 4483 /* 4484 * We have a route to reach the destination. Find the 4485 * appropriate ill, then get a source address that matches the 4486 * right scope via ipif_select_source_v6(). 4487 * 4488 * If we are here trying to create an IRE_CACHE for an offlink 4489 * destination and have an IRE_CACHE entry for VNI, then use 4490 * ire_stq instead since VNI's queue is a black hole. 4491 * 4492 * Note: While we pick a dst_ill we are really only interested 4493 * in the ill for load spreading. The source ipif is 4494 * determined by source address selection below. 
4495 */ 4496 if ((ire->ire_type == IRE_CACHE) && 4497 IS_VNI(ire->ire_ipif->ipif_ill)) { 4498 dst_ill = ire->ire_stq->q_ptr; 4499 ill_refhold(dst_ill); 4500 } else { 4501 ill_t *ill = ire->ire_ipif->ipif_ill; 4502 4503 if (IS_IPMP(ill)) { 4504 dst_ill = 4505 ipmp_illgrp_hold_next_ill(ill->ill_grp); 4506 } else { 4507 dst_ill = ill; 4508 ill_refhold(dst_ill); 4509 } 4510 } 4511 4512 if (dst_ill == NULL) { 4513 if (ip_debug > 2) { 4514 pr_addr_dbg("ip_newroute_v6 : no dst " 4515 "ill for dst %s\n", AF_INET6, v6dstp); 4516 } 4517 goto icmp_err_ret; 4518 } 4519 4520 if (ill != NULL && dst_ill != ill && 4521 !IS_IN_SAME_ILLGRP(dst_ill, ill)) { 4522 /* 4523 * We should have found a route matching "ill" 4524 * as we called ire_ftable_lookup_v6 with 4525 * MATCH_IRE_ILL. Rather than asserting when 4526 * there is a mismatch, we just drop the packet. 4527 */ 4528 ip0dbg(("ip_newroute_v6: BOUND_IF failed: " 4529 "dst_ill %s ill %s\n", dst_ill->ill_name, 4530 ill->ill_name)); 4531 goto icmp_err_ret; 4532 } 4533 4534 /* 4535 * Pick a source address which matches the scope of the 4536 * destination address. 4537 * For RTF_SETSRC routes, the source address is imposed by the 4538 * parent ire (sire). 4539 */ 4540 ASSERT(src_ipif == NULL); 4541 4542 /* 4543 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4544 * tied to the underlying interface, IS_UNDER_IPMP() may be 4545 * true even when building IREs that will be used for data 4546 * traffic. As such, see if the packet's source address is a 4547 * test address, and if so use that test address's ipif for 4548 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 4549 * ire_add_v6() can work properly. 
4550 */ 4551 if (ill != NULL && IS_UNDER_IPMP(ill)) 4552 (void) ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 4553 4554 if (src_ipif == NULL && ire->ire_type == IRE_IF_RESOLVER && 4555 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4556 ip6_asp_can_lookup(ipst)) { 4557 /* 4558 * The ire cache entry we're adding is for the 4559 * gateway itself. The source address in this case 4560 * is relative to the gateway's address. 4561 */ 4562 ip6_asp_table_held = B_TRUE; 4563 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4564 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4565 if (src_ipif != NULL) 4566 ire_marks |= IRE_MARK_USESRC_CHECK; 4567 } else if (src_ipif == NULL) { 4568 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4569 /* 4570 * Check that the ipif matching the requested 4571 * source address still exists. 4572 */ 4573 src_ipif = ipif_lookup_addr_v6( 4574 &sire->ire_src_addr_v6, NULL, zoneid, 4575 NULL, NULL, NULL, NULL, ipst); 4576 } 4577 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4578 ip6_asp_table_held = B_TRUE; 4579 src_ipif = ipif_select_source_v6(dst_ill, 4580 v6dstp, B_FALSE, 4581 IPV6_PREFER_SRC_DEFAULT, zoneid); 4582 if (src_ipif != NULL) 4583 ire_marks |= IRE_MARK_USESRC_CHECK; 4584 } 4585 } 4586 4587 if (src_ipif == NULL) { 4588 if (ip_debug > 2) { 4589 /* ip1dbg */ 4590 pr_addr_dbg("ip_newroute_v6: no src for " 4591 "dst %s\n", AF_INET6, v6dstp); 4592 printf("ip_newroute_v6: interface name %s\n", 4593 dst_ill->ill_name); 4594 } 4595 goto icmp_err_ret; 4596 } 4597 4598 if (ip_debug > 3) { 4599 /* ip2dbg */ 4600 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4601 AF_INET6, &v6gw); 4602 } 4603 ip2dbg(("\tire type %s (%d)\n", 4604 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4605 4606 /* 4607 * At this point in ip_newroute_v6(), ire is either the 4608 * IRE_CACHE of the next-hop gateway for an off-subnet 4609 * destination or an IRE_INTERFACE type that should be used 4610 * to resolve an on-subnet destination or an on-subnet 4611 * 
next-hop gateway. 4612 * 4613 * In the IRE_CACHE case, we have the following : 4614 * 4615 * 1) src_ipif - used for getting a source address. 4616 * 4617 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4618 * means packets using this IRE_CACHE will go out on dst_ill. 4619 * 4620 * 3) The IRE sire will point to the prefix that is the longest 4621 * matching route for the destination. These prefix types 4622 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4623 * 4624 * The newly created IRE_CACHE entry for the off-subnet 4625 * destination is tied to both the prefix route and the 4626 * interface route used to resolve the next-hop gateway 4627 * via the ire_phandle and ire_ihandle fields, respectively. 4628 * 4629 * In the IRE_INTERFACE case, we have the following : 4630 * 4631 * 1) src_ipif - used for getting a source address. 4632 * 4633 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4634 * means packets using the IRE_CACHE that we will build 4635 * here will go out on dst_ill. 4636 * 4637 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4638 * to be created will only be tied to the IRE_INTERFACE that 4639 * was derived from the ire_ihandle field. 4640 * 4641 * If sire is non-NULL, it means the destination is off-link 4642 * and we will first create the IRE_CACHE for the gateway. 4643 * Next time through ip_newroute_v6, we will create the 4644 * IRE_CACHE for the final destination as described above. 4645 */ 4646 save_ire = ire; 4647 switch (ire->ire_type) { 4648 case IRE_CACHE: { 4649 ire_t *ipif_ire; 4650 4651 ASSERT(sire != NULL); 4652 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4653 mutex_enter(&ire->ire_lock); 4654 v6gw = ire->ire_gateway_addr_v6; 4655 mutex_exit(&ire->ire_lock); 4656 } 4657 /* 4658 * We need 3 ire's to create a new cache ire for an 4659 * off-link destination from the cache ire of the 4660 * gateway. 4661 * 4662 * 1. The prefix ire 'sire' 4663 * 2. The cache ire of the gateway 'ire' 4664 * 3. 
The interface ire 'ipif_ire' 4665 * 4666 * We have (1) and (2). We lookup (3) below. 4667 * 4668 * If there is no interface route to the gateway, 4669 * it is a race condition, where we found the cache 4670 * but the inteface route has been deleted. 4671 */ 4672 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4673 if (ipif_ire == NULL) { 4674 ip1dbg(("ip_newroute_v6:" 4675 "ire_ihandle_lookup_offlink_v6 failed\n")); 4676 goto icmp_err_ret; 4677 } 4678 4679 /* 4680 * Note: the new ire inherits RTF_SETSRC 4681 * and RTF_MULTIRT to propagate these flags from prefix 4682 * to cache. 4683 */ 4684 4685 /* 4686 * Check cached gateway IRE for any security 4687 * attributes; if found, associate the gateway 4688 * credentials group to the destination IRE. 4689 */ 4690 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4691 mutex_enter(&attrp->igsa_lock); 4692 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4693 GCGRP_REFHOLD(gcgrp); 4694 mutex_exit(&attrp->igsa_lock); 4695 } 4696 4697 ire = ire_create_v6( 4698 v6dstp, /* dest address */ 4699 &ipv6_all_ones, /* mask */ 4700 &src_ipif->ipif_v6src_addr, /* source address */ 4701 &v6gw, /* gateway address */ 4702 &save_ire->ire_max_frag, 4703 NULL, /* src nce */ 4704 dst_ill->ill_rq, /* recv-from queue */ 4705 dst_ill->ill_wq, /* send-to queue */ 4706 IRE_CACHE, 4707 src_ipif, 4708 &sire->ire_mask_v6, /* Parent mask */ 4709 sire->ire_phandle, /* Parent handle */ 4710 ipif_ire->ire_ihandle, /* Interface handle */ 4711 sire->ire_flags & /* flags if any */ 4712 (RTF_SETSRC | RTF_MULTIRT), 4713 &(sire->ire_uinfo), 4714 NULL, 4715 gcgrp, 4716 ipst); 4717 4718 if (ire == NULL) { 4719 if (gcgrp != NULL) { 4720 GCGRP_REFRELE(gcgrp); 4721 gcgrp = NULL; 4722 } 4723 ire_refrele(save_ire); 4724 ire_refrele(ipif_ire); 4725 break; 4726 } 4727 4728 /* reference now held by IRE */ 4729 gcgrp = NULL; 4730 4731 ire->ire_marks |= ire_marks; 4732 4733 /* 4734 * Prevent sire and ipif_ire from getting deleted. 
The 4735 * newly created ire is tied to both of them via the 4736 * phandle and ihandle respectively. 4737 */ 4738 IRB_REFHOLD(sire->ire_bucket); 4739 /* Has it been removed already ? */ 4740 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4741 IRB_REFRELE(sire->ire_bucket); 4742 ire_refrele(ipif_ire); 4743 ire_refrele(save_ire); 4744 break; 4745 } 4746 4747 IRB_REFHOLD(ipif_ire->ire_bucket); 4748 /* Has it been removed already ? */ 4749 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4750 IRB_REFRELE(ipif_ire->ire_bucket); 4751 IRB_REFRELE(sire->ire_bucket); 4752 ire_refrele(ipif_ire); 4753 ire_refrele(save_ire); 4754 break; 4755 } 4756 4757 xmit_mp = first_mp; 4758 if (ire->ire_flags & RTF_MULTIRT) { 4759 copy_mp = copymsg(first_mp); 4760 if (copy_mp != NULL) { 4761 xmit_mp = copy_mp; 4762 MULTIRT_DEBUG_TAG(first_mp); 4763 } 4764 } 4765 ire_add_then_send(q, ire, xmit_mp); 4766 if (ip6_asp_table_held) { 4767 ip6_asp_table_refrele(ipst); 4768 ip6_asp_table_held = B_FALSE; 4769 } 4770 ire_refrele(save_ire); 4771 4772 /* Assert that sire is not deleted yet. */ 4773 ASSERT(sire->ire_ptpn != NULL); 4774 IRB_REFRELE(sire->ire_bucket); 4775 4776 /* Assert that ipif_ire is not deleted yet. */ 4777 ASSERT(ipif_ire->ire_ptpn != NULL); 4778 IRB_REFRELE(ipif_ire->ire_bucket); 4779 ire_refrele(ipif_ire); 4780 4781 if (copy_mp != NULL) { 4782 /* 4783 * Search for the next unresolved 4784 * multirt route. 4785 */ 4786 copy_mp = NULL; 4787 ipif_ire = NULL; 4788 ire = NULL; 4789 /* re-enter the loop */ 4790 multirt_resolve_next = B_TRUE; 4791 continue; 4792 } 4793 ire_refrele(sire); 4794 ill_refrele(dst_ill); 4795 ipif_refrele(src_ipif); 4796 return; 4797 } 4798 case IRE_IF_NORESOLVER: 4799 /* 4800 * We have what we need to build an IRE_CACHE. 4801 * 4802 * handle the Gated case, where we create 4803 * a NORESOLVER route for loopback. 
4804 */ 4805 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4806 break; 4807 /* 4808 * TSol note: We are creating the ire cache for the 4809 * destination 'dst'. If 'dst' is offlink, going 4810 * through the first hop 'gw', the security attributes 4811 * of 'dst' must be set to point to the gateway 4812 * credentials of gateway 'gw'. If 'dst' is onlink, it 4813 * is possible that 'dst' is a potential gateway that is 4814 * referenced by some route that has some security 4815 * attributes. Thus in the former case, we need to do a 4816 * gcgrp_lookup of 'gw' while in the latter case we 4817 * need to do gcgrp_lookup of 'dst' itself. 4818 */ 4819 ga.ga_af = AF_INET6; 4820 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4821 ga.ga_addr = v6gw; 4822 else 4823 ga.ga_addr = *v6dstp; 4824 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4825 4826 /* 4827 * Note: the new ire inherits sire flags RTF_SETSRC 4828 * and RTF_MULTIRT to propagate those rules from prefix 4829 * to cache. 4830 */ 4831 ire = ire_create_v6( 4832 v6dstp, /* dest address */ 4833 &ipv6_all_ones, /* mask */ 4834 &src_ipif->ipif_v6src_addr, /* source address */ 4835 &v6gw, /* gateway address */ 4836 &save_ire->ire_max_frag, 4837 NULL, /* no src nce */ 4838 dst_ill->ill_rq, /* recv-from queue */ 4839 dst_ill->ill_wq, /* send-to queue */ 4840 IRE_CACHE, 4841 src_ipif, 4842 &save_ire->ire_mask_v6, /* Parent mask */ 4843 (sire != NULL) ? /* Parent handle */ 4844 sire->ire_phandle : 0, 4845 save_ire->ire_ihandle, /* Interface handle */ 4846 (sire != NULL) ? 
/* flags if any */ 4847 sire->ire_flags & 4848 (RTF_SETSRC | RTF_MULTIRT) : 0, 4849 &(save_ire->ire_uinfo), 4850 NULL, 4851 gcgrp, 4852 ipst); 4853 4854 if (ire == NULL) { 4855 if (gcgrp != NULL) { 4856 GCGRP_REFRELE(gcgrp); 4857 gcgrp = NULL; 4858 } 4859 ire_refrele(save_ire); 4860 break; 4861 } 4862 4863 /* reference now held by IRE */ 4864 gcgrp = NULL; 4865 4866 ire->ire_marks |= ire_marks; 4867 4868 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4869 dst = v6gw; 4870 else 4871 dst = *v6dstp; 4872 err = ndp_noresolver(dst_ill, &dst); 4873 if (err != 0) { 4874 ire_refrele(save_ire); 4875 break; 4876 } 4877 4878 /* Prevent save_ire from getting deleted */ 4879 IRB_REFHOLD(save_ire->ire_bucket); 4880 /* Has it been removed already ? */ 4881 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4882 IRB_REFRELE(save_ire->ire_bucket); 4883 ire_refrele(save_ire); 4884 break; 4885 } 4886 4887 xmit_mp = first_mp; 4888 /* 4889 * In case of MULTIRT, a copy of the current packet 4890 * to send is made to further re-enter the 4891 * loop and attempt another route resolution 4892 */ 4893 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4894 copy_mp = copymsg(first_mp); 4895 if (copy_mp != NULL) { 4896 xmit_mp = copy_mp; 4897 MULTIRT_DEBUG_TAG(first_mp); 4898 } 4899 } 4900 ire_add_then_send(q, ire, xmit_mp); 4901 if (ip6_asp_table_held) { 4902 ip6_asp_table_refrele(ipst); 4903 ip6_asp_table_held = B_FALSE; 4904 } 4905 4906 /* Assert that it is not deleted yet. */ 4907 ASSERT(save_ire->ire_ptpn != NULL); 4908 IRB_REFRELE(save_ire->ire_bucket); 4909 ire_refrele(save_ire); 4910 4911 if (copy_mp != NULL) { 4912 /* 4913 * If we found a (no)resolver, we ignore any 4914 * trailing top priority IRE_CACHE in 4915 * further loops. This ensures that we do not 4916 * omit any (no)resolver despite the priority 4917 * in this call. 4918 * IRE_CACHE, if any, will be processed 4919 * by another thread entering ip_newroute(), 4920 * (on resolver response, for example). 
4921 * We use this to force multiple parallel 4922 * resolution as soon as a packet needs to be 4923 * sent. The result is, after one packet 4924 * emission all reachable routes are generally 4925 * resolved. 4926 * Otherwise, complete resolution of MULTIRT 4927 * routes would require several emissions as 4928 * side effect. 4929 */ 4930 multirt_flags &= ~MULTIRT_CACHEGW; 4931 4932 /* 4933 * Search for the next unresolved multirt 4934 * route. 4935 */ 4936 copy_mp = NULL; 4937 save_ire = NULL; 4938 ire = NULL; 4939 /* re-enter the loop */ 4940 multirt_resolve_next = B_TRUE; 4941 continue; 4942 } 4943 4944 /* Don't need sire anymore */ 4945 if (sire != NULL) 4946 ire_refrele(sire); 4947 ill_refrele(dst_ill); 4948 ipif_refrele(src_ipif); 4949 return; 4950 4951 case IRE_IF_RESOLVER: 4952 /* 4953 * We can't build an IRE_CACHE yet, but at least we 4954 * found a resolver that can help. 4955 */ 4956 dst = *v6dstp; 4957 4958 /* 4959 * To be at this point in the code with a non-zero gw 4960 * means that dst is reachable through a gateway that 4961 * we have never resolved. By changing dst to the gw 4962 * addr we resolve the gateway first. When 4963 * ire_add_then_send() tries to put the IP dg to dst, 4964 * it will reenter ip_newroute() at which time we will 4965 * find the IRE_CACHE for the gw and create another 4966 * IRE_CACHE above (for dst itself). 4967 */ 4968 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4969 save_dst = dst; 4970 dst = v6gw; 4971 v6gw = ipv6_all_zeros; 4972 } 4973 if (dst_ill->ill_flags & ILLF_XRESOLV) { 4974 /* 4975 * Ask the external resolver to do its thing. 
4976 * Make an mblk chain in the following form: 4977 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 4978 */ 4979 mblk_t *ire_mp; 4980 mblk_t *areq_mp; 4981 areq_t *areq; 4982 in6_addr_t *addrp; 4983 4984 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 4985 if (ip6_asp_table_held) { 4986 ip6_asp_table_refrele(ipst); 4987 ip6_asp_table_held = B_FALSE; 4988 } 4989 ire = ire_create_mp_v6( 4990 &dst, /* dest address */ 4991 &ipv6_all_ones, /* mask */ 4992 &src_ipif->ipif_v6src_addr, 4993 /* source address */ 4994 &v6gw, /* gateway address */ 4995 NULL, /* no src nce */ 4996 dst_ill->ill_rq, /* recv-from queue */ 4997 dst_ill->ill_wq, /* send-to queue */ 4998 IRE_CACHE, 4999 src_ipif, 5000 &save_ire->ire_mask_v6, /* Parent mask */ 5001 0, 5002 save_ire->ire_ihandle, 5003 /* Interface handle */ 5004 0, /* flags if any */ 5005 &(save_ire->ire_uinfo), 5006 NULL, 5007 NULL, 5008 ipst); 5009 5010 ire_refrele(save_ire); 5011 if (ire == NULL) { 5012 ip1dbg(("ip_newroute_v6:" 5013 "ire is NULL\n")); 5014 break; 5015 } 5016 5017 if ((sire != NULL) && 5018 (sire->ire_flags & RTF_MULTIRT)) { 5019 /* 5020 * processing a copy of the packet to 5021 * send for further resolution loops 5022 */ 5023 copy_mp = copymsg(first_mp); 5024 if (copy_mp != NULL) 5025 MULTIRT_DEBUG_TAG(copy_mp); 5026 } 5027 ire->ire_marks |= ire_marks; 5028 ire_mp = ire->ire_mp; 5029 /* 5030 * Now create or find an nce for this interface. 5031 * The hw addr will need to to be set from 5032 * the reply to the AR_ENTRY_QUERY that 5033 * we're about to send. This will be done in 5034 * ire_add_v6(). 5035 */ 5036 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5037 switch (err) { 5038 case 0: 5039 /* 5040 * New cache entry created. 5041 * Break, then ask the external 5042 * resolver. 5043 */ 5044 break; 5045 case EINPROGRESS: 5046 /* 5047 * Resolution in progress; 5048 * packet has been queued by 5049 * ndp_resolver(). 5050 */ 5051 ire_delete(ire); 5052 ire = NULL; 5053 /* 5054 * Check if another multirt 5055 * route must be resolved. 
5056 */ 5057 if (copy_mp != NULL) { 5058 /* 5059 * If we found a resolver, we 5060 * ignore any trailing top 5061 * priority IRE_CACHE in 5062 * further loops. The reason is 5063 * the same as for noresolver. 5064 */ 5065 multirt_flags &= 5066 ~MULTIRT_CACHEGW; 5067 /* 5068 * Search for the next 5069 * unresolved multirt route. 5070 */ 5071 first_mp = copy_mp; 5072 copy_mp = NULL; 5073 mp = first_mp; 5074 if (mp->b_datap->db_type == 5075 M_CTL) { 5076 mp = mp->b_cont; 5077 } 5078 ASSERT(sire != NULL); 5079 dst = save_dst; 5080 /* 5081 * re-enter the loop 5082 */ 5083 multirt_resolve_next = 5084 B_TRUE; 5085 continue; 5086 } 5087 5088 if (sire != NULL) 5089 ire_refrele(sire); 5090 ill_refrele(dst_ill); 5091 ipif_refrele(src_ipif); 5092 return; 5093 default: 5094 /* 5095 * Transient error; packet will be 5096 * freed. 5097 */ 5098 ire_delete(ire); 5099 ire = NULL; 5100 break; 5101 } 5102 if (err != 0) 5103 break; 5104 /* 5105 * Now set up the AR_ENTRY_QUERY and send it. 5106 */ 5107 areq_mp = ill_arp_alloc(dst_ill, 5108 (uchar_t *)&ipv6_areq_template, 5109 (caddr_t)&dst); 5110 if (areq_mp == NULL) { 5111 ip1dbg(("ip_newroute_v6:" 5112 "areq_mp is NULL\n")); 5113 freemsg(ire_mp); 5114 break; 5115 } 5116 areq = (areq_t *)areq_mp->b_rptr; 5117 addrp = (in6_addr_t *)((char *)areq + 5118 areq->areq_target_addr_offset); 5119 *addrp = dst; 5120 addrp = (in6_addr_t *)((char *)areq + 5121 areq->areq_sender_addr_offset); 5122 *addrp = src_ipif->ipif_v6src_addr; 5123 /* 5124 * link the chain, then send up to the resolver. 5125 */ 5126 linkb(areq_mp, ire_mp); 5127 linkb(areq_mp, mp); 5128 ip1dbg(("ip_newroute_v6:" 5129 "putnext to resolver\n")); 5130 putnext(dst_ill->ill_rq, areq_mp); 5131 /* 5132 * Check if another multirt route 5133 * must be resolved. 5134 */ 5135 ire = NULL; 5136 if (copy_mp != NULL) { 5137 /* 5138 * If we find a resolver, we ignore any 5139 * trailing top priority IRE_CACHE in 5140 * further loops. The reason is the 5141 * same as for noresolver. 
5142 */ 5143 multirt_flags &= ~MULTIRT_CACHEGW; 5144 /* 5145 * Search for the next unresolved 5146 * multirt route. 5147 */ 5148 first_mp = copy_mp; 5149 copy_mp = NULL; 5150 mp = first_mp; 5151 if (mp->b_datap->db_type == M_CTL) { 5152 mp = mp->b_cont; 5153 } 5154 ASSERT(sire != NULL); 5155 dst = save_dst; 5156 /* 5157 * re-enter the loop 5158 */ 5159 multirt_resolve_next = B_TRUE; 5160 continue; 5161 } 5162 5163 if (sire != NULL) 5164 ire_refrele(sire); 5165 ill_refrele(dst_ill); 5166 ipif_refrele(src_ipif); 5167 return; 5168 } 5169 /* 5170 * Non-external resolver case. 5171 * 5172 * TSol note: Please see the note above the 5173 * IRE_IF_NORESOLVER case. 5174 */ 5175 ga.ga_af = AF_INET6; 5176 ga.ga_addr = dst; 5177 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5178 5179 ire = ire_create_v6( 5180 &dst, /* dest address */ 5181 &ipv6_all_ones, /* mask */ 5182 &src_ipif->ipif_v6src_addr, /* source address */ 5183 &v6gw, /* gateway address */ 5184 &save_ire->ire_max_frag, 5185 NULL, /* no src nce */ 5186 dst_ill->ill_rq, /* recv-from queue */ 5187 dst_ill->ill_wq, /* send-to queue */ 5188 IRE_CACHE, 5189 src_ipif, 5190 &save_ire->ire_mask_v6, /* Parent mask */ 5191 0, 5192 save_ire->ire_ihandle, /* Interface handle */ 5193 0, /* flags if any */ 5194 &(save_ire->ire_uinfo), 5195 NULL, 5196 gcgrp, 5197 ipst); 5198 5199 if (ire == NULL) { 5200 if (gcgrp != NULL) { 5201 GCGRP_REFRELE(gcgrp); 5202 gcgrp = NULL; 5203 } 5204 ire_refrele(save_ire); 5205 break; 5206 } 5207 5208 /* reference now held by IRE */ 5209 gcgrp = NULL; 5210 5211 if ((sire != NULL) && 5212 (sire->ire_flags & RTF_MULTIRT)) { 5213 copy_mp = copymsg(first_mp); 5214 if (copy_mp != NULL) 5215 MULTIRT_DEBUG_TAG(copy_mp); 5216 } 5217 5218 ire->ire_marks |= ire_marks; 5219 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5220 switch (err) { 5221 case 0: 5222 /* Prevent save_ire from getting deleted */ 5223 IRB_REFHOLD(save_ire->ire_bucket); 5224 /* Has it been removed already ? 
*/ 5225 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5226 IRB_REFRELE(save_ire->ire_bucket); 5227 ire_refrele(save_ire); 5228 break; 5229 } 5230 5231 /* 5232 * We have a resolved cache entry, 5233 * add in the IRE. 5234 */ 5235 ire_add_then_send(q, ire, first_mp); 5236 if (ip6_asp_table_held) { 5237 ip6_asp_table_refrele(ipst); 5238 ip6_asp_table_held = B_FALSE; 5239 } 5240 5241 /* Assert that it is not deleted yet. */ 5242 ASSERT(save_ire->ire_ptpn != NULL); 5243 IRB_REFRELE(save_ire->ire_bucket); 5244 ire_refrele(save_ire); 5245 /* 5246 * Check if another multirt route 5247 * must be resolved. 5248 */ 5249 ire = NULL; 5250 if (copy_mp != NULL) { 5251 /* 5252 * If we find a resolver, we ignore any 5253 * trailing top priority IRE_CACHE in 5254 * further loops. The reason is the 5255 * same as for noresolver. 5256 */ 5257 multirt_flags &= ~MULTIRT_CACHEGW; 5258 /* 5259 * Search for the next unresolved 5260 * multirt route. 5261 */ 5262 first_mp = copy_mp; 5263 copy_mp = NULL; 5264 mp = first_mp; 5265 if (mp->b_datap->db_type == M_CTL) { 5266 mp = mp->b_cont; 5267 } 5268 ASSERT(sire != NULL); 5269 dst = save_dst; 5270 /* 5271 * re-enter the loop 5272 */ 5273 multirt_resolve_next = B_TRUE; 5274 continue; 5275 } 5276 5277 if (sire != NULL) 5278 ire_refrele(sire); 5279 ill_refrele(dst_ill); 5280 ipif_refrele(src_ipif); 5281 return; 5282 5283 case EINPROGRESS: 5284 /* 5285 * mp was consumed - presumably queued. 5286 * No need for ire, presumably resolution is 5287 * in progress, and ire will be added when the 5288 * address is resolved. 5289 */ 5290 if (ip6_asp_table_held) { 5291 ip6_asp_table_refrele(ipst); 5292 ip6_asp_table_held = B_FALSE; 5293 } 5294 ASSERT(ire->ire_nce == NULL); 5295 ire_delete(ire); 5296 ire_refrele(save_ire); 5297 /* 5298 * Check if another multirt route 5299 * must be resolved. 
5300 */ 5301 ire = NULL; 5302 if (copy_mp != NULL) { 5303 /* 5304 * If we find a resolver, we ignore any 5305 * trailing top priority IRE_CACHE in 5306 * further loops. The reason is the 5307 * same as for noresolver. 5308 */ 5309 multirt_flags &= ~MULTIRT_CACHEGW; 5310 /* 5311 * Search for the next unresolved 5312 * multirt route. 5313 */ 5314 first_mp = copy_mp; 5315 copy_mp = NULL; 5316 mp = first_mp; 5317 if (mp->b_datap->db_type == M_CTL) { 5318 mp = mp->b_cont; 5319 } 5320 ASSERT(sire != NULL); 5321 dst = save_dst; 5322 /* 5323 * re-enter the loop 5324 */ 5325 multirt_resolve_next = B_TRUE; 5326 continue; 5327 } 5328 if (sire != NULL) 5329 ire_refrele(sire); 5330 ill_refrele(dst_ill); 5331 ipif_refrele(src_ipif); 5332 return; 5333 default: 5334 /* Some transient error */ 5335 ASSERT(ire->ire_nce == NULL); 5336 ire_refrele(save_ire); 5337 break; 5338 } 5339 break; 5340 default: 5341 break; 5342 } 5343 if (ip6_asp_table_held) { 5344 ip6_asp_table_refrele(ipst); 5345 ip6_asp_table_held = B_FALSE; 5346 } 5347 } while (multirt_resolve_next); 5348 5349 err_ret: 5350 ip1dbg(("ip_newroute_v6: dropped\n")); 5351 if (src_ipif != NULL) 5352 ipif_refrele(src_ipif); 5353 if (dst_ill != NULL) { 5354 need_rele = B_TRUE; 5355 ill = dst_ill; 5356 } 5357 if (ill != NULL) { 5358 if (mp->b_prev != NULL) { 5359 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5360 } else { 5361 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5362 } 5363 5364 if (need_rele) 5365 ill_refrele(ill); 5366 } else { 5367 if (mp->b_prev != NULL) { 5368 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5369 } else { 5370 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5371 } 5372 } 5373 /* Did this packet originate externally? 
*/ 5374 if (mp->b_prev) { 5375 mp->b_next = NULL; 5376 mp->b_prev = NULL; 5377 } 5378 if (copy_mp != NULL) { 5379 MULTIRT_DEBUG_UNTAG(copy_mp); 5380 freemsg(copy_mp); 5381 } 5382 MULTIRT_DEBUG_UNTAG(first_mp); 5383 freemsg(first_mp); 5384 if (ire != NULL) 5385 ire_refrele(ire); 5386 if (sire != NULL) 5387 ire_refrele(sire); 5388 return; 5389 5390 icmp_err_ret: 5391 if (ip6_asp_table_held) 5392 ip6_asp_table_refrele(ipst); 5393 if (src_ipif != NULL) 5394 ipif_refrele(src_ipif); 5395 if (dst_ill != NULL) { 5396 need_rele = B_TRUE; 5397 ill = dst_ill; 5398 } 5399 ip1dbg(("ip_newroute_v6: no route\n")); 5400 if (sire != NULL) 5401 ire_refrele(sire); 5402 /* 5403 * We need to set sire to NULL to avoid double freeing if we 5404 * ever goto err_ret from below. 5405 */ 5406 sire = NULL; 5407 ip6h = (ip6_t *)mp->b_rptr; 5408 /* Skip ip6i_t header if present */ 5409 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5410 /* Make sure the IPv6 header is present */ 5411 if ((mp->b_wptr - (uchar_t *)ip6h) < 5412 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5413 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5414 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5415 goto err_ret; 5416 } 5417 } 5418 mp->b_rptr += sizeof (ip6i_t); 5419 ip6h = (ip6_t *)mp->b_rptr; 5420 } 5421 /* Did this packet originate externally? 
*/ 5422 if (mp->b_prev) { 5423 if (ill != NULL) { 5424 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5425 } else { 5426 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5427 } 5428 mp->b_next = NULL; 5429 mp->b_prev = NULL; 5430 q = WR(q); 5431 } else { 5432 if (ill != NULL) { 5433 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5434 } else { 5435 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5436 } 5437 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5438 /* Failed */ 5439 if (copy_mp != NULL) { 5440 MULTIRT_DEBUG_UNTAG(copy_mp); 5441 freemsg(copy_mp); 5442 } 5443 MULTIRT_DEBUG_UNTAG(first_mp); 5444 freemsg(first_mp); 5445 if (ire != NULL) 5446 ire_refrele(ire); 5447 if (need_rele) 5448 ill_refrele(ill); 5449 return; 5450 } 5451 } 5452 5453 if (need_rele) 5454 ill_refrele(ill); 5455 5456 /* 5457 * At this point we will have ire only if RTF_BLACKHOLE 5458 * or RTF_REJECT flags are set on the IRE. It will not 5459 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5460 */ 5461 if (ire != NULL) { 5462 if (ire->ire_flags & RTF_BLACKHOLE) { 5463 ire_refrele(ire); 5464 if (copy_mp != NULL) { 5465 MULTIRT_DEBUG_UNTAG(copy_mp); 5466 freemsg(copy_mp); 5467 } 5468 MULTIRT_DEBUG_UNTAG(first_mp); 5469 freemsg(first_mp); 5470 return; 5471 } 5472 ire_refrele(ire); 5473 } 5474 if (ip_debug > 3) { 5475 /* ip2dbg */ 5476 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5477 AF_INET6, v6dstp); 5478 } 5479 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5480 B_FALSE, B_FALSE, zoneid, ipst); 5481 } 5482 5483 /* 5484 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5485 * we need to send out a packet to a destination address for which we do not 5486 * have specific routing information. It is only used for multicast packets. 5487 * 5488 * If unspec_src we allow creating an IRE with source address zero. 5489 * ire_send_v6() will delete it after the packet is sent. 
5490 */ 5491 void 5492 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5493 const in6_addr_t *v6dstp, const in6_addr_t *v6srcp, int unspec_src, 5494 zoneid_t zoneid) 5495 { 5496 ire_t *ire = NULL; 5497 ipif_t *src_ipif = NULL; 5498 int err = 0; 5499 ill_t *dst_ill = NULL; 5500 ire_t *save_ire; 5501 ipsec_out_t *io; 5502 ill_t *ill; 5503 mblk_t *first_mp; 5504 ire_t *fire = NULL; 5505 mblk_t *copy_mp = NULL; 5506 const in6_addr_t *ire_v6srcp; 5507 boolean_t probe = B_FALSE; 5508 boolean_t multirt_resolve_next; 5509 boolean_t ipif_held = B_FALSE; 5510 boolean_t ill_held = B_FALSE; 5511 boolean_t ip6_asp_table_held = B_FALSE; 5512 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5513 5514 /* 5515 * This loop is run only once in most cases. 5516 * We loop to resolve further routes only when the destination 5517 * can be reached through multiple RTF_MULTIRT-flagged ires. 5518 */ 5519 do { 5520 multirt_resolve_next = B_FALSE; 5521 if (dst_ill != NULL) { 5522 ill_refrele(dst_ill); 5523 dst_ill = NULL; 5524 } 5525 5526 if (src_ipif != NULL) { 5527 ipif_refrele(src_ipif); 5528 src_ipif = NULL; 5529 } 5530 ASSERT(ipif != NULL); 5531 ill = ipif->ipif_ill; 5532 5533 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5534 if (ip_debug > 2) { 5535 /* ip1dbg */ 5536 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5537 AF_INET6, v6dstp); 5538 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5539 ill->ill_name, ipif->ipif_isv6); 5540 } 5541 5542 first_mp = mp; 5543 if (mp->b_datap->db_type == M_CTL) { 5544 mp = mp->b_cont; 5545 io = (ipsec_out_t *)first_mp->b_rptr; 5546 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5547 } else { 5548 io = NULL; 5549 } 5550 5551 /* 5552 * If the interface is a pt-pt interface we look for an 5553 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5554 * local_address and the pt-pt destination address. 5555 * Otherwise we just match the local address. 
5556 */ 5557 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5558 goto err_ret; 5559 } 5560 5561 /* 5562 * We check if an IRE_OFFSUBNET for the addr that goes through 5563 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5564 * RTF_MULTIRT flags must be honored. 5565 */ 5566 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5567 ip2dbg(("ip_newroute_ipif_v6: " 5568 "ipif_lookup_multi_ire_v6(" 5569 "ipif %p, dst %08x) = fire %p\n", 5570 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5571 (void *)fire)); 5572 5573 ASSERT(src_ipif == NULL); 5574 5575 /* 5576 * Because nce_xmit() calls ip_output_v6() and NCEs are always 5577 * tied to the underlying interface, IS_UNDER_IPMP() may be 5578 * true even when building IREs that will be used for data 5579 * traffic. As such, see if the packet's source address is a 5580 * test address, and if so use that test address's ipif for 5581 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 5582 * ire_add_v6() can work properly. 5583 */ 5584 if (IS_UNDER_IPMP(ill)) 5585 probe = ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 5586 5587 /* 5588 * Determine the outbound (destination) ill for this route. 5589 * If IPMP is not in use, that's the same as our ill. If IPMP 5590 * is in-use and we're on the IPMP interface, or we're on an 5591 * underlying ill but sending data traffic, use a suitable 5592 * destination ill from the group. The latter case covers a 5593 * subtle edge condition with multicast: when we bring up an 5594 * IPv6 data address, we will create an NCE on an underlying 5595 * interface, and send solitications to ff02::1, which would 5596 * take us through here, and cause us to create an IRE for 5597 * ff02::1. To meet our defined semantics for multicast (and 5598 * ensure there aren't unexpected echoes), that IRE needs to 5599 * use the IPMP group's nominated multicast interface. 5600 * 5601 * Note: the source ipif is determined by source address 5602 * selection later. 
5603 */ 5604 if (IS_IPMP(ill) || (IS_UNDER_IPMP(ill) && !probe)) { 5605 ill_t *ipmp_ill; 5606 ipmp_illgrp_t *illg; 5607 5608 if (IS_UNDER_IPMP(ill)) { 5609 ipmp_ill = ipmp_ill_hold_ipmp_ill(ill); 5610 } else { 5611 ipmp_ill = ill; 5612 ill_refhold(ipmp_ill); /* for symmetry */ 5613 } 5614 5615 if (ipmp_ill == NULL) 5616 goto err_ret; 5617 5618 illg = ipmp_ill->ill_grp; 5619 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 5620 dst_ill = ipmp_illgrp_hold_cast_ill(illg); 5621 else 5622 dst_ill = ipmp_illgrp_hold_next_ill(illg); 5623 5624 ill_refrele(ipmp_ill); 5625 } else { 5626 dst_ill = ill; 5627 ill_refhold(dst_ill); /* for symmetry */ 5628 } 5629 5630 if (dst_ill == NULL) { 5631 if (ip_debug > 2) { 5632 pr_addr_dbg("ip_newroute_ipif_v6: " 5633 "no dst ill for dst %s\n", 5634 AF_INET6, v6dstp); 5635 } 5636 goto err_ret; 5637 } 5638 5639 /* 5640 * Pick a source address which matches the scope of the 5641 * destination address. 5642 * For RTF_SETSRC routes, the source address is imposed by the 5643 * parent ire (fire). 5644 */ 5645 5646 if (src_ipif == NULL && fire != NULL && 5647 (fire->ire_flags & RTF_SETSRC)) { 5648 /* 5649 * Check that the ipif matching the requested source 5650 * address still exists. 
5651 */ 5652 src_ipif = ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5653 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5654 } 5655 5656 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5657 ip6_asp_table_held = B_TRUE; 5658 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5659 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5660 } 5661 5662 if (src_ipif == NULL) { 5663 if (!unspec_src) { 5664 if (ip_debug > 2) { 5665 /* ip1dbg */ 5666 pr_addr_dbg("ip_newroute_ipif_v6: " 5667 "no src for dst %s\n", 5668 AF_INET6, v6dstp); 5669 printf(" through interface %s\n", 5670 dst_ill->ill_name); 5671 } 5672 goto err_ret; 5673 } 5674 ire_v6srcp = &ipv6_all_zeros; 5675 src_ipif = ipif; 5676 ipif_refhold(src_ipif); 5677 } else { 5678 ire_v6srcp = &src_ipif->ipif_v6src_addr; 5679 } 5680 5681 ire = ipif_to_ire_v6(ipif); 5682 if (ire == NULL) { 5683 if (ip_debug > 2) { 5684 /* ip1dbg */ 5685 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5686 AF_INET6, &ipif->ipif_v6lcl_addr); 5687 printf("ip_newroute_ipif_v6: " 5688 "if %s\n", dst_ill->ill_name); 5689 } 5690 goto err_ret; 5691 } 5692 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5693 goto err_ret; 5694 5695 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5696 5697 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5698 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5699 if (ip_debug > 2) { 5700 /* ip1dbg */ 5701 pr_addr_dbg(" address %s\n", 5702 AF_INET6, &ire->ire_src_addr_v6); 5703 } 5704 save_ire = ire; 5705 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5706 (void *)ire, (void *)ipif)); 5707 5708 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5709 /* 5710 * an IRE_OFFSUBET was looked up 5711 * on that interface. 5712 * this ire has RTF_MULTIRT flag, 5713 * so the resolution loop 5714 * will be re-entered to resolve 5715 * additional routes on other 5716 * interfaces. For that purpose, 5717 * a copy of the packet is 5718 * made at this point. 
5719 */ 5720 fire->ire_last_used_time = lbolt; 5721 copy_mp = copymsg(first_mp); 5722 if (copy_mp) { 5723 MULTIRT_DEBUG_TAG(copy_mp); 5724 } 5725 } 5726 5727 switch (ire->ire_type) { 5728 case IRE_IF_NORESOLVER: { 5729 /* 5730 * We have what we need to build an IRE_CACHE. 5731 * 5732 * handle the Gated case, where we create 5733 * a NORESOLVER route for loopback. 5734 */ 5735 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5736 break; 5737 /* 5738 * The newly created ire will inherit the flags of the 5739 * parent ire, if any. 5740 */ 5741 ire = ire_create_v6( 5742 v6dstp, /* dest address */ 5743 &ipv6_all_ones, /* mask */ 5744 ire_v6srcp, /* source address */ 5745 NULL, /* gateway address */ 5746 &save_ire->ire_max_frag, 5747 NULL, /* no src nce */ 5748 dst_ill->ill_rq, /* recv-from queue */ 5749 dst_ill->ill_wq, /* send-to queue */ 5750 IRE_CACHE, 5751 src_ipif, 5752 NULL, 5753 (fire != NULL) ? /* Parent handle */ 5754 fire->ire_phandle : 0, 5755 save_ire->ire_ihandle, /* Interface handle */ 5756 (fire != NULL) ? 5757 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5758 0, 5759 &ire_uinfo_null, 5760 NULL, 5761 NULL, 5762 ipst); 5763 5764 if (ire == NULL) { 5765 ire_refrele(save_ire); 5766 break; 5767 } 5768 5769 err = ndp_noresolver(dst_ill, v6dstp); 5770 if (err != 0) { 5771 ire_refrele(save_ire); 5772 break; 5773 } 5774 5775 /* Prevent save_ire from getting deleted */ 5776 IRB_REFHOLD(save_ire->ire_bucket); 5777 /* Has it been removed already ? */ 5778 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5779 IRB_REFRELE(save_ire->ire_bucket); 5780 ire_refrele(save_ire); 5781 break; 5782 } 5783 5784 ire_add_then_send(q, ire, first_mp); 5785 if (ip6_asp_table_held) { 5786 ip6_asp_table_refrele(ipst); 5787 ip6_asp_table_held = B_FALSE; 5788 } 5789 5790 /* Assert that it is not deleted yet. 
*/ 5791 ASSERT(save_ire->ire_ptpn != NULL); 5792 IRB_REFRELE(save_ire->ire_bucket); 5793 ire_refrele(save_ire); 5794 if (fire != NULL) { 5795 ire_refrele(fire); 5796 fire = NULL; 5797 } 5798 5799 /* 5800 * The resolution loop is re-entered if we 5801 * actually are in a multirouting case. 5802 */ 5803 if (copy_mp != NULL) { 5804 boolean_t need_resolve = 5805 ire_multirt_need_resolve_v6(v6dstp, 5806 msg_getlabel(copy_mp), ipst); 5807 if (!need_resolve) { 5808 MULTIRT_DEBUG_UNTAG(copy_mp); 5809 freemsg(copy_mp); 5810 copy_mp = NULL; 5811 } else { 5812 /* 5813 * ipif_lookup_group_v6() calls 5814 * ire_lookup_multi_v6() that uses 5815 * ire_ftable_lookup_v6() to find 5816 * an IRE_INTERFACE for the group. 5817 * In the multirt case, 5818 * ire_lookup_multi_v6() then invokes 5819 * ire_multirt_lookup_v6() to find 5820 * the next resolvable ire. 5821 * As a result, we obtain a new 5822 * interface, derived from the 5823 * next ire. 5824 */ 5825 if (ipif_held) { 5826 ipif_refrele(ipif); 5827 ipif_held = B_FALSE; 5828 } 5829 ipif = ipif_lookup_group_v6(v6dstp, 5830 zoneid, ipst); 5831 ip2dbg(("ip_newroute_ipif: " 5832 "multirt dst %08x, ipif %p\n", 5833 ntohl(V4_PART_OF_V6((*v6dstp))), 5834 (void *)ipif)); 5835 if (ipif != NULL) { 5836 ipif_held = B_TRUE; 5837 mp = copy_mp; 5838 copy_mp = NULL; 5839 multirt_resolve_next = 5840 B_TRUE; 5841 continue; 5842 } else { 5843 freemsg(copy_mp); 5844 } 5845 } 5846 } 5847 ill_refrele(dst_ill); 5848 if (ipif_held) { 5849 ipif_refrele(ipif); 5850 ipif_held = B_FALSE; 5851 } 5852 if (src_ipif != NULL) 5853 ipif_refrele(src_ipif); 5854 return; 5855 } 5856 case IRE_IF_RESOLVER: { 5857 5858 ASSERT(dst_ill->ill_isv6); 5859 5860 /* 5861 * We obtain a partial IRE_CACHE which we will pass 5862 * along with the resolver query. When the response 5863 * comes back it will be there ready for us to add. 5864 */ 5865 /* 5866 * the newly created ire will inherit the flags of the 5867 * parent ire, if any. 
5868 */ 5869 ire = ire_create_v6( 5870 v6dstp, /* dest address */ 5871 &ipv6_all_ones, /* mask */ 5872 ire_v6srcp, /* source address */ 5873 NULL, /* gateway address */ 5874 &save_ire->ire_max_frag, 5875 NULL, /* src nce */ 5876 dst_ill->ill_rq, /* recv-from queue */ 5877 dst_ill->ill_wq, /* send-to queue */ 5878 IRE_CACHE, 5879 src_ipif, 5880 NULL, 5881 (fire != NULL) ? /* Parent handle */ 5882 fire->ire_phandle : 0, 5883 save_ire->ire_ihandle, /* Interface handle */ 5884 (fire != NULL) ? 5885 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5886 0, 5887 &ire_uinfo_null, 5888 NULL, 5889 NULL, 5890 ipst); 5891 5892 if (ire == NULL) { 5893 ire_refrele(save_ire); 5894 break; 5895 } 5896 5897 /* Resolve and add ire to the ctable */ 5898 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5899 switch (err) { 5900 case 0: 5901 /* Prevent save_ire from getting deleted */ 5902 IRB_REFHOLD(save_ire->ire_bucket); 5903 /* Has it been removed already ? */ 5904 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5905 IRB_REFRELE(save_ire->ire_bucket); 5906 ire_refrele(save_ire); 5907 break; 5908 } 5909 /* 5910 * We have a resolved cache entry, 5911 * add in the IRE. 5912 */ 5913 ire_add_then_send(q, ire, first_mp); 5914 if (ip6_asp_table_held) { 5915 ip6_asp_table_refrele(ipst); 5916 ip6_asp_table_held = B_FALSE; 5917 } 5918 5919 /* Assert that it is not deleted yet. */ 5920 ASSERT(save_ire->ire_ptpn != NULL); 5921 IRB_REFRELE(save_ire->ire_bucket); 5922 ire_refrele(save_ire); 5923 if (fire != NULL) { 5924 ire_refrele(fire); 5925 fire = NULL; 5926 } 5927 5928 /* 5929 * The resolution loop is re-entered if we 5930 * actually are in a multirouting case. 
5931 */ 5932 if (copy_mp != NULL) { 5933 boolean_t need_resolve = 5934 ire_multirt_need_resolve_v6(v6dstp, 5935 msg_getlabel(copy_mp), ipst); 5936 if (!need_resolve) { 5937 MULTIRT_DEBUG_UNTAG(copy_mp); 5938 freemsg(copy_mp); 5939 copy_mp = NULL; 5940 } else { 5941 /* 5942 * ipif_lookup_group_v6() calls 5943 * ire_lookup_multi_v6() that 5944 * uses ire_ftable_lookup_v6() 5945 * to find an IRE_INTERFACE for 5946 * the group. In the multirt 5947 * case, ire_lookup_multi_v6() 5948 * then invokes 5949 * ire_multirt_lookup_v6() to 5950 * find the next resolvable ire. 5951 * As a result, we obtain a new 5952 * interface, derived from the 5953 * next ire. 5954 */ 5955 if (ipif_held) { 5956 ipif_refrele(ipif); 5957 ipif_held = B_FALSE; 5958 } 5959 ipif = ipif_lookup_group_v6( 5960 v6dstp, zoneid, ipst); 5961 ip2dbg(("ip_newroute_ipif: " 5962 "multirt dst %08x, " 5963 "ipif %p\n", 5964 ntohl(V4_PART_OF_V6( 5965 (*v6dstp))), 5966 (void *)ipif)); 5967 if (ipif != NULL) { 5968 ipif_held = B_TRUE; 5969 mp = copy_mp; 5970 copy_mp = NULL; 5971 multirt_resolve_next = 5972 B_TRUE; 5973 continue; 5974 } else { 5975 freemsg(copy_mp); 5976 } 5977 } 5978 } 5979 ill_refrele(dst_ill); 5980 if (ipif_held) { 5981 ipif_refrele(ipif); 5982 ipif_held = B_FALSE; 5983 } 5984 if (src_ipif != NULL) 5985 ipif_refrele(src_ipif); 5986 return; 5987 5988 case EINPROGRESS: 5989 /* 5990 * mp was consumed - presumably queued. 5991 * No need for ire, presumably resolution is 5992 * in progress, and ire will be added when the 5993 * address is resolved. 5994 */ 5995 if (ip6_asp_table_held) { 5996 ip6_asp_table_refrele(ipst); 5997 ip6_asp_table_held = B_FALSE; 5998 } 5999 ire_delete(ire); 6000 ire_refrele(save_ire); 6001 if (fire != NULL) { 6002 ire_refrele(fire); 6003 fire = NULL; 6004 } 6005 6006 /* 6007 * The resolution loop is re-entered if we 6008 * actually are in a multirouting case. 
6009 */ 6010 if (copy_mp != NULL) { 6011 boolean_t need_resolve = 6012 ire_multirt_need_resolve_v6(v6dstp, 6013 msg_getlabel(copy_mp), ipst); 6014 if (!need_resolve) { 6015 MULTIRT_DEBUG_UNTAG(copy_mp); 6016 freemsg(copy_mp); 6017 copy_mp = NULL; 6018 } else { 6019 /* 6020 * ipif_lookup_group_v6() calls 6021 * ire_lookup_multi_v6() that 6022 * uses ire_ftable_lookup_v6() 6023 * to find an IRE_INTERFACE for 6024 * the group. In the multirt 6025 * case, ire_lookup_multi_v6() 6026 * then invokes 6027 * ire_multirt_lookup_v6() to 6028 * find the next resolvable ire. 6029 * As a result, we obtain a new 6030 * interface, derived from the 6031 * next ire. 6032 */ 6033 if (ipif_held) { 6034 ipif_refrele(ipif); 6035 ipif_held = B_FALSE; 6036 } 6037 ipif = ipif_lookup_group_v6( 6038 v6dstp, zoneid, ipst); 6039 ip2dbg(("ip_newroute_ipif: " 6040 "multirt dst %08x, " 6041 "ipif %p\n", 6042 ntohl(V4_PART_OF_V6( 6043 (*v6dstp))), 6044 (void *)ipif)); 6045 if (ipif != NULL) { 6046 ipif_held = B_TRUE; 6047 mp = copy_mp; 6048 copy_mp = NULL; 6049 multirt_resolve_next = 6050 B_TRUE; 6051 continue; 6052 } else { 6053 freemsg(copy_mp); 6054 } 6055 } 6056 } 6057 ill_refrele(dst_ill); 6058 if (ipif_held) { 6059 ipif_refrele(ipif); 6060 ipif_held = B_FALSE; 6061 } 6062 if (src_ipif != NULL) 6063 ipif_refrele(src_ipif); 6064 return; 6065 default: 6066 /* Some transient error */ 6067 ire_refrele(save_ire); 6068 break; 6069 } 6070 break; 6071 } 6072 default: 6073 break; 6074 } 6075 if (ip6_asp_table_held) { 6076 ip6_asp_table_refrele(ipst); 6077 ip6_asp_table_held = B_FALSE; 6078 } 6079 } while (multirt_resolve_next); 6080 6081 err_ret: 6082 if (ip6_asp_table_held) 6083 ip6_asp_table_refrele(ipst); 6084 if (ire != NULL) 6085 ire_refrele(ire); 6086 if (fire != NULL) 6087 ire_refrele(fire); 6088 if (ipif != NULL && ipif_held) 6089 ipif_refrele(ipif); 6090 if (src_ipif != NULL) 6091 ipif_refrele(src_ipif); 6092 6093 /* Multicast - no point in trying to generate ICMP error */ 6094 if (dst_ill != 
NULL) { 6095 ill = dst_ill; 6096 ill_held = B_TRUE; 6097 } 6098 if (mp->b_prev || mp->b_next) { 6099 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6100 } else { 6101 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6102 } 6103 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6104 mp->b_next = NULL; 6105 mp->b_prev = NULL; 6106 freemsg(first_mp); 6107 if (ill_held) 6108 ill_refrele(ill); 6109 } 6110 6111 /* 6112 * Parse and process any hop-by-hop or destination options. 6113 * 6114 * Assumes that q is an ill read queue so that ICMP errors for link-local 6115 * destinations are sent out the correct interface. 6116 * 6117 * Returns -1 if there was an error and mp has been consumed. 6118 * Returns 0 if no special action is needed. 6119 * Returns 1 if the packet contained a router alert option for this node 6120 * which is verified to be "interesting/known" for our implementation. 6121 * 6122 * XXX Note: In future as more hbh or dest options are defined, 6123 * it may be better to have different routines for hbh and dest 6124 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6125 * may have same value in different namespaces. Or is it same namespace ?? 
6126 * Current code checks for each opt_type (other than pads) if it is in 6127 * the expected nexthdr (hbh or dest) 6128 */ 6129 static int 6130 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6131 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6132 { 6133 uint8_t opt_type; 6134 uint_t optused; 6135 int ret = 0; 6136 mblk_t *first_mp; 6137 const char *errtype; 6138 zoneid_t zoneid; 6139 ill_t *ill = q->q_ptr; 6140 ipif_t *ipif; 6141 6142 first_mp = mp; 6143 if (mp->b_datap->db_type == M_CTL) { 6144 mp = mp->b_cont; 6145 } 6146 6147 while (optlen != 0) { 6148 opt_type = *optptr; 6149 if (opt_type == IP6OPT_PAD1) { 6150 optused = 1; 6151 } else { 6152 if (optlen < 2) 6153 goto bad_opt; 6154 errtype = "malformed"; 6155 if (opt_type == ip6opt_ls) { 6156 optused = 2 + optptr[1]; 6157 if (optused > optlen) 6158 goto bad_opt; 6159 } else switch (opt_type) { 6160 case IP6OPT_PADN: 6161 /* 6162 * Note:We don't verify that (N-2) pad octets 6163 * are zero as required by spec. Adhere to 6164 * "be liberal in what you accept..." part of 6165 * implementation philosophy (RFC791,RFC1122) 6166 */ 6167 optused = 2 + optptr[1]; 6168 if (optused > optlen) 6169 goto bad_opt; 6170 break; 6171 6172 case IP6OPT_JUMBO: 6173 if (hdr_type != IPPROTO_HOPOPTS) 6174 goto opt_error; 6175 goto opt_error; /* XXX Not implemented! 
*/ 6176 6177 case IP6OPT_ROUTER_ALERT: { 6178 struct ip6_opt_router *or; 6179 6180 if (hdr_type != IPPROTO_HOPOPTS) 6181 goto opt_error; 6182 optused = 2 + optptr[1]; 6183 if (optused > optlen) 6184 goto bad_opt; 6185 or = (struct ip6_opt_router *)optptr; 6186 /* Check total length and alignment */ 6187 if (optused != sizeof (*or) || 6188 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6189 goto opt_error; 6190 /* Check value */ 6191 switch (*((uint16_t *)or->ip6or_value)) { 6192 case IP6_ALERT_MLD: 6193 case IP6_ALERT_RSVP: 6194 ret = 1; 6195 } 6196 break; 6197 } 6198 case IP6OPT_HOME_ADDRESS: { 6199 /* 6200 * Minimal support for the home address option 6201 * (which is required by all IPv6 nodes). 6202 * Implement by just swapping the home address 6203 * and source address. 6204 * XXX Note: this has IPsec implications since 6205 * AH needs to take this into account. 6206 * Also, when IPsec is used we need to ensure 6207 * that this is only processed once 6208 * in the received packet (to avoid swapping 6209 * back and forth). 6210 * NOTE:This option processing is considered 6211 * to be unsafe and prone to a denial of 6212 * service attack. 6213 * The current processing is not safe even with 6214 * IPsec secured IP packets. Since the home 6215 * address option processing requirement still 6216 * is in the IETF draft and in the process of 6217 * being redefined for its usage, it has been 6218 * decided to turn off the option by default. 6219 * If this section of code needs to be executed, 6220 * ndd variable ip6_ignore_home_address_opt 6221 * should be set to 0 at the user's own risk. 6222 */ 6223 struct ip6_opt_home_address *oh; 6224 in6_addr_t tmp; 6225 6226 if (ipst->ips_ipv6_ignore_home_address_opt) 6227 goto opt_error; 6228 6229 if (hdr_type != IPPROTO_DSTOPTS) 6230 goto opt_error; 6231 optused = 2 + optptr[1]; 6232 if (optused > optlen) 6233 goto bad_opt; 6234 6235 /* 6236 * We did this dest. opt the first time 6237 * around (i.e. before AH processing). 
6238 * If we've done AH... stop now. 6239 */ 6240 if (first_mp != mp) { 6241 ipsec_in_t *ii; 6242 6243 ii = (ipsec_in_t *)first_mp->b_rptr; 6244 if (ii->ipsec_in_ah_sa != NULL) 6245 break; 6246 } 6247 6248 oh = (struct ip6_opt_home_address *)optptr; 6249 /* Check total length and alignment */ 6250 if (optused < sizeof (*oh) || 6251 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6252 goto opt_error; 6253 /* Swap ip6_src and the home address */ 6254 tmp = ip6h->ip6_src; 6255 /* XXX Note: only 8 byte alignment option */ 6256 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6257 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6258 break; 6259 } 6260 6261 case IP6OPT_TUNNEL_LIMIT: 6262 if (hdr_type != IPPROTO_DSTOPTS) { 6263 goto opt_error; 6264 } 6265 optused = 2 + optptr[1]; 6266 if (optused > optlen) { 6267 goto bad_opt; 6268 } 6269 if (optused != 3) { 6270 goto opt_error; 6271 } 6272 break; 6273 6274 default: 6275 errtype = "unknown"; 6276 /* FALLTHROUGH */ 6277 opt_error: 6278 /* Determine which zone should send error */ 6279 zoneid = ipif_lookup_addr_zoneid_v6( 6280 &ip6h->ip6_dst, ill, ipst); 6281 switch (IP6OPT_TYPE(opt_type)) { 6282 case IP6OPT_TYPE_SKIP: 6283 optused = 2 + optptr[1]; 6284 if (optused > optlen) 6285 goto bad_opt; 6286 ip1dbg(("ip_process_options_v6: %s " 6287 "opt 0x%x skipped\n", 6288 errtype, opt_type)); 6289 break; 6290 case IP6OPT_TYPE_DISCARD: 6291 ip1dbg(("ip_process_options_v6: %s " 6292 "opt 0x%x; packet dropped\n", 6293 errtype, opt_type)); 6294 freemsg(first_mp); 6295 return (-1); 6296 case IP6OPT_TYPE_ICMP: 6297 if (zoneid == ALL_ZONES) { 6298 freemsg(first_mp); 6299 return (-1); 6300 } 6301 icmp_param_problem_v6(WR(q), first_mp, 6302 ICMP6_PARAMPROB_OPTION, 6303 (uint32_t)(optptr - 6304 (uint8_t *)ip6h), 6305 B_FALSE, B_FALSE, zoneid, ipst); 6306 return (-1); 6307 case IP6OPT_TYPE_FORCEICMP: 6308 /* 6309 * If we don't have a zone and the dst 6310 * addr is multicast, then pick a zone 6311 * based on the inbound interface. 
6312 */ 6313 if (zoneid == ALL_ZONES && 6314 IN6_IS_ADDR_MULTICAST( 6315 &ip6h->ip6_dst)) { 6316 ipif = ipif_select_source_v6( 6317 ill, &ip6h->ip6_src, 6318 B_TRUE, 6319 IPV6_PREFER_SRC_DEFAULT, 6320 ALL_ZONES); 6321 if (ipif != NULL) { 6322 zoneid = 6323 ipif->ipif_zoneid; 6324 ipif_refrele(ipif); 6325 } 6326 } 6327 if (zoneid == ALL_ZONES) { 6328 freemsg(first_mp); 6329 return (-1); 6330 } 6331 icmp_param_problem_v6(WR(q), first_mp, 6332 ICMP6_PARAMPROB_OPTION, 6333 (uint32_t)(optptr - 6334 (uint8_t *)ip6h), 6335 B_FALSE, B_TRUE, zoneid, ipst); 6336 return (-1); 6337 default: 6338 ASSERT(0); 6339 } 6340 } 6341 } 6342 optlen -= optused; 6343 optptr += optused; 6344 } 6345 return (ret); 6346 6347 bad_opt: 6348 /* Determine which zone should send error */ 6349 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6350 if (zoneid == ALL_ZONES) { 6351 freemsg(first_mp); 6352 } else { 6353 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6354 (uint32_t)(optptr - (uint8_t *)ip6h), 6355 B_FALSE, B_FALSE, zoneid, ipst); 6356 } 6357 return (-1); 6358 } 6359 6360 /* 6361 * Process a routing header that is not yet empty. 6362 * Only handles type 0 routing headers. 6363 */ 6364 static void 6365 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6366 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6367 { 6368 ip6_rthdr0_t *rthdr; 6369 uint_t ehdrlen; 6370 uint_t numaddr; 6371 in6_addr_t *addrptr; 6372 in6_addr_t tmp; 6373 ip_stack_t *ipst = ill->ill_ipst; 6374 6375 ASSERT(rth->ip6r_segleft != 0); 6376 6377 if (!ipst->ips_ipv6_forward_src_routed) { 6378 /* XXX Check for source routed out same interface? 
*/ 6379 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6380 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6381 freemsg(hada_mp); 6382 freemsg(mp); 6383 return; 6384 } 6385 6386 if (rth->ip6r_type != 0) { 6387 if (hada_mp != NULL) 6388 goto hada_drop; 6389 /* Sent by forwarding path, and router is global zone */ 6390 icmp_param_problem_v6(WR(q), mp, 6391 ICMP6_PARAMPROB_HEADER, 6392 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6393 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6394 return; 6395 } 6396 rthdr = (ip6_rthdr0_t *)rth; 6397 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6398 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6399 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6400 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6401 if (rthdr->ip6r0_len & 0x1) { 6402 /* An odd length is impossible */ 6403 if (hada_mp != NULL) 6404 goto hada_drop; 6405 /* Sent by forwarding path, and router is global zone */ 6406 icmp_param_problem_v6(WR(q), mp, 6407 ICMP6_PARAMPROB_HEADER, 6408 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6409 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6410 return; 6411 } 6412 numaddr = rthdr->ip6r0_len / 2; 6413 if (rthdr->ip6r0_segleft > numaddr) { 6414 /* segleft exceeds number of addresses in routing header */ 6415 if (hada_mp != NULL) 6416 goto hada_drop; 6417 /* Sent by forwarding path, and router is global zone */ 6418 icmp_param_problem_v6(WR(q), mp, 6419 ICMP6_PARAMPROB_HEADER, 6420 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6421 (uchar_t *)ip6h), 6422 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6423 return; 6424 } 6425 addrptr += (numaddr - rthdr->ip6r0_segleft); 6426 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6427 IN6_IS_ADDR_MULTICAST(addrptr)) { 6428 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6429 freemsg(hada_mp); 6430 freemsg(mp); 6431 return; 6432 } 6433 /* Swap */ 6434 tmp = *addrptr; 6435 *addrptr = ip6h->ip6_dst; 6436 ip6h->ip6_dst = tmp; 6437 rthdr->ip6r0_segleft--; 
6438 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6439 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6440 if (hada_mp != NULL) 6441 goto hada_drop; 6442 /* Sent by forwarding path, and router is global zone */ 6443 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6444 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6445 return; 6446 } 6447 if (ip_check_v6_mblk(mp, ill) == IP6_MBLK_OK) { 6448 ip6h = (ip6_t *)mp->b_rptr; 6449 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6450 } else { 6451 freemsg(mp); 6452 } 6453 return; 6454 hada_drop: 6455 /* IPsec kstats: bean counter? */ 6456 freemsg(hada_mp); 6457 freemsg(mp); 6458 } 6459 6460 /* 6461 * Read side put procedure for IPv6 module. 6462 */ 6463 void 6464 ip_rput_v6(queue_t *q, mblk_t *mp) 6465 { 6466 mblk_t *first_mp; 6467 mblk_t *hada_mp = NULL; 6468 ip6_t *ip6h; 6469 boolean_t ll_multicast = B_FALSE; 6470 boolean_t mctl_present = B_FALSE; 6471 ill_t *ill; 6472 struct iocblk *iocp; 6473 uint_t flags = 0; 6474 mblk_t *dl_mp; 6475 ip_stack_t *ipst; 6476 int check; 6477 6478 ill = (ill_t *)q->q_ptr; 6479 ipst = ill->ill_ipst; 6480 if (ill->ill_state_flags & ILL_CONDEMNED) { 6481 union DL_primitives *dl; 6482 6483 dl = (union DL_primitives *)mp->b_rptr; 6484 /* 6485 * Things are opening or closing - only accept DLPI 6486 * ack messages. If the stream is closing and ip_wsrv 6487 * has completed, ip_close is out of the qwait, but has 6488 * not yet completed qprocsoff. Don't proceed any further 6489 * because the ill has been cleaned up and things hanging 6490 * off the ill have been freed. 6491 */ 6492 if ((mp->b_datap->db_type != M_PCPROTO) || 6493 (dl->dl_primitive == DL_UNITDATA_IND)) { 6494 inet_freemsg(mp); 6495 return; 6496 } 6497 } 6498 6499 dl_mp = NULL; 6500 switch (mp->b_datap->db_type) { 6501 case M_DATA: { 6502 int hlen; 6503 uchar_t *ucp; 6504 struct ether_header *eh; 6505 dl_unitdata_ind_t *dui; 6506 6507 /* 6508 * This is a work-around for CR 6451644, a bug in Nemo. 
It 6509 * should be removed when that problem is fixed. 6510 */ 6511 if (ill->ill_mactype == DL_ETHER && 6512 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6513 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6514 ucp[-2] == (IP6_DL_SAP >> 8)) { 6515 if (hlen >= sizeof (struct ether_vlan_header) && 6516 ucp[-5] == 0 && ucp[-6] == 0x81) 6517 ucp -= sizeof (struct ether_vlan_header); 6518 else 6519 ucp -= sizeof (struct ether_header); 6520 /* 6521 * If it's a group address, then fabricate a 6522 * DL_UNITDATA_IND message. 6523 */ 6524 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6525 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6526 BPRI_HI)) != NULL) { 6527 eh = (struct ether_header *)ucp; 6528 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6529 DB_TYPE(dl_mp) = M_PROTO; 6530 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6531 dui->dl_primitive = DL_UNITDATA_IND; 6532 dui->dl_dest_addr_length = 8; 6533 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6534 dui->dl_src_addr_length = 8; 6535 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6536 8; 6537 dui->dl_group_address = 1; 6538 ucp = (uchar_t *)(dui + 1); 6539 if (ill->ill_sap_length > 0) 6540 ucp += ill->ill_sap_length; 6541 bcopy(&eh->ether_dhost, ucp, 6); 6542 bcopy(&eh->ether_shost, ucp + 8, 6); 6543 ucp = (uchar_t *)(dui + 1); 6544 if (ill->ill_sap_length < 0) 6545 ucp += 8 + ill->ill_sap_length; 6546 bcopy(&eh->ether_type, ucp, 2); 6547 bcopy(&eh->ether_type, ucp + 8, 2); 6548 } 6549 } 6550 break; 6551 } 6552 6553 case M_PROTO: 6554 case M_PCPROTO: 6555 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6556 DL_UNITDATA_IND) { 6557 /* Go handle anything other than data elsewhere. */ 6558 ip_rput_dlpi(q, mp); 6559 return; 6560 } 6561 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6562 6563 /* Save the DLPI header. 
*/ 6564 dl_mp = mp; 6565 mp = mp->b_cont; 6566 dl_mp->b_cont = NULL; 6567 break; 6568 case M_BREAK: 6569 panic("ip_rput_v6: got an M_BREAK"); 6570 /*NOTREACHED*/ 6571 case M_IOCACK: 6572 iocp = (struct iocblk *)mp->b_rptr; 6573 switch (iocp->ioc_cmd) { 6574 case DL_IOC_HDR_INFO: 6575 ill = (ill_t *)q->q_ptr; 6576 ill_fastpath_ack(ill, mp); 6577 return; 6578 6579 case SIOCGTUNPARAM: 6580 case OSIOCGTUNPARAM: 6581 ip_rput_other(NULL, q, mp, NULL); 6582 return; 6583 6584 case SIOCSTUNPARAM: 6585 case OSIOCSTUNPARAM: 6586 /* Go through qwriter */ 6587 break; 6588 default: 6589 putnext(q, mp); 6590 return; 6591 } 6592 /* FALLTHRU */ 6593 case M_ERROR: 6594 case M_HANGUP: 6595 mutex_enter(&ill->ill_lock); 6596 if (ill->ill_state_flags & ILL_CONDEMNED) { 6597 mutex_exit(&ill->ill_lock); 6598 freemsg(mp); 6599 return; 6600 } 6601 ill_refhold_locked(ill); 6602 mutex_exit(&ill->ill_lock); 6603 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6604 return; 6605 case M_CTL: 6606 if ((MBLKL(mp) > sizeof (int)) && 6607 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6608 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6609 mctl_present = B_TRUE; 6610 break; 6611 } 6612 putnext(q, mp); 6613 return; 6614 case M_IOCNAK: 6615 iocp = (struct iocblk *)mp->b_rptr; 6616 switch (iocp->ioc_cmd) { 6617 case DL_IOC_HDR_INFO: 6618 case SIOCGTUNPARAM: 6619 case OSIOCGTUNPARAM: 6620 ip_rput_other(NULL, q, mp, NULL); 6621 return; 6622 6623 case SIOCSTUNPARAM: 6624 case OSIOCSTUNPARAM: 6625 mutex_enter(&ill->ill_lock); 6626 if (ill->ill_state_flags & ILL_CONDEMNED) { 6627 mutex_exit(&ill->ill_lock); 6628 freemsg(mp); 6629 return; 6630 } 6631 ill_refhold_locked(ill); 6632 mutex_exit(&ill->ill_lock); 6633 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6634 return; 6635 default: 6636 break; 6637 } 6638 /* FALLTHRU */ 6639 default: 6640 putnext(q, mp); 6641 return; 6642 } 6643 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6644 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6645 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6646 /* 6647 * if db_ref > 1 then copymsg and free original. Packet may be 6648 * changed and do not want other entity who has a reference to this 6649 * message to trip over the changes. This is a blind change because 6650 * trying to catch all places that might change packet is too 6651 * difficult (since it may be a module above this one). 6652 */ 6653 if (mp->b_datap->db_ref > 1) { 6654 mblk_t *mp1; 6655 6656 mp1 = copymsg(mp); 6657 freemsg(mp); 6658 if (mp1 == NULL) { 6659 first_mp = NULL; 6660 goto discard; 6661 } 6662 mp = mp1; 6663 } 6664 first_mp = mp; 6665 if (mctl_present) { 6666 hada_mp = first_mp; 6667 mp = first_mp->b_cont; 6668 } 6669 6670 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6671 freemsg(mp); 6672 return; 6673 } 6674 6675 ip6h = (ip6_t *)mp->b_rptr; 6676 6677 /* 6678 * ip:::receive must see ipv6 packets with a full header, 6679 * and so is placed after the IP6_MBLK_HDR_ERR check. 6680 */ 6681 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6682 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6683 int, 0); 6684 6685 if (check != IP6_MBLK_OK) { 6686 freemsg(mp); 6687 return; 6688 } 6689 6690 DTRACE_PROBE4(ip6__physical__in__start, 6691 ill_t *, ill, ill_t *, NULL, 6692 ip6_t *, ip6h, mblk_t *, first_mp); 6693 6694 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6695 ipst->ips_ipv6firewall_physical_in, 6696 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6697 6698 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6699 6700 if (first_mp == NULL) 6701 return; 6702 6703 /* 6704 * Attach any necessary label information to this packet. 6705 */ 6706 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 6707 if (ip6opt_ls != 0) 6708 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 6709 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 6710 goto discard; 6711 } 6712 6713 /* IP observability hook. 
*/ 6714 if (ipst->ips_ipobs_enabled) { 6715 zoneid_t dzone; 6716 6717 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 6718 ALL_ZONES); 6719 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, 6720 IPV6_VERSION, 0, ipst); 6721 } 6722 6723 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6724 IPV6_DEFAULT_VERS_AND_FLOW) { 6725 /* 6726 * It may be a bit too expensive to do this mapped address 6727 * check here, but in the interest of robustness, it seems 6728 * like the correct place. 6729 * TODO: Avoid this check for e.g. connected TCP sockets 6730 */ 6731 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6732 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6733 goto discard; 6734 } 6735 6736 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6737 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6738 goto discard; 6739 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6740 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6741 goto discard; 6742 } 6743 6744 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6745 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6746 } else { 6747 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6748 goto discard; 6749 } 6750 freemsg(dl_mp); 6751 return; 6752 6753 discard: 6754 if (dl_mp != NULL) 6755 freeb(dl_mp); 6756 freemsg(first_mp); 6757 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6758 } 6759 6760 /* 6761 * Walk through the IPv6 packet in mp and see if there's an AH header 6762 * in it. See if the AH header needs to get done before other headers in 6763 * the packet. (Worker function for ipsec_early_ah_v6().) 
6764 */ 6765 #define IPSEC_HDR_DONT_PROCESS 0 6766 #define IPSEC_HDR_PROCESS 1 6767 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 6768 static int 6769 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6770 { 6771 uint_t length; 6772 uint_t ehdrlen; 6773 uint8_t *whereptr; 6774 uint8_t *endptr; 6775 uint8_t *nexthdrp; 6776 ip6_dest_t *desthdr; 6777 ip6_rthdr_t *rthdr; 6778 ip6_t *ip6h; 6779 6780 /* 6781 * For now just pullup everything. In general, the less pullups, 6782 * the better, but there's so much squirrelling through anyway, 6783 * it's just easier this way. 6784 */ 6785 if (!pullupmsg(mp, -1)) { 6786 return (IPSEC_MEMORY_ERROR); 6787 } 6788 6789 ip6h = (ip6_t *)mp->b_rptr; 6790 length = IPV6_HDR_LEN; 6791 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6792 endptr = mp->b_wptr; 6793 6794 /* 6795 * We can't just use the argument nexthdr in the place 6796 * of nexthdrp becaue we don't dereference nexthdrp 6797 * till we confirm whether it is a valid address. 6798 */ 6799 nexthdrp = &ip6h->ip6_nxt; 6800 while (whereptr < endptr) { 6801 /* Is there enough left for len + nexthdr? */ 6802 if (whereptr + MIN_EHDR_LEN > endptr) 6803 return (IPSEC_MEMORY_ERROR); 6804 6805 switch (*nexthdrp) { 6806 case IPPROTO_HOPOPTS: 6807 case IPPROTO_DSTOPTS: 6808 /* Assumes the headers are identical for hbh and dst */ 6809 desthdr = (ip6_dest_t *)whereptr; 6810 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6811 if ((uchar_t *)desthdr + ehdrlen > endptr) 6812 return (IPSEC_MEMORY_ERROR); 6813 /* 6814 * Return DONT_PROCESS because the destination 6815 * options header may be for each hop in a 6816 * routing-header, and we only want AH if we're 6817 * finished with routing headers. 
6818 */ 6819 if (*nexthdrp == IPPROTO_DSTOPTS) 6820 return (IPSEC_HDR_DONT_PROCESS); 6821 nexthdrp = &desthdr->ip6d_nxt; 6822 break; 6823 case IPPROTO_ROUTING: 6824 rthdr = (ip6_rthdr_t *)whereptr; 6825 6826 /* 6827 * If there's more hops left on the routing header, 6828 * return now with DON'T PROCESS. 6829 */ 6830 if (rthdr->ip6r_segleft > 0) 6831 return (IPSEC_HDR_DONT_PROCESS); 6832 6833 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6834 if ((uchar_t *)rthdr + ehdrlen > endptr) 6835 return (IPSEC_MEMORY_ERROR); 6836 nexthdrp = &rthdr->ip6r_nxt; 6837 break; 6838 case IPPROTO_FRAGMENT: 6839 /* Wait for reassembly */ 6840 return (IPSEC_HDR_DONT_PROCESS); 6841 case IPPROTO_AH: 6842 *nexthdr = IPPROTO_AH; 6843 return (IPSEC_HDR_PROCESS); 6844 case IPPROTO_NONE: 6845 /* No next header means we're finished */ 6846 default: 6847 return (IPSEC_HDR_DONT_PROCESS); 6848 } 6849 length += ehdrlen; 6850 whereptr += ehdrlen; 6851 } 6852 /* 6853 * Malformed/truncated packet. 6854 */ 6855 return (IPSEC_MEMORY_ERROR); 6856 } 6857 6858 /* 6859 * Path for AH if options are present. If this is the first time we are 6860 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6861 * Otherwise, just fanout. Return value answers the boolean question: 6862 * "Did I consume the mblk you sent me?" 6863 * 6864 * Sometimes AH needs to be done before other IPv6 headers for security 6865 * reasons. This function (and its ipsec_needs_processing_v6() above) 6866 * indicates if that is so, and fans out to the appropriate IPsec protocol 6867 * for the datagram passed in. 
6868 */ 6869 static boolean_t 6870 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6871 ill_t *ill, ill_t *inill, mblk_t *hada_mp, zoneid_t zoneid) 6872 { 6873 mblk_t *mp; 6874 uint8_t nexthdr; 6875 ipsec_in_t *ii = NULL; 6876 ah_t *ah; 6877 ipsec_status_t ipsec_rc; 6878 ip_stack_t *ipst = ill->ill_ipst; 6879 netstack_t *ns = ipst->ips_netstack; 6880 ipsec_stack_t *ipss = ns->netstack_ipsec; 6881 6882 ASSERT((hada_mp == NULL) || (!mctl_present)); 6883 6884 switch (ipsec_needs_processing_v6( 6885 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6886 case IPSEC_MEMORY_ERROR: 6887 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6888 freemsg(hada_mp); 6889 freemsg(first_mp); 6890 return (B_TRUE); 6891 case IPSEC_HDR_DONT_PROCESS: 6892 return (B_FALSE); 6893 } 6894 6895 /* Default means send it to AH! */ 6896 ASSERT(nexthdr == IPPROTO_AH); 6897 if (!mctl_present) { 6898 mp = first_mp; 6899 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 6900 if (first_mp == NULL) { 6901 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6902 "allocation failure.\n")); 6903 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6904 freemsg(hada_mp); 6905 freemsg(mp); 6906 return (B_TRUE); 6907 } 6908 /* 6909 * Store the ill_index so that when we come back 6910 * from IPSEC we ride on the same queue. 6911 */ 6912 ii = (ipsec_in_t *)first_mp->b_rptr; 6913 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6914 ii->ipsec_in_rill_index = inill->ill_phyint->phyint_ifindex; 6915 first_mp->b_cont = mp; 6916 } 6917 /* 6918 * Cache hardware acceleration info. 
6919 */ 6920 if (hada_mp != NULL) { 6921 ASSERT(ii != NULL); 6922 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6923 "caching data attr.\n")); 6924 ii->ipsec_in_accelerated = B_TRUE; 6925 ii->ipsec_in_da = hada_mp; 6926 } 6927 6928 if (!ipsec_loaded(ipss)) { 6929 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 6930 return (B_TRUE); 6931 } 6932 6933 ah = ipsec_inbound_ah_sa(first_mp, ns); 6934 if (ah == NULL) 6935 return (B_TRUE); 6936 ASSERT(ii->ipsec_in_ah_sa != NULL); 6937 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6938 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6939 6940 switch (ipsec_rc) { 6941 case IPSEC_STATUS_SUCCESS: 6942 /* we're done with IPsec processing, send it up */ 6943 ip_fanout_proto_again(first_mp, ill, inill, NULL); 6944 break; 6945 case IPSEC_STATUS_FAILED: 6946 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 6947 break; 6948 case IPSEC_STATUS_PENDING: 6949 /* no action needed */ 6950 break; 6951 } 6952 return (B_TRUE); 6953 } 6954 6955 /* 6956 * Validate the IPv6 mblk for alignment. 6957 */ 6958 int 6959 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 6960 { 6961 int pkt_len, ip6_len; 6962 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 6963 6964 /* check for alignment and full IPv6 header */ 6965 if (!OK_32PTR((uchar_t *)ip6h) || 6966 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6967 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6968 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6969 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6970 return (IP6_MBLK_HDR_ERR); 6971 } 6972 ip6h = (ip6_t *)mp->b_rptr; 6973 } 6974 6975 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6976 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6977 6978 if (mp->b_cont == NULL) 6979 pkt_len = mp->b_wptr - mp->b_rptr; 6980 else 6981 pkt_len = msgdsize(mp); 6982 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6983 6984 /* 6985 * Check for bogus (too short packet) and packet which 6986 * was padded by the link layer. 
6987 */ 6988 if (ip6_len != pkt_len) { 6989 ssize_t diff; 6990 6991 if (ip6_len > pkt_len) { 6992 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 6993 ip6_len, pkt_len)); 6994 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 6995 return (IP6_MBLK_LEN_ERR); 6996 } 6997 diff = (ssize_t)(pkt_len - ip6_len); 6998 6999 if (!adjmsg(mp, -diff)) { 7000 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 7001 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7002 return (IP6_MBLK_LEN_ERR); 7003 } 7004 } 7005 return (IP6_MBLK_OK); 7006 } 7007 7008 /* 7009 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 7010 * ip_rput_v6 has already verified alignment, the min length, the version, 7011 * and db_ref = 1. 7012 * 7013 * The ill passed in (the arg named inill) is the ill that the packet 7014 * actually arrived on. We need to remember this when saving the 7015 * input interface index into potential IPV6_PKTINFO data in 7016 * ip_add_info_v6(). 7017 * 7018 * This routine doesn't free dl_mp; that's the caller's responsibility on 7019 * return. (Note that the callers are complex enough that there's no tail 7020 * recursion here anyway.) 
7021 */ 7022 void 7023 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7024 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7025 { 7026 ire_t *ire = NULL; 7027 ill_t *ill = inill; 7028 ill_t *outill; 7029 ipif_t *ipif; 7030 uint8_t *whereptr; 7031 uint8_t nexthdr; 7032 uint16_t remlen; 7033 uint_t prev_nexthdr_offset; 7034 uint_t used; 7035 size_t old_pkt_len; 7036 size_t pkt_len; 7037 uint16_t ip6_len; 7038 uint_t hdr_len; 7039 boolean_t mctl_present; 7040 mblk_t *first_mp; 7041 mblk_t *first_mp1; 7042 boolean_t no_forward; 7043 ip6_hbh_t *hbhhdr; 7044 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7045 conn_t *connp; 7046 uint32_t ports; 7047 zoneid_t zoneid = GLOBAL_ZONEID; 7048 uint16_t hck_flags, reass_hck_flags; 7049 uint32_t reass_sum; 7050 boolean_t cksum_err; 7051 mblk_t *mp1; 7052 ip_stack_t *ipst = inill->ill_ipst; 7053 7054 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7055 7056 if (hada_mp != NULL) { 7057 /* 7058 * It's an IPsec accelerated packet. 7059 * Keep a pointer to the data attributes around until 7060 * we allocate the ipsecinfo structure. 7061 */ 7062 IPSECHW_DEBUG(IPSECHW_PKT, 7063 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7064 hada_mp->b_cont = NULL; 7065 /* 7066 * Since it is accelerated, it came directly from 7067 * the ill. 
7068 */ 7069 ASSERT(mctl_present == B_FALSE); 7070 ASSERT(mp->b_datap->db_type != M_CTL); 7071 } 7072 7073 ip6h = (ip6_t *)mp->b_rptr; 7074 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7075 old_pkt_len = pkt_len = ip6_len; 7076 7077 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7078 hck_flags = DB_CKSUMFLAGS(mp); 7079 else 7080 hck_flags = 0; 7081 7082 /* Clear checksum flags in case we need to forward */ 7083 DB_CKSUMFLAGS(mp) = 0; 7084 reass_sum = reass_hck_flags = 0; 7085 7086 nexthdr = ip6h->ip6_nxt; 7087 7088 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7089 (uchar_t *)ip6h); 7090 whereptr = (uint8_t *)&ip6h[1]; 7091 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7092 7093 /* Process hop by hop header options */ 7094 if (nexthdr == IPPROTO_HOPOPTS) { 7095 uint_t ehdrlen; 7096 uint8_t *optptr; 7097 7098 if (remlen < MIN_EHDR_LEN) 7099 goto pkt_too_short; 7100 if (mp->b_cont != NULL && 7101 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7102 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7103 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7104 freemsg(hada_mp); 7105 freemsg(first_mp); 7106 return; 7107 } 7108 ip6h = (ip6_t *)mp->b_rptr; 7109 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7110 } 7111 hbhhdr = (ip6_hbh_t *)whereptr; 7112 nexthdr = hbhhdr->ip6h_nxt; 7113 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7114 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7115 7116 if (remlen < ehdrlen) 7117 goto pkt_too_short; 7118 if (mp->b_cont != NULL && 7119 whereptr + ehdrlen > mp->b_wptr) { 7120 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7121 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7122 freemsg(hada_mp); 7123 freemsg(first_mp); 7124 return; 7125 } 7126 ip6h = (ip6_t *)mp->b_rptr; 7127 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7128 hbhhdr = (ip6_hbh_t *)whereptr; 7129 } 7130 7131 optptr = whereptr + 2; 7132 whereptr += ehdrlen; 7133 remlen -= ehdrlen; 7134 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7135 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7136 case -1: 7137 /* 7138 * Packet has been consumed and any 7139 * needed ICMP messages sent. 7140 */ 7141 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7142 freemsg(hada_mp); 7143 return; 7144 case 0: 7145 /* no action needed */ 7146 break; 7147 case 1: 7148 /* Known router alert */ 7149 goto ipv6forus; 7150 } 7151 } 7152 7153 /* 7154 * On incoming v6 multicast packets we will bypass the ire table, 7155 * and assume that the read queue corresponds to the targetted 7156 * interface. 7157 * 7158 * The effect of this is the same as the IPv4 original code, but is 7159 * much cleaner I think. See ip_rput for how that was done. 7160 */ 7161 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7162 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7163 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7164 7165 /* 7166 * So that we don't end up with dups, only one ill in an IPMP 7167 * group is nominated to receive multicast data traffic. 7168 * However, link-locals on any underlying interfaces will have 7169 * joined their solicited-node multicast addresses and we must 7170 * accept those packets. (We don't attempt to precisely 7171 * filter out duplicate solicited-node multicast packets since 7172 * e.g. an IPMP interface and underlying interface may have 7173 * the same solicited-node multicast address.) Note that we 7174 * won't generally have duplicates because we only issue a 7175 * DL_ENABMULTI_REQ on one interface in a group; the exception 7176 * is when PHYI_MULTI_BCAST is set. 7177 */ 7178 if (IS_UNDER_IPMP(ill) && !ill->ill_nom_cast && 7179 !IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 7180 goto drop_pkt; 7181 } 7182 7183 /* 7184 * XXX TODO Give to mrouted to for multicast forwarding. 
7185 */ 7186 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 7187 ALL_ZONES) == NULL) { 7188 if (ip_debug > 3) { 7189 /* ip2dbg */ 7190 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7191 " which is not for us: %s\n", AF_INET6, 7192 &ip6h->ip6_dst); 7193 } 7194 drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7195 freemsg(hada_mp); 7196 freemsg(first_mp); 7197 return; 7198 } 7199 if (ip_debug > 3) { 7200 /* ip2dbg */ 7201 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7202 AF_INET6, &ip6h->ip6_dst); 7203 } 7204 zoneid = GLOBAL_ZONEID; 7205 goto ipv6forus; 7206 } 7207 7208 ipif = ill->ill_ipif; 7209 7210 /* 7211 * If a packet was received on an interface that is a 6to4 tunnel, 7212 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7213 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7214 * the 6to4 prefix of the address configured on the receiving interface. 7215 * Otherwise, the packet was delivered to this interface in error and 7216 * the packet must be dropped. 7217 */ 7218 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7219 7220 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7221 &ip6h->ip6_dst)) { 7222 if (ip_debug > 2) { 7223 /* ip1dbg */ 7224 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7225 "addressed packet which is not for us: " 7226 "%s\n", AF_INET6, &ip6h->ip6_dst); 7227 } 7228 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7229 freemsg(first_mp); 7230 return; 7231 } 7232 } 7233 7234 /* 7235 * Find an ire that matches destination. For link-local addresses 7236 * we have to match the ill. 7237 * TBD for site local addresses. 
7238 */ 7239 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7240 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7241 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7242 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 7243 } else { 7244 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7245 msg_getlabel(mp), ipst); 7246 7247 if (ire != NULL && ire->ire_stq != NULL && 7248 ire->ire_zoneid != GLOBAL_ZONEID && 7249 ire->ire_zoneid != ALL_ZONES) { 7250 /* 7251 * Should only use IREs that are visible from the 7252 * global zone for forwarding. 7253 */ 7254 ire_refrele(ire); 7255 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7256 GLOBAL_ZONEID, msg_getlabel(mp), ipst); 7257 } 7258 } 7259 7260 if (ire == NULL) { 7261 /* 7262 * No matching IRE found. Mark this packet as having 7263 * originated externally. 7264 */ 7265 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7266 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7267 if (!(ill->ill_flags & ILLF_ROUTER)) { 7268 BUMP_MIB(ill->ill_ip_mib, 7269 ipIfStatsInAddrErrors); 7270 } 7271 freemsg(hada_mp); 7272 freemsg(first_mp); 7273 return; 7274 } 7275 if (ip6h->ip6_hops <= 1) { 7276 if (hada_mp != NULL) 7277 goto hada_drop; 7278 /* Sent by forwarding path, and router is global zone */ 7279 icmp_time_exceeded_v6(WR(q), first_mp, 7280 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7281 GLOBAL_ZONEID, ipst); 7282 return; 7283 } 7284 /* 7285 * Per RFC 3513 section 2.5.2, we must not forward packets with 7286 * an unspecified source address. 7287 */ 7288 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7289 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7290 freemsg(hada_mp); 7291 freemsg(first_mp); 7292 return; 7293 } 7294 mp->b_prev = (mblk_t *)(uintptr_t) 7295 ill->ill_phyint->phyint_ifindex; 7296 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7297 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? 
ill : NULL, 7298 GLOBAL_ZONEID, ipst); 7299 return; 7300 } 7301 /* we have a matching IRE */ 7302 if (ire->ire_stq != NULL) { 7303 /* 7304 * To be quicker, we may wish not to chase pointers 7305 * (ire->ire_ipif->ipif_ill...) and instead store the 7306 * forwarding policy in the ire. An unfortunate side- 7307 * effect of this would be requiring an ire flush whenever 7308 * the ILLF_ROUTER flag changes. For now, chase pointers 7309 * once and store in the boolean no_forward. 7310 * 7311 * This appears twice to keep it out of the non-forwarding, 7312 * yes-it's-for-us-on-the-right-interface case. 7313 */ 7314 no_forward = ((ill->ill_flags & 7315 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7316 7317 ASSERT(first_mp == mp); 7318 /* 7319 * This ire has a send-to queue - forward the packet. 7320 */ 7321 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7322 freemsg(hada_mp); 7323 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7324 if (no_forward) { 7325 BUMP_MIB(ill->ill_ip_mib, 7326 ipIfStatsInAddrErrors); 7327 } 7328 freemsg(mp); 7329 ire_refrele(ire); 7330 return; 7331 } 7332 /* 7333 * ipIfStatsHCInForwDatagrams should only be increment if there 7334 * will be an attempt to forward the packet, which is why we 7335 * increment after the above condition has been checked. 7336 */ 7337 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7338 if (ip6h->ip6_hops <= 1) { 7339 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7340 /* Sent by forwarding path, and router is global zone */ 7341 icmp_time_exceeded_v6(WR(q), mp, 7342 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7343 GLOBAL_ZONEID, ipst); 7344 ire_refrele(ire); 7345 return; 7346 } 7347 /* 7348 * Per RFC 3513 section 2.5.2, we must not forward packets with 7349 * an unspecified source address. 
7350 */ 7351 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7352 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7353 freemsg(mp); 7354 ire_refrele(ire); 7355 return; 7356 } 7357 7358 if (is_system_labeled()) { 7359 mblk_t *mp1; 7360 7361 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7362 BUMP_MIB(ill->ill_ip_mib, 7363 ipIfStatsForwProhibits); 7364 freemsg(mp); 7365 ire_refrele(ire); 7366 return; 7367 } 7368 /* Size may have changed */ 7369 mp = mp1; 7370 ip6h = (ip6_t *)mp->b_rptr; 7371 pkt_len = msgdsize(mp); 7372 } 7373 7374 if (pkt_len > ire->ire_max_frag) { 7375 int max_frag = ire->ire_max_frag; 7376 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7377 /* 7378 * Handle labeled packet resizing. 7379 */ 7380 if (is_system_labeled()) { 7381 max_frag = tsol_pmtu_adjust(mp, max_frag, 7382 pkt_len - old_pkt_len, AF_INET6); 7383 } 7384 7385 /* Sent by forwarding path, and router is global zone */ 7386 icmp_pkt2big_v6(WR(q), mp, max_frag, 7387 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7388 ire_refrele(ire); 7389 return; 7390 } 7391 7392 /* 7393 * Check to see if we're forwarding the packet to a 7394 * different link from which it came. If so, check the 7395 * source and destination addresses since routers must not 7396 * forward any packets with link-local source or 7397 * destination addresses to other links. Otherwise (if 7398 * we're forwarding onto the same link), conditionally send 7399 * a redirect message. 7400 */ 7401 if (ire->ire_rfq != q && 7402 !IS_IN_SAME_ILLGRP(ill, (ill_t *)ire->ire_rfq->q_ptr)) { 7403 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7404 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7405 BUMP_MIB(ill->ill_ip_mib, 7406 ipIfStatsInAddrErrors); 7407 freemsg(mp); 7408 ire_refrele(ire); 7409 return; 7410 } 7411 /* TBD add site-local check at site boundary? 
*/ 7412 } else if (ipst->ips_ipv6_send_redirects) { 7413 in6_addr_t *v6targ; 7414 in6_addr_t gw_addr_v6; 7415 ire_t *src_ire_v6 = NULL; 7416 7417 /* 7418 * Don't send a redirect when forwarding a source 7419 * routed packet. 7420 */ 7421 if (ip_source_routed_v6(ip6h, mp, ipst)) 7422 goto forward; 7423 7424 mutex_enter(&ire->ire_lock); 7425 gw_addr_v6 = ire->ire_gateway_addr_v6; 7426 mutex_exit(&ire->ire_lock); 7427 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7428 v6targ = &gw_addr_v6; 7429 /* 7430 * We won't send redirects to a router 7431 * that doesn't have a link local 7432 * address, but will forward. 7433 */ 7434 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7435 BUMP_MIB(ill->ill_ip_mib, 7436 ipIfStatsInAddrErrors); 7437 goto forward; 7438 } 7439 } else { 7440 v6targ = &ip6h->ip6_dst; 7441 } 7442 7443 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7444 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7445 GLOBAL_ZONEID, 0, NULL, 7446 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7447 ipst); 7448 7449 if (src_ire_v6 != NULL) { 7450 /* 7451 * The source is directly connected. 
7452 */ 7453 mp1 = copymsg(mp); 7454 if (mp1 != NULL) { 7455 icmp_send_redirect_v6(WR(q), 7456 mp1, v6targ, &ip6h->ip6_dst, 7457 ill, B_FALSE); 7458 } 7459 ire_refrele(src_ire_v6); 7460 } 7461 } 7462 7463 forward: 7464 /* Hoplimit verified above */ 7465 ip6h->ip6_hops--; 7466 7467 outill = ire->ire_ipif->ipif_ill; 7468 7469 DTRACE_PROBE4(ip6__forwarding__start, 7470 ill_t *, inill, ill_t *, outill, 7471 ip6_t *, ip6h, mblk_t *, mp); 7472 7473 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7474 ipst->ips_ipv6firewall_forwarding, 7475 inill, outill, ip6h, mp, mp, 0, ipst); 7476 7477 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7478 7479 if (mp != NULL) { 7480 UPDATE_IB_PKT_COUNT(ire); 7481 ire->ire_last_used_time = lbolt; 7482 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7483 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7484 } 7485 IRE_REFRELE(ire); 7486 return; 7487 } 7488 7489 /* 7490 * Need to put on correct queue for reassembly to find it. 7491 * No need to use put() since reassembly has its own locks. 7492 * Note: multicast packets and packets destined to addresses 7493 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7494 * the arriving ill. Unlike the IPv4 case, enabling strict 7495 * destination multihoming will prevent accepting packets 7496 * addressed to an IRE_LOCAL on lo0. 7497 */ 7498 if (ire->ire_rfq != q) { 7499 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7500 == NULL) { 7501 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7502 freemsg(hada_mp); 7503 freemsg(first_mp); 7504 return; 7505 } 7506 if (ire->ire_rfq != NULL) { 7507 q = ire->ire_rfq; 7508 ill = (ill_t *)q->q_ptr; 7509 ASSERT(ill != NULL); 7510 } 7511 } 7512 7513 zoneid = ire->ire_zoneid; 7514 UPDATE_IB_PKT_COUNT(ire); 7515 ire->ire_last_used_time = lbolt; 7516 /* Don't use the ire after this point, we'll NULL it out to be sure. 
*/ 7517 ire_refrele(ire); 7518 ire = NULL; 7519 ipv6forus: 7520 /* 7521 * Looks like this packet is for us one way or another. 7522 * This is where we'll process destination headers etc. 7523 */ 7524 for (; ; ) { 7525 switch (nexthdr) { 7526 case IPPROTO_TCP: { 7527 uint16_t *up; 7528 uint32_t sum; 7529 int offset; 7530 7531 hdr_len = pkt_len - remlen; 7532 7533 if (hada_mp != NULL) { 7534 ip0dbg(("tcp hada drop\n")); 7535 goto hada_drop; 7536 } 7537 7538 7539 /* TCP needs all of the TCP header */ 7540 if (remlen < TCP_MIN_HEADER_LENGTH) 7541 goto pkt_too_short; 7542 if (mp->b_cont != NULL && 7543 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7544 if (!pullupmsg(mp, 7545 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7546 BUMP_MIB(ill->ill_ip_mib, 7547 ipIfStatsInDiscards); 7548 freemsg(first_mp); 7549 return; 7550 } 7551 hck_flags = 0; 7552 ip6h = (ip6_t *)mp->b_rptr; 7553 whereptr = (uint8_t *)ip6h + hdr_len; 7554 } 7555 /* 7556 * Extract the offset field from the TCP header. 7557 */ 7558 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7559 if (offset != 5) { 7560 if (offset < 5) { 7561 ip1dbg(("ip_rput_data_v6: short " 7562 "TCP data offset")); 7563 BUMP_MIB(ill->ill_ip_mib, 7564 ipIfStatsInDiscards); 7565 freemsg(first_mp); 7566 return; 7567 } 7568 /* 7569 * There must be TCP options. 7570 * Make sure we can grab them. 7571 */ 7572 offset <<= 2; 7573 if (remlen < offset) 7574 goto pkt_too_short; 7575 if (mp->b_cont != NULL && 7576 whereptr + offset > mp->b_wptr) { 7577 if (!pullupmsg(mp, 7578 hdr_len + offset)) { 7579 BUMP_MIB(ill->ill_ip_mib, 7580 ipIfStatsInDiscards); 7581 freemsg(first_mp); 7582 return; 7583 } 7584 hck_flags = 0; 7585 ip6h = (ip6_t *)mp->b_rptr; 7586 whereptr = (uint8_t *)ip6h + hdr_len; 7587 } 7588 } 7589 7590 up = (uint16_t *)&ip6h->ip6_src; 7591 /* 7592 * TCP checksum calculation. 
First sum up the 7593 * pseudo-header fields: 7594 * - Source IPv6 address 7595 * - Destination IPv6 address 7596 * - TCP payload length 7597 * - TCP protocol ID 7598 */ 7599 sum = htons(IPPROTO_TCP + remlen) + 7600 up[0] + up[1] + up[2] + up[3] + 7601 up[4] + up[5] + up[6] + up[7] + 7602 up[8] + up[9] + up[10] + up[11] + 7603 up[12] + up[13] + up[14] + up[15]; 7604 7605 /* Fold initial sum */ 7606 sum = (sum & 0xffff) + (sum >> 16); 7607 7608 mp1 = mp->b_cont; 7609 7610 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7611 IP6_STAT(ipst, ip6_in_sw_cksum); 7612 7613 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7614 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7615 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7616 mp, mp1, cksum_err); 7617 7618 if (cksum_err) { 7619 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7620 7621 if (hck_flags & HCK_FULLCKSUM) { 7622 IP6_STAT(ipst, 7623 ip6_tcp_in_full_hw_cksum_err); 7624 } else if (hck_flags & HCK_PARTIALCKSUM) { 7625 IP6_STAT(ipst, 7626 ip6_tcp_in_part_hw_cksum_err); 7627 } else { 7628 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7629 } 7630 freemsg(first_mp); 7631 return; 7632 } 7633 tcp_fanout: 7634 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7635 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7636 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7637 return; 7638 } 7639 case IPPROTO_SCTP: 7640 { 7641 sctp_hdr_t *sctph; 7642 uint32_t calcsum, pktsum; 7643 uint_t hdr_len = pkt_len - remlen; 7644 sctp_stack_t *sctps; 7645 7646 sctps = inill->ill_ipst->ips_netstack->netstack_sctp; 7647 7648 /* SCTP needs all of the SCTP header */ 7649 if (remlen < sizeof (*sctph)) { 7650 goto pkt_too_short; 7651 } 7652 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7653 ASSERT(mp->b_cont != NULL); 7654 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7655 BUMP_MIB(ill->ill_ip_mib, 7656 ipIfStatsInDiscards); 7657 freemsg(mp); 7658 return; 7659 } 7660 ip6h = (ip6_t *)mp->b_rptr; 7661 whereptr = (uint8_t *)ip6h + hdr_len; 7662 } 7663 7664 sctph = 
(sctp_hdr_t *)(mp->b_rptr + hdr_len); 7665 /* checksum */ 7666 pktsum = sctph->sh_chksum; 7667 sctph->sh_chksum = 0; 7668 calcsum = sctp_cksum(mp, hdr_len); 7669 if (calcsum != pktsum) { 7670 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7671 freemsg(mp); 7672 return; 7673 } 7674 sctph->sh_chksum = pktsum; 7675 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7676 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7677 ports, zoneid, mp, sctps)) == NULL) { 7678 ip_fanout_sctp_raw(first_mp, ill, 7679 (ipha_t *)ip6h, B_FALSE, ports, 7680 mctl_present, 7681 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7682 B_TRUE, zoneid); 7683 return; 7684 } 7685 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7686 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7687 B_FALSE, mctl_present); 7688 return; 7689 } 7690 case IPPROTO_UDP: { 7691 uint16_t *up; 7692 uint32_t sum; 7693 7694 hdr_len = pkt_len - remlen; 7695 7696 if (hada_mp != NULL) { 7697 ip0dbg(("udp hada drop\n")); 7698 goto hada_drop; 7699 } 7700 7701 /* Verify that at least the ports are present */ 7702 if (remlen < UDPH_SIZE) 7703 goto pkt_too_short; 7704 if (mp->b_cont != NULL && 7705 whereptr + UDPH_SIZE > mp->b_wptr) { 7706 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7707 BUMP_MIB(ill->ill_ip_mib, 7708 ipIfStatsInDiscards); 7709 freemsg(first_mp); 7710 return; 7711 } 7712 hck_flags = 0; 7713 ip6h = (ip6_t *)mp->b_rptr; 7714 whereptr = (uint8_t *)ip6h + hdr_len; 7715 } 7716 7717 /* 7718 * Before going through the regular checksum 7719 * calculation, make sure the received checksum 7720 * is non-zero. RFC 2460 says, a 0x0000 checksum 7721 * in a UDP packet (within IPv6 packet) is invalid 7722 * and should be replaced by 0xffff. This makes 7723 * sense as regular checksum calculation will 7724 * pass for both the cases i.e. 0x0000 and 0xffff. 7725 * Removing one of the case makes error detection 7726 * stronger. 
7727 */ 7728 7729 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7730 /* 0x0000 checksum is invalid */ 7731 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7732 "checksum value 0x0000\n")); 7733 BUMP_MIB(ill->ill_ip_mib, 7734 udpIfStatsInCksumErrs); 7735 freemsg(first_mp); 7736 return; 7737 } 7738 7739 up = (uint16_t *)&ip6h->ip6_src; 7740 7741 /* 7742 * UDP checksum calculation. First sum up the 7743 * pseudo-header fields: 7744 * - Source IPv6 address 7745 * - Destination IPv6 address 7746 * - UDP payload length 7747 * - UDP protocol ID 7748 */ 7749 7750 sum = htons(IPPROTO_UDP + remlen) + 7751 up[0] + up[1] + up[2] + up[3] + 7752 up[4] + up[5] + up[6] + up[7] + 7753 up[8] + up[9] + up[10] + up[11] + 7754 up[12] + up[13] + up[14] + up[15]; 7755 7756 /* Fold initial sum */ 7757 sum = (sum & 0xffff) + (sum >> 16); 7758 7759 if (reass_hck_flags != 0) { 7760 hck_flags = reass_hck_flags; 7761 7762 IP_CKSUM_RECV_REASS(hck_flags, 7763 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7764 sum, reass_sum, cksum_err); 7765 } else { 7766 mp1 = mp->b_cont; 7767 7768 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7769 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7770 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7771 mp, mp1, cksum_err); 7772 } 7773 7774 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7775 IP6_STAT(ipst, ip6_in_sw_cksum); 7776 7777 if (cksum_err) { 7778 BUMP_MIB(ill->ill_ip_mib, 7779 udpIfStatsInCksumErrs); 7780 7781 if (hck_flags & HCK_FULLCKSUM) 7782 IP6_STAT(ipst, 7783 ip6_udp_in_full_hw_cksum_err); 7784 else if (hck_flags & HCK_PARTIALCKSUM) 7785 IP6_STAT(ipst, 7786 ip6_udp_in_part_hw_cksum_err); 7787 else 7788 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7789 7790 freemsg(first_mp); 7791 return; 7792 } 7793 goto udp_fanout; 7794 } 7795 case IPPROTO_ICMPV6: { 7796 uint16_t *up; 7797 uint32_t sum; 7798 uint_t hdr_len = pkt_len - remlen; 7799 7800 if (hada_mp != NULL) { 7801 ip0dbg(("icmp hada drop\n")); 7802 goto hada_drop; 7803 } 7804 7805 up = (uint16_t 
*)&ip6h->ip6_src; 7806 sum = htons(IPPROTO_ICMPV6 + remlen) + 7807 up[0] + up[1] + up[2] + up[3] + 7808 up[4] + up[5] + up[6] + up[7] + 7809 up[8] + up[9] + up[10] + up[11] + 7810 up[12] + up[13] + up[14] + up[15]; 7811 sum = (sum & 0xffff) + (sum >> 16); 7812 sum = IP_CSUM(mp, hdr_len, sum); 7813 if (sum != 0) { 7814 /* IPv6 ICMP checksum failed */ 7815 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7816 "failed %x\n", 7817 sum)); 7818 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7819 BUMP_MIB(ill->ill_icmp6_mib, 7820 ipv6IfIcmpInErrors); 7821 freemsg(first_mp); 7822 return; 7823 } 7824 7825 icmp_fanout: 7826 /* Check variable for testing applications */ 7827 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7828 freemsg(first_mp); 7829 return; 7830 } 7831 /* 7832 * Assume that there is always at least one conn for 7833 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7834 * where there is no conn. 7835 */ 7836 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7837 ilm_t *ilm; 7838 ilm_walker_t ilw; 7839 7840 ASSERT(!IS_LOOPBACK(ill)); 7841 /* 7842 * In the multicast case, applications may have 7843 * joined the group from different zones, so we 7844 * need to deliver the packet to each of them. 7845 * Loop through the multicast memberships 7846 * structures (ilm) on the receive ill and send 7847 * a copy of the packet up each matching one. 
7848 */ 7849 ilm = ilm_walker_start(&ilw, inill); 7850 for (; ilm != NULL; 7851 ilm = ilm_walker_step(&ilw, ilm)) { 7852 if (!IN6_ARE_ADDR_EQUAL( 7853 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7854 continue; 7855 if (!ipif_lookup_zoneid( 7856 ilw.ilw_walk_ill, ilm->ilm_zoneid, 7857 IPIF_UP, NULL)) 7858 continue; 7859 7860 first_mp1 = ip_copymsg(first_mp); 7861 if (first_mp1 == NULL) 7862 continue; 7863 icmp_inbound_v6(q, first_mp1, 7864 ilw.ilw_walk_ill, inill, 7865 hdr_len, mctl_present, 0, 7866 ilm->ilm_zoneid, dl_mp); 7867 } 7868 ilm_walker_finish(&ilw); 7869 } else { 7870 first_mp1 = ip_copymsg(first_mp); 7871 if (first_mp1 != NULL) 7872 icmp_inbound_v6(q, first_mp1, ill, 7873 inill, hdr_len, mctl_present, 0, 7874 zoneid, dl_mp); 7875 } 7876 } 7877 /* FALLTHRU */ 7878 default: { 7879 /* 7880 * Handle protocols with which IPv6 is less intimate. 7881 */ 7882 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 7883 7884 if (hada_mp != NULL) { 7885 ip0dbg(("default hada drop\n")); 7886 goto hada_drop; 7887 } 7888 7889 /* 7890 * Enable sending ICMP for "Unknown" nexthdr 7891 * case. i.e. where we did not FALLTHRU from 7892 * IPPROTO_ICMPV6 processing case above. 7893 * If we did FALLTHRU, then the packet has already been 7894 * processed for IPPF, don't process it again in 7895 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7896 * flags 7897 */ 7898 if (nexthdr != IPPROTO_ICMPV6) 7899 proto_flags |= IP_FF_SEND_ICMP; 7900 else 7901 proto_flags |= IP6_NO_IPPOLICY; 7902 7903 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7904 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7905 mctl_present, zoneid); 7906 return; 7907 } 7908 7909 case IPPROTO_DSTOPTS: { 7910 uint_t ehdrlen; 7911 uint8_t *optptr; 7912 ip6_dest_t *desthdr; 7913 7914 /* If packet is too short, look no further */ 7915 if (remlen < MIN_EHDR_LEN) 7916 goto pkt_too_short; 7917 7918 /* Check if AH is present. 
*/ 7919 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7920 inill, hada_mp, zoneid)) { 7921 return; 7922 } 7923 7924 /* 7925 * Reinitialize pointers, as ipsec_early_ah_v6() does 7926 * complete pullups. We don't have to do more pullups 7927 * as a result. 7928 */ 7929 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7930 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7931 ip6h = (ip6_t *)mp->b_rptr; 7932 7933 desthdr = (ip6_dest_t *)whereptr; 7934 nexthdr = desthdr->ip6d_nxt; 7935 prev_nexthdr_offset = (uint_t)(whereptr - 7936 (uint8_t *)ip6h); 7937 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7938 if (remlen < ehdrlen) 7939 goto pkt_too_short; 7940 optptr = whereptr + 2; 7941 /* 7942 * Note: XXX This code does not seem to make 7943 * distinction between Destination Options Header 7944 * being before/after Routing Header which can 7945 * happen if we are at the end of source route. 7946 * This may become significant in future. 7947 * (No real significant Destination Options are 7948 * defined/implemented yet ). 7949 */ 7950 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7951 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 7952 case -1: 7953 /* 7954 * Packet has been consumed and any needed 7955 * ICMP errors sent. 
7956 */ 7957 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7958 freemsg(hada_mp); 7959 return; 7960 case 0: 7961 /* No action needed continue */ 7962 break; 7963 case 1: 7964 /* 7965 * Unnexpected return value 7966 * (Router alert is a Hop-by-Hop option) 7967 */ 7968 #ifdef DEBUG 7969 panic("ip_rput_data_v6: router " 7970 "alert hbh opt indication in dest opt"); 7971 /*NOTREACHED*/ 7972 #else 7973 freemsg(hada_mp); 7974 freemsg(first_mp); 7975 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7976 return; 7977 #endif 7978 } 7979 used = ehdrlen; 7980 break; 7981 } 7982 case IPPROTO_FRAGMENT: { 7983 ip6_frag_t *fraghdr; 7984 size_t no_frag_hdr_len; 7985 7986 if (hada_mp != NULL) { 7987 ip0dbg(("frag hada drop\n")); 7988 goto hada_drop; 7989 } 7990 7991 ASSERT(first_mp == mp); 7992 if (remlen < sizeof (ip6_frag_t)) 7993 goto pkt_too_short; 7994 7995 if (mp->b_cont != NULL && 7996 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 7997 if (!pullupmsg(mp, 7998 pkt_len - remlen + sizeof (ip6_frag_t))) { 7999 BUMP_MIB(ill->ill_ip_mib, 8000 ipIfStatsInDiscards); 8001 freemsg(mp); 8002 return; 8003 } 8004 hck_flags = 0; 8005 ip6h = (ip6_t *)mp->b_rptr; 8006 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 8007 } 8008 8009 fraghdr = (ip6_frag_t *)whereptr; 8010 used = (uint_t)sizeof (ip6_frag_t); 8011 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 8012 8013 /* 8014 * Invoke the CGTP (multirouting) filtering module to 8015 * process the incoming packet. Packets identified as 8016 * duplicates must be discarded. Filtering is active 8017 * only if the the ip_cgtp_filter ndd variable is 8018 * non-zero. 
8019 */ 8020 if (ipst->ips_ip_cgtp_filter && 8021 ipst->ips_ip_cgtp_filter_ops != NULL) { 8022 int cgtp_flt_pkt; 8023 netstackid_t stackid; 8024 8025 stackid = ipst->ips_netstack->netstack_stackid; 8026 8027 cgtp_flt_pkt = 8028 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 8029 stackid, inill->ill_phyint->phyint_ifindex, 8030 ip6h, fraghdr); 8031 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8032 freemsg(mp); 8033 return; 8034 } 8035 } 8036 8037 /* Restore the flags */ 8038 DB_CKSUMFLAGS(mp) = hck_flags; 8039 8040 mp = ip_rput_frag_v6(ill, inill, mp, ip6h, fraghdr, 8041 remlen - used, &prev_nexthdr_offset, 8042 &reass_sum, &reass_hck_flags); 8043 if (mp == NULL) { 8044 /* Reassembly is still pending */ 8045 return; 8046 } 8047 /* The first mblk are the headers before the frag hdr */ 8048 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8049 8050 first_mp = mp; /* mp has most likely changed! */ 8051 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8052 ip6h = (ip6_t *)mp->b_rptr; 8053 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8054 whereptr = mp->b_rptr + no_frag_hdr_len; 8055 remlen = ntohs(ip6h->ip6_plen) + 8056 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8057 pkt_len = msgdsize(mp); 8058 used = 0; 8059 break; 8060 } 8061 case IPPROTO_HOPOPTS: { 8062 if (hada_mp != NULL) { 8063 ip0dbg(("hop hada drop\n")); 8064 goto hada_drop; 8065 } 8066 /* 8067 * Illegal header sequence. 8068 * (Hop-by-hop headers are processed above 8069 * and required to immediately follow IPv6 header) 8070 */ 8071 icmp_param_problem_v6(WR(q), first_mp, 8072 ICMP6_PARAMPROB_NEXTHEADER, 8073 prev_nexthdr_offset, 8074 B_FALSE, B_FALSE, zoneid, ipst); 8075 return; 8076 } 8077 case IPPROTO_ROUTING: { 8078 uint_t ehdrlen; 8079 ip6_rthdr_t *rthdr; 8080 8081 /* If packet is too short, look no further */ 8082 if (remlen < MIN_EHDR_LEN) 8083 goto pkt_too_short; 8084 8085 /* Check if AH is present. 
*/ 8086 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8087 inill, hada_mp, zoneid)) { 8088 return; 8089 } 8090 8091 /* 8092 * Reinitialize pointers, as ipsec_early_ah_v6() does 8093 * complete pullups. We don't have to do more pullups 8094 * as a result. 8095 */ 8096 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8097 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8098 ip6h = (ip6_t *)mp->b_rptr; 8099 8100 rthdr = (ip6_rthdr_t *)whereptr; 8101 nexthdr = rthdr->ip6r_nxt; 8102 prev_nexthdr_offset = (uint_t)(whereptr - 8103 (uint8_t *)ip6h); 8104 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8105 if (remlen < ehdrlen) 8106 goto pkt_too_short; 8107 if (rthdr->ip6r_segleft != 0) { 8108 /* Not end of source route */ 8109 if (ll_multicast) { 8110 BUMP_MIB(ill->ill_ip_mib, 8111 ipIfStatsForwProhibits); 8112 freemsg(hada_mp); 8113 freemsg(mp); 8114 return; 8115 } 8116 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8117 flags, hada_mp, dl_mp); 8118 return; 8119 } 8120 used = ehdrlen; 8121 break; 8122 } 8123 case IPPROTO_AH: 8124 case IPPROTO_ESP: { 8125 /* 8126 * Fast path for AH/ESP. If this is the first time 8127 * we are sending a datagram to AH/ESP, allocate 8128 * a IPSEC_IN message and prepend it. Otherwise, 8129 * just fanout. 8130 */ 8131 8132 ipsec_in_t *ii; 8133 int ipsec_rc; 8134 ipsec_stack_t *ipss; 8135 8136 ipss = ipst->ips_netstack->netstack_ipsec; 8137 if (!mctl_present) { 8138 ASSERT(first_mp == mp); 8139 first_mp = ipsec_in_alloc(B_FALSE, 8140 ipst->ips_netstack); 8141 if (first_mp == NULL) { 8142 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8143 "allocation failure.\n")); 8144 BUMP_MIB(ill->ill_ip_mib, 8145 ipIfStatsInDiscards); 8146 freemsg(mp); 8147 return; 8148 } 8149 /* 8150 * Store the ill_index so that when we come back 8151 * from IPSEC we ride on the same queue. 
8152 */ 8153 ii = (ipsec_in_t *)first_mp->b_rptr; 8154 ii->ipsec_in_ill_index = 8155 ill->ill_phyint->phyint_ifindex; 8156 ii->ipsec_in_rill_index = 8157 inill->ill_phyint->phyint_ifindex; 8158 first_mp->b_cont = mp; 8159 /* 8160 * Cache hardware acceleration info. 8161 */ 8162 if (hada_mp != NULL) { 8163 IPSECHW_DEBUG(IPSECHW_PKT, 8164 ("ip_rput_data_v6: " 8165 "caching data attr.\n")); 8166 ii->ipsec_in_accelerated = B_TRUE; 8167 ii->ipsec_in_da = hada_mp; 8168 hada_mp = NULL; 8169 } 8170 } else { 8171 ii = (ipsec_in_t *)first_mp->b_rptr; 8172 } 8173 8174 if (!ipsec_loaded(ipss)) { 8175 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8176 zoneid, ipst); 8177 return; 8178 } 8179 8180 /* select inbound SA and have IPsec process the pkt */ 8181 if (nexthdr == IPPROTO_ESP) { 8182 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8183 ipst->ips_netstack); 8184 if (esph == NULL) 8185 return; 8186 ASSERT(ii->ipsec_in_esp_sa != NULL); 8187 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8188 NULL); 8189 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8190 first_mp, esph); 8191 } else { 8192 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8193 ipst->ips_netstack); 8194 if (ah == NULL) 8195 return; 8196 ASSERT(ii->ipsec_in_ah_sa != NULL); 8197 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8198 NULL); 8199 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8200 first_mp, ah); 8201 } 8202 8203 switch (ipsec_rc) { 8204 case IPSEC_STATUS_SUCCESS: 8205 break; 8206 case IPSEC_STATUS_FAILED: 8207 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8208 /* FALLTHRU */ 8209 case IPSEC_STATUS_PENDING: 8210 return; 8211 } 8212 /* we're done with IPsec processing, send it up */ 8213 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8214 return; 8215 } 8216 case IPPROTO_NONE: 8217 /* All processing is done. Count as "delivered". 
*/ 8218 freemsg(hada_mp); 8219 freemsg(first_mp); 8220 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8221 return; 8222 } 8223 whereptr += used; 8224 ASSERT(remlen >= used); 8225 remlen -= used; 8226 } 8227 /* NOTREACHED */ 8228 8229 pkt_too_short: 8230 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8231 ip6_len, pkt_len, remlen)); 8232 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8233 freemsg(hada_mp); 8234 freemsg(first_mp); 8235 return; 8236 udp_fanout: 8237 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8238 connp = NULL; 8239 } else { 8240 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8241 ipst); 8242 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8243 CONN_DEC_REF(connp); 8244 connp = NULL; 8245 } 8246 } 8247 8248 if (connp == NULL) { 8249 uint32_t ports; 8250 8251 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8252 UDP_PORTS_OFFSET); 8253 IP6_STAT(ipst, ip6_udp_slow_path); 8254 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8255 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8256 zoneid); 8257 return; 8258 } 8259 8260 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 8261 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 8262 freemsg(first_mp); 8263 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8264 CONN_DEC_REF(connp); 8265 return; 8266 } 8267 8268 /* Initiate IPPF processing */ 8269 if (IP6_IN_IPP(flags, ipst)) { 8270 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8271 if (mp == NULL) { 8272 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8273 CONN_DEC_REF(connp); 8274 return; 8275 } 8276 } 8277 8278 if (connp->conn_ip_recvpktinfo || 8279 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8280 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8281 if (mp == NULL) { 8282 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8283 CONN_DEC_REF(connp); 8284 return; 8285 } 8286 } 8287 8288 IP6_STAT(ipst, ip6_udp_fast_path); 8289 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 
8290 8291 /* Send it upstream */ 8292 (connp->conn_recv)(connp, mp, NULL); 8293 8294 CONN_DEC_REF(connp); 8295 freemsg(hada_mp); 8296 return; 8297 8298 hada_drop: 8299 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8300 /* IPsec kstats: bump counter here */ 8301 freemsg(hada_mp); 8302 freemsg(first_mp); 8303 } 8304 8305 /* 8306 * Reassemble fragment. 8307 * When it returns a completed message the first mblk will only contain 8308 * the headers prior to the fragment header. 8309 * 8310 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8311 * of the preceding header. This is needed to patch the previous header's 8312 * nexthdr field when reassembly completes. 8313 */ 8314 static mblk_t * 8315 ip_rput_frag_v6(ill_t *ill, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 8316 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8317 uint32_t *cksum_val, uint16_t *cksum_flags) 8318 { 8319 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8320 uint16_t offset; 8321 boolean_t more_frags; 8322 uint8_t nexthdr = fraghdr->ip6f_nxt; 8323 in6_addr_t *v6dst_ptr; 8324 in6_addr_t *v6src_ptr; 8325 uint_t end; 8326 uint_t hdr_length; 8327 size_t count; 8328 ipf_t *ipf; 8329 ipf_t **ipfp; 8330 ipfb_t *ipfb; 8331 mblk_t *mp1; 8332 uint8_t ecn_info = 0; 8333 size_t msg_len; 8334 mblk_t *tail_mp; 8335 mblk_t *t_mp; 8336 boolean_t pruned = B_FALSE; 8337 uint32_t sum_val; 8338 uint16_t sum_flags; 8339 ip_stack_t *ipst = ill->ill_ipst; 8340 8341 if (cksum_val != NULL) 8342 *cksum_val = 0; 8343 if (cksum_flags != NULL) 8344 *cksum_flags = 0; 8345 8346 /* 8347 * We utilize hardware computed checksum info only for UDP since 8348 * IP fragmentation is a normal occurence for the protocol. In 8349 * addition, checksum offload support for IP fragments carrying 8350 * UDP payload is commonly implemented across network adapters. 
8351 */ 8352 ASSERT(inill != NULL); 8353 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(inill) && 8354 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8355 mblk_t *mp1 = mp->b_cont; 8356 int32_t len; 8357 8358 /* Record checksum information from the packet */ 8359 sum_val = (uint32_t)DB_CKSUM16(mp); 8360 sum_flags = DB_CKSUMFLAGS(mp); 8361 8362 /* fragmented payload offset from beginning of mblk */ 8363 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8364 8365 if ((sum_flags & HCK_PARTIALCKSUM) && 8366 (mp1 == NULL || mp1->b_cont == NULL) && 8367 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8368 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8369 uint32_t adj; 8370 /* 8371 * Partial checksum has been calculated by hardware 8372 * and attached to the packet; in addition, any 8373 * prepended extraneous data is even byte aligned. 8374 * If any such data exists, we adjust the checksum; 8375 * this would also handle any postpended data. 8376 */ 8377 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8378 mp, mp1, len, adj); 8379 8380 /* One's complement subtract extraneous checksum */ 8381 if (adj >= sum_val) 8382 sum_val = ~(adj - sum_val) & 0xFFFF; 8383 else 8384 sum_val -= adj; 8385 } 8386 } else { 8387 sum_val = 0; 8388 sum_flags = 0; 8389 } 8390 8391 /* Clear hardware checksumming flag */ 8392 DB_CKSUMFLAGS(mp) = 0; 8393 8394 /* 8395 * Note: Fragment offset in header is in 8-octet units. 8396 * Clearing least significant 3 bits not only extracts 8397 * it but also gets it in units of octets. 8398 */ 8399 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8400 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8401 8402 /* 8403 * Is the more frags flag on and the payload length not a multiple 8404 * of eight? 
8405 */ 8406 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8407 zoneid_t zoneid; 8408 8409 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8410 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8411 if (zoneid == ALL_ZONES) { 8412 freemsg(mp); 8413 return (NULL); 8414 } 8415 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8416 (uint32_t)((char *)&ip6h->ip6_plen - 8417 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8418 return (NULL); 8419 } 8420 8421 v6src_ptr = &ip6h->ip6_src; 8422 v6dst_ptr = &ip6h->ip6_dst; 8423 end = remlen; 8424 8425 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8426 end += offset; 8427 8428 /* 8429 * Would fragment cause reassembled packet to have a payload length 8430 * greater than IP_MAXPACKET - the max payload size? 8431 */ 8432 if (end > IP_MAXPACKET) { 8433 zoneid_t zoneid; 8434 8435 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8436 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8437 if (zoneid == ALL_ZONES) { 8438 freemsg(mp); 8439 return (NULL); 8440 } 8441 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8442 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8443 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8444 return (NULL); 8445 } 8446 8447 /* 8448 * This packet just has one fragment. Reassembly not 8449 * needed. 8450 */ 8451 if (!more_frags && offset == 0) { 8452 goto reass_done; 8453 } 8454 8455 /* 8456 * Drop the fragmented as early as possible, if 8457 * we don't have resource(s) to re-assemble. 8458 */ 8459 if (ipst->ips_ip_reass_queue_bytes == 0) { 8460 freemsg(mp); 8461 return (NULL); 8462 } 8463 8464 /* Record the ECN field info. */ 8465 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8466 /* 8467 * If this is not the first fragment, dump the unfragmentable 8468 * portion of the packet. 8469 */ 8470 if (offset) 8471 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8472 8473 /* 8474 * Fragmentation reassembly. 
Each ILL has a hash table for 8475 * queueing packets undergoing reassembly for all IPIFs 8476 * associated with the ILL. The hash is based on the packet 8477 * IP ident field. The ILL frag hash table was allocated 8478 * as a timer block at the time the ILL was created. Whenever 8479 * there is anything on the reassembly queue, the timer will 8480 * be running. 8481 */ 8482 msg_len = MBLKSIZE(mp); 8483 tail_mp = mp; 8484 while (tail_mp->b_cont != NULL) { 8485 tail_mp = tail_mp->b_cont; 8486 msg_len += MBLKSIZE(tail_mp); 8487 } 8488 /* 8489 * If the reassembly list for this ILL will get too big 8490 * prune it. 8491 */ 8492 8493 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8494 ipst->ips_ip_reass_queue_bytes) { 8495 ill_frag_prune(ill, 8496 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8497 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8498 pruned = B_TRUE; 8499 } 8500 8501 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8502 mutex_enter(&ipfb->ipfb_lock); 8503 8504 ipfp = &ipfb->ipfb_ipf; 8505 /* Try to find an existing fragment queue for this packet. */ 8506 for (;;) { 8507 ipf = ipfp[0]; 8508 if (ipf) { 8509 /* 8510 * It has to match on ident, source address, and 8511 * dest address. 8512 */ 8513 if (ipf->ipf_ident == ident && 8514 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8515 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8516 8517 /* 8518 * If we have received too many 8519 * duplicate fragments for this packet 8520 * free it. 8521 */ 8522 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8523 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8524 freemsg(mp); 8525 mutex_exit(&ipfb->ipfb_lock); 8526 return (NULL); 8527 } 8528 8529 break; 8530 } 8531 ipfp = &ipf->ipf_hash_next; 8532 continue; 8533 } 8534 8535 8536 /* 8537 * If we pruned the list, do we want to store this new 8538 * fragment?. We apply an optimization here based on the 8539 * fact that most fragments will be received in order. 
8540 * So if the offset of this incoming fragment is zero, 8541 * it is the first fragment of a new packet. We will 8542 * keep it. Otherwise drop the fragment, as we have 8543 * probably pruned the packet already (since the 8544 * packet cannot be found). 8545 */ 8546 8547 if (pruned && offset != 0) { 8548 mutex_exit(&ipfb->ipfb_lock); 8549 freemsg(mp); 8550 return (NULL); 8551 } 8552 8553 /* New guy. Allocate a frag message. */ 8554 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8555 if (!mp1) { 8556 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8557 freemsg(mp); 8558 partial_reass_done: 8559 mutex_exit(&ipfb->ipfb_lock); 8560 return (NULL); 8561 } 8562 8563 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8564 /* 8565 * Too many fragmented packets in this hash bucket. 8566 * Free the oldest. 8567 */ 8568 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8569 } 8570 8571 mp1->b_cont = mp; 8572 8573 /* Initialize the fragment header. */ 8574 ipf = (ipf_t *)mp1->b_rptr; 8575 ipf->ipf_mp = mp1; 8576 ipf->ipf_ptphn = ipfp; 8577 ipfp[0] = ipf; 8578 ipf->ipf_hash_next = NULL; 8579 ipf->ipf_ident = ident; 8580 ipf->ipf_v6src = *v6src_ptr; 8581 ipf->ipf_v6dst = *v6dst_ptr; 8582 /* Record reassembly start time. 
*/ 8583 ipf->ipf_timestamp = gethrestime_sec(); 8584 /* Record ipf generation and account for frag header */ 8585 ipf->ipf_gen = ill->ill_ipf_gen++; 8586 ipf->ipf_count = MBLKSIZE(mp1); 8587 ipf->ipf_protocol = nexthdr; 8588 ipf->ipf_nf_hdr_len = 0; 8589 ipf->ipf_prev_nexthdr_offset = 0; 8590 ipf->ipf_last_frag_seen = B_FALSE; 8591 ipf->ipf_ecn = ecn_info; 8592 ipf->ipf_num_dups = 0; 8593 ipfb->ipfb_frag_pkts++; 8594 ipf->ipf_checksum = 0; 8595 ipf->ipf_checksum_flags = 0; 8596 8597 /* Store checksum value in fragment header */ 8598 if (sum_flags != 0) { 8599 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8600 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8601 ipf->ipf_checksum = sum_val; 8602 ipf->ipf_checksum_flags = sum_flags; 8603 } 8604 8605 /* 8606 * We handle reassembly two ways. In the easy case, 8607 * where all the fragments show up in order, we do 8608 * minimal bookkeeping, and just clip new pieces on 8609 * the end. If we ever see a hole, then we go off 8610 * to ip_reassemble which has to mark the pieces and 8611 * keep track of the number of holes, etc. Obviously, 8612 * the point of having both mechanisms is so we can 8613 * handle the easy case as efficiently as possible. 8614 */ 8615 if (offset == 0) { 8616 /* Easy case, in-order reassembly so far. */ 8617 /* Update the byte count */ 8618 ipf->ipf_count += msg_len; 8619 ipf->ipf_tail_mp = tail_mp; 8620 /* 8621 * Keep track of next expected offset in 8622 * ipf_end. 8623 */ 8624 ipf->ipf_end = end; 8625 ipf->ipf_nf_hdr_len = hdr_length; 8626 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8627 } else { 8628 /* Hard case, hole at the beginning. */ 8629 ipf->ipf_tail_mp = NULL; 8630 /* 8631 * ipf_end == 0 means that we have given up 8632 * on easy reassembly. 8633 */ 8634 ipf->ipf_end = 0; 8635 8636 /* Forget checksum offload from now on */ 8637 ipf->ipf_checksum_flags = 0; 8638 8639 /* 8640 * ipf_hole_cnt is set by ip_reassemble. 8641 * ipf_count is updated by ip_reassemble. 
8642 * No need to check for return value here 8643 * as we don't expect reassembly to complete or 8644 * fail for the first fragment itself. 8645 */ 8646 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8647 msg_len); 8648 } 8649 /* Update per ipfb and ill byte counts */ 8650 ipfb->ipfb_count += ipf->ipf_count; 8651 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8652 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8653 /* If the frag timer wasn't already going, start it. */ 8654 mutex_enter(&ill->ill_lock); 8655 ill_frag_timer_start(ill); 8656 mutex_exit(&ill->ill_lock); 8657 goto partial_reass_done; 8658 } 8659 8660 /* 8661 * If the packet's flag has changed (it could be coming up 8662 * from an interface different than the previous, therefore 8663 * possibly different checksum capability), then forget about 8664 * any stored checksum states. Otherwise add the value to 8665 * the existing one stored in the fragment header. 8666 */ 8667 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8668 sum_val += ipf->ipf_checksum; 8669 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8670 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8671 ipf->ipf_checksum = sum_val; 8672 } else if (ipf->ipf_checksum_flags != 0) { 8673 /* Forget checksum offload from now on */ 8674 ipf->ipf_checksum_flags = 0; 8675 } 8676 8677 /* 8678 * We have a new piece of a datagram which is already being 8679 * reassembled. Update the ECN info if all IP fragments 8680 * are ECN capable. If there is one which is not, clear 8681 * all the info. If there is at least one which has CE 8682 * code point, IP needs to report that up to transport. 
8683 */ 8684 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8685 if (ecn_info == IPH_ECN_CE) 8686 ipf->ipf_ecn = IPH_ECN_CE; 8687 } else { 8688 ipf->ipf_ecn = IPH_ECN_NECT; 8689 } 8690 8691 if (offset && ipf->ipf_end == offset) { 8692 /* The new fragment fits at the end */ 8693 ipf->ipf_tail_mp->b_cont = mp; 8694 /* Update the byte count */ 8695 ipf->ipf_count += msg_len; 8696 /* Update per ipfb and ill byte counts */ 8697 ipfb->ipfb_count += msg_len; 8698 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8699 atomic_add_32(&ill->ill_frag_count, msg_len); 8700 if (more_frags) { 8701 /* More to come. */ 8702 ipf->ipf_end = end; 8703 ipf->ipf_tail_mp = tail_mp; 8704 goto partial_reass_done; 8705 } 8706 } else { 8707 /* 8708 * Go do the hard cases. 8709 * Call ip_reassemble(). 8710 */ 8711 int ret; 8712 8713 if (offset == 0) { 8714 if (ipf->ipf_prev_nexthdr_offset == 0) { 8715 ipf->ipf_nf_hdr_len = hdr_length; 8716 ipf->ipf_prev_nexthdr_offset = 8717 *prev_nexthdr_offset; 8718 } 8719 } 8720 /* Save current byte count */ 8721 count = ipf->ipf_count; 8722 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8723 8724 /* Count of bytes added and subtracted (freeb()ed) */ 8725 count = ipf->ipf_count - count; 8726 if (count) { 8727 /* Update per ipfb and ill byte counts */ 8728 ipfb->ipfb_count += count; 8729 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8730 atomic_add_32(&ill->ill_frag_count, count); 8731 } 8732 if (ret == IP_REASS_PARTIAL) { 8733 goto partial_reass_done; 8734 } else if (ret == IP_REASS_FAILED) { 8735 /* Reassembly failed. Free up all resources */ 8736 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8737 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8738 IP_REASS_SET_START(t_mp, 0); 8739 IP_REASS_SET_END(t_mp, 0); 8740 } 8741 freemsg(mp); 8742 goto partial_reass_done; 8743 } 8744 8745 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8746 } 8747 /* 8748 * We have completed reassembly. 
Unhook the frag header from 8749 * the reassembly list. 8750 * 8751 * Grab the unfragmentable header length next header value out 8752 * of the first fragment 8753 */ 8754 ASSERT(ipf->ipf_nf_hdr_len != 0); 8755 hdr_length = ipf->ipf_nf_hdr_len; 8756 8757 /* 8758 * Before we free the frag header, record the ECN info 8759 * to report back to the transport. 8760 */ 8761 ecn_info = ipf->ipf_ecn; 8762 8763 /* 8764 * Store the nextheader field in the header preceding the fragment 8765 * header 8766 */ 8767 nexthdr = ipf->ipf_protocol; 8768 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8769 ipfp = ipf->ipf_ptphn; 8770 8771 /* We need to supply these to caller */ 8772 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8773 sum_val = ipf->ipf_checksum; 8774 else 8775 sum_val = 0; 8776 8777 mp1 = ipf->ipf_mp; 8778 count = ipf->ipf_count; 8779 ipf = ipf->ipf_hash_next; 8780 if (ipf) 8781 ipf->ipf_ptphn = ipfp; 8782 ipfp[0] = ipf; 8783 atomic_add_32(&ill->ill_frag_count, -count); 8784 ASSERT(ipfb->ipfb_count >= count); 8785 ipfb->ipfb_count -= count; 8786 ipfb->ipfb_frag_pkts--; 8787 mutex_exit(&ipfb->ipfb_lock); 8788 /* Ditch the frag header. */ 8789 mp = mp1->b_cont; 8790 freeb(mp1); 8791 8792 /* 8793 * Make sure the packet is good by doing some sanity 8794 * check. If bad we can silentely drop the packet. 8795 */ 8796 reass_done: 8797 if (hdr_length < sizeof (ip6_frag_t)) { 8798 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8799 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8800 freemsg(mp); 8801 return (NULL); 8802 } 8803 8804 /* 8805 * Remove the fragment header from the initial header by 8806 * splitting the mblk into the non-fragmentable header and 8807 * everthing after the fragment extension header. This has the 8808 * side effect of putting all the headers that need destination 8809 * processing into the b_cont block-- on return this fact is 8810 * used in order to avoid having to look at the extensions 8811 * already processed. 
8812 * 8813 * Note that this code assumes that the unfragmentable portion 8814 * of the header is in the first mblk and increments 8815 * the read pointer past it. If this assumption is broken 8816 * this code fails badly. 8817 */ 8818 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8819 mblk_t *nmp; 8820 8821 if (!(nmp = dupb(mp))) { 8822 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8823 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8824 freemsg(mp); 8825 return (NULL); 8826 } 8827 nmp->b_cont = mp->b_cont; 8828 mp->b_cont = nmp; 8829 nmp->b_rptr += hdr_length; 8830 } 8831 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8832 8833 ip6h = (ip6_t *)mp->b_rptr; 8834 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8835 8836 /* Restore original IP length in header. */ 8837 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8838 /* Record the ECN info. */ 8839 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8840 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8841 8842 /* Reassembly is successful; return checksum information if needed */ 8843 if (cksum_val != NULL) 8844 *cksum_val = sum_val; 8845 if (cksum_flags != NULL) 8846 *cksum_flags = sum_flags; 8847 8848 return (mp); 8849 } 8850 8851 /* 8852 * Given an mblk and a ptr, find the destination address in an IPv6 routing 8853 * header. 8854 */ 8855 static in6_addr_t 8856 pluck_out_dst(mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 8857 { 8858 ip6_rthdr0_t *rt0; 8859 int segleft, numaddr; 8860 in6_addr_t *ap, rv = oldrv; 8861 8862 rt0 = (ip6_rthdr0_t *)whereptr; 8863 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 8864 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 8865 uint8_t *, whereptr); 8866 return (rv); 8867 } 8868 segleft = rt0->ip6r0_segleft; 8869 numaddr = rt0->ip6r0_len / 2; 8870 8871 if ((rt0->ip6r0_len & 0x1) || 8872 whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr || 8873 (segleft > rt0->ip6r0_len / 2)) { 8874 /* 8875 * Corrupt packet. 
Either the routing header length is odd 8876 * (can't happen) or mismatched compared to the packet, or the 8877 * number of addresses is. Return what we can. This will 8878 * only be a problem on forwarded packets that get squeezed 8879 * through an outbound tunnel enforcing IPsec Tunnel Mode. 8880 */ 8881 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 8882 whereptr); 8883 return (rv); 8884 } 8885 8886 if (segleft != 0) { 8887 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 8888 rv = ap[numaddr - 1]; 8889 } 8890 8891 return (rv); 8892 } 8893 8894 /* 8895 * Walk through the options to see if there is a routing header. 8896 * If present get the destination which is the last address of 8897 * the option. 8898 */ 8899 in6_addr_t 8900 ip_get_dst_v6(ip6_t *ip6h, mblk_t *mp, boolean_t *is_fragment) 8901 { 8902 mblk_t *current_mp = mp; 8903 uint8_t nexthdr; 8904 uint8_t *whereptr; 8905 int ehdrlen; 8906 in6_addr_t rv; 8907 8908 whereptr = (uint8_t *)ip6h; 8909 ehdrlen = sizeof (ip6_t); 8910 8911 /* We assume at least the IPv6 base header is within one mblk. */ 8912 ASSERT(mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen); 8913 8914 rv = ip6h->ip6_dst; 8915 nexthdr = ip6h->ip6_nxt; 8916 if (is_fragment != NULL) 8917 *is_fragment = B_FALSE; 8918 8919 /* 8920 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 8921 * no extension headers will be split across mblks. 8922 */ 8923 8924 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 8925 nexthdr == IPPROTO_ROUTING) { 8926 if (nexthdr == IPPROTO_ROUTING) 8927 rv = pluck_out_dst(current_mp, whereptr, rv); 8928 8929 /* 8930 * All IPv6 extension headers have the next-header in byte 8931 * 0, and the (length - 8) in 8-byte-words. 8932 */ 8933 while (whereptr + ehdrlen >= current_mp->b_wptr) { 8934 ehdrlen -= (current_mp->b_wptr - whereptr); 8935 current_mp = current_mp->b_cont; 8936 if (current_mp == NULL) { 8937 /* Bad packet. Return what we can. 
*/ 8938 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 8939 mp, mblk_t *, current_mp, ip6_t *, ip6h); 8940 goto done; 8941 } 8942 whereptr = current_mp->b_rptr; 8943 } 8944 whereptr += ehdrlen; 8945 8946 nexthdr = *whereptr; 8947 ASSERT(whereptr + 1 < current_mp->b_wptr); 8948 ehdrlen = (*(whereptr + 1) + 1) * 8; 8949 } 8950 8951 done: 8952 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 8953 *is_fragment = B_TRUE; 8954 return (rv); 8955 } 8956 8957 /* 8958 * ip_source_routed_v6: 8959 * This function is called by redirect code in ip_rput_data_v6 to 8960 * know whether this packet is source routed through this node i.e 8961 * whether this node (router) is part of the journey. This 8962 * function is called under two cases : 8963 * 8964 * case 1 : Routing header was processed by this node and 8965 * ip_process_rthdr replaced ip6_dst with the next hop 8966 * and we are forwarding the packet to the next hop. 8967 * 8968 * case 2 : Routing header was not processed by this node and we 8969 * are just forwarding the packet. 8970 * 8971 * For case (1) we don't want to send redirects. For case(2) we 8972 * want to send redirects. 
8973 */ 8974 static boolean_t 8975 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 8976 { 8977 uint8_t nexthdr; 8978 in6_addr_t *addrptr; 8979 ip6_rthdr0_t *rthdr; 8980 uint8_t numaddr; 8981 ip6_hbh_t *hbhhdr; 8982 uint_t ehdrlen; 8983 uint8_t *byteptr; 8984 8985 ip2dbg(("ip_source_routed_v6\n")); 8986 nexthdr = ip6h->ip6_nxt; 8987 ehdrlen = IPV6_HDR_LEN; 8988 8989 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8990 while (nexthdr == IPPROTO_HOPOPTS || 8991 nexthdr == IPPROTO_DSTOPTS) { 8992 byteptr = (uint8_t *)ip6h + ehdrlen; 8993 /* 8994 * Check if we have already processed 8995 * packets or we are just a forwarding 8996 * router which only pulled up msgs up 8997 * to IPV6HDR and one HBH ext header 8998 */ 8999 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9000 ip2dbg(("ip_source_routed_v6: Extension" 9001 " headers not processed\n")); 9002 return (B_FALSE); 9003 } 9004 hbhhdr = (ip6_hbh_t *)byteptr; 9005 nexthdr = hbhhdr->ip6h_nxt; 9006 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9007 } 9008 switch (nexthdr) { 9009 case IPPROTO_ROUTING: 9010 byteptr = (uint8_t *)ip6h + ehdrlen; 9011 /* 9012 * If for some reason, we haven't pulled up 9013 * the routing hdr data mblk, then we must 9014 * not have processed it at all. So for sure 9015 * we are not part of the source routed journey. 9016 */ 9017 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9018 ip2dbg(("ip_source_routed_v6: Routing" 9019 " header not processed\n")); 9020 return (B_FALSE); 9021 } 9022 rthdr = (ip6_rthdr0_t *)byteptr; 9023 /* 9024 * Either we are an intermediate router or the 9025 * last hop before destination and we have 9026 * already processed the routing header. 9027 * If segment_left is greater than or equal to zero, 9028 * then we must be the (numaddr - segleft) entry 9029 * of the routing header. Although ip6r0_segleft 9030 * is a unit8_t variable, we still check for zero 9031 * or greater value, if in case the data type 9032 * is changed someday in future. 
9033 */ 9034 if (rthdr->ip6r0_segleft > 0 || 9035 rthdr->ip6r0_segleft == 0) { 9036 ire_t *ire = NULL; 9037 9038 numaddr = rthdr->ip6r0_len / 2; 9039 addrptr = (in6_addr_t *)((char *)rthdr + 9040 sizeof (*rthdr)); 9041 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9042 if (addrptr != NULL) { 9043 ire = ire_ctable_lookup_v6(addrptr, NULL, 9044 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9045 MATCH_IRE_TYPE, 9046 ipst); 9047 if (ire != NULL) { 9048 ire_refrele(ire); 9049 return (B_TRUE); 9050 } 9051 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9052 } 9053 } 9054 /* FALLTHRU */ 9055 default: 9056 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9057 return (B_FALSE); 9058 } 9059 } 9060 9061 /* 9062 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9063 * Assumes that the following set of headers appear in the first 9064 * mblk: 9065 * ip6i_t (if present) CAN also appear as a separate mblk. 9066 * ip6_t 9067 * Any extension headers 9068 * TCP/UDP/SCTP header (if present) 9069 * The routine can handle an ICMPv6 header that is not in the first mblk. 9070 * 9071 * The order to determine the outgoing interface is as follows: 9072 * 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. 9073 * 2. If q is an ill queue and (link local or multicast destination) then 9074 * use that ill. 9075 * 3. If IPV6_BOUND_IF has been set use that ill. 9076 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9077 * look for the best IRE match for the unspecified group to determine 9078 * the ill. 9079 * 5. For unicast: Just do an IRE lookup for the best match. 9080 * 9081 * arg2 is always a queue_t *. 9082 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9083 * the zoneid. 9084 * When that queue is not an ill_t, then arg must be a conn_t pointer. 
9085 */ 9086 void 9087 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9088 { 9089 conn_t *connp = NULL; 9090 queue_t *q = (queue_t *)arg2; 9091 ire_t *ire = NULL; 9092 ire_t *sctp_ire = NULL; 9093 ip6_t *ip6h; 9094 in6_addr_t *v6dstp; 9095 ill_t *ill = NULL; 9096 ipif_t *ipif; 9097 ip6i_t *ip6i; 9098 int cksum_request; /* -1 => normal. */ 9099 /* 1 => Skip TCP/UDP/SCTP checksum */ 9100 /* Otherwise contains insert offset for checksum */ 9101 int unspec_src; 9102 boolean_t do_outrequests; /* Increment OutRequests? */ 9103 mib2_ipIfStatsEntry_t *mibptr; 9104 int match_flags = MATCH_IRE_ILL; 9105 mblk_t *first_mp; 9106 boolean_t mctl_present; 9107 ipsec_out_t *io; 9108 boolean_t multirt_need_resolve = B_FALSE; 9109 mblk_t *copy_mp = NULL; 9110 int err = 0; 9111 int ip6i_flags = 0; 9112 zoneid_t zoneid; 9113 ill_t *saved_ill = NULL; 9114 boolean_t conn_lock_held; 9115 boolean_t need_decref = B_FALSE; 9116 ip_stack_t *ipst; 9117 9118 if (q->q_next != NULL) { 9119 ill = (ill_t *)q->q_ptr; 9120 ipst = ill->ill_ipst; 9121 } else { 9122 connp = (conn_t *)arg; 9123 ASSERT(connp != NULL); 9124 ipst = connp->conn_netstack->netstack_ip; 9125 } 9126 9127 /* 9128 * Highest bit in version field is Reachability Confirmation bit 9129 * used by NUD in ip_xmit_v6(). 9130 */ 9131 #ifdef _BIG_ENDIAN 9132 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9133 #else 9134 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9135 #endif 9136 9137 /* 9138 * M_CTL comes from 6 places 9139 * 9140 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9141 * both V4 and V6 datagrams. 9142 * 9143 * 2) AH/ESP sends down M_CTL after doing their job with both 9144 * V4 and V6 datagrams. 9145 * 9146 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9147 * attached. 
9148 * 9149 * 4) Notifications from an external resolver (for XRESOLV ifs) 9150 * 9151 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9152 * IPsec hardware acceleration support. 9153 * 9154 * 6) TUN_HELLO. 9155 * 9156 * We need to handle (1)'s IPv6 case and (3) here. For the 9157 * IPv4 case in (1), and (2), IPSEC processing has already 9158 * started. The code in ip_wput() already knows how to handle 9159 * continuing IPSEC processing (for IPv4 and IPv6). All other 9160 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9161 * for handling. 9162 */ 9163 first_mp = mp; 9164 mctl_present = B_FALSE; 9165 io = NULL; 9166 9167 /* Multidata transmit? */ 9168 if (DB_TYPE(mp) == M_MULTIDATA) { 9169 /* 9170 * We should never get here, since all Multidata messages 9171 * originating from tcp should have been directed over to 9172 * tcp_multisend() in the first place. 9173 */ 9174 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9175 freemsg(mp); 9176 return; 9177 } else if (DB_TYPE(mp) == M_CTL) { 9178 uint32_t mctltype = 0; 9179 uint32_t mlen = MBLKL(first_mp); 9180 9181 mp = mp->b_cont; 9182 mctl_present = B_TRUE; 9183 io = (ipsec_out_t *)first_mp->b_rptr; 9184 9185 /* 9186 * Validate this M_CTL message. The only three types of 9187 * M_CTL messages we expect to see in this code path are 9188 * ipsec_out_t or ipsec_in_t structures (allocated as 9189 * ipsec_info_t unions), or ipsec_ctl_t structures. 9190 * The ipsec_out_type and ipsec_in_type overlap in the two 9191 * data structures, and they are either set to IPSEC_OUT 9192 * or IPSEC_IN depending on which data structure it is. 9193 * ipsec_ctl_t is an IPSEC_CTL. 9194 * 9195 * All other M_CTL messages are sent to ip_wput_nondata() 9196 * for handling. 
9197 */ 9198 if (mlen >= sizeof (io->ipsec_out_type)) 9199 mctltype = io->ipsec_out_type; 9200 9201 if ((mlen == sizeof (ipsec_ctl_t)) && 9202 (mctltype == IPSEC_CTL)) { 9203 ip_output(arg, first_mp, arg2, caller); 9204 return; 9205 } 9206 9207 if ((mlen < sizeof (ipsec_info_t)) || 9208 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9209 mp == NULL) { 9210 ip_wput_nondata(NULL, q, first_mp, NULL); 9211 return; 9212 } 9213 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9214 if (q->q_next == NULL) { 9215 ip6h = (ip6_t *)mp->b_rptr; 9216 /* 9217 * For a freshly-generated TCP dgram that needs IPV6 9218 * processing, don't call ip_wput immediately. We can 9219 * tell this by the ipsec_out_proc_begin. In-progress 9220 * IPSEC_OUT messages have proc_begin set to TRUE, 9221 * and we want to send all IPSEC_IN messages to 9222 * ip_wput() for IPsec processing or finishing. 9223 */ 9224 if (mctltype == IPSEC_IN || 9225 IPVER(ip6h) != IPV6_VERSION || 9226 io->ipsec_out_proc_begin) { 9227 mibptr = &ipst->ips_ip6_mib; 9228 goto notv6; 9229 } 9230 } 9231 } else if (DB_TYPE(mp) != M_DATA) { 9232 ip_wput_nondata(NULL, q, mp, NULL); 9233 return; 9234 } 9235 9236 ip6h = (ip6_t *)mp->b_rptr; 9237 9238 if (IPVER(ip6h) != IPV6_VERSION) { 9239 mibptr = &ipst->ips_ip6_mib; 9240 goto notv6; 9241 } 9242 9243 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9244 (connp == NULL || !connp->conn_ulp_labeled)) { 9245 cred_t *cr; 9246 9247 if (connp != NULL) { 9248 ASSERT(CONN_CRED(connp) != NULL); 9249 err = tsol_check_label_v6(BEST_CRED(mp, connp), 9250 &mp, connp->conn_mac_exempt, ipst); 9251 } else if ((cr = msg_getcred(mp, NULL)) != NULL) { 9252 err = tsol_check_label_v6(cr, &mp, B_FALSE, ipst); 9253 } 9254 if (mctl_present) 9255 first_mp->b_cont = mp; 9256 else 9257 first_mp = mp; 9258 if (err != 0) { 9259 DTRACE_PROBE3( 9260 tsol_ip_log_drop_checklabel_ip6, char *, 9261 "conn(1), failed to check/update mp(2)", 9262 conn_t, connp, mblk_t, mp); 9263 freemsg(first_mp); 9264 
return; 9265 } 9266 ip6h = (ip6_t *)mp->b_rptr; 9267 } 9268 if (q->q_next != NULL) { 9269 /* 9270 * We don't know if this ill will be used for IPv6 9271 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9272 * ipif_set_values() sets the ill_isv6 flag to true if 9273 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9274 * just drop the packet. 9275 */ 9276 if (!ill->ill_isv6) { 9277 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9278 "ILLF_IPV6 was set\n")); 9279 freemsg(first_mp); 9280 return; 9281 } 9282 /* For uniformity do a refhold */ 9283 mutex_enter(&ill->ill_lock); 9284 if (!ILL_CAN_LOOKUP(ill)) { 9285 mutex_exit(&ill->ill_lock); 9286 freemsg(first_mp); 9287 return; 9288 } 9289 ill_refhold_locked(ill); 9290 mutex_exit(&ill->ill_lock); 9291 mibptr = ill->ill_ip_mib; 9292 9293 ASSERT(mibptr != NULL); 9294 unspec_src = 0; 9295 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9296 do_outrequests = B_FALSE; 9297 zoneid = (zoneid_t)(uintptr_t)arg; 9298 } else { 9299 ASSERT(connp != NULL); 9300 zoneid = connp->conn_zoneid; 9301 9302 /* is queue flow controlled? */ 9303 if ((q->q_first || connp->conn_draining) && 9304 (caller == IP_WPUT)) { 9305 /* 9306 * 1) TCP sends down M_CTL for detached connections. 9307 * 2) AH/ESP sends down M_CTL. 9308 * 9309 * We don't flow control either of the above. Only 9310 * UDP and others are flow controlled for which we 9311 * can't have a M_CTL. 9312 */ 9313 ASSERT(first_mp == mp); 9314 (void) putq(q, mp); 9315 return; 9316 } 9317 mibptr = &ipst->ips_ip6_mib; 9318 unspec_src = connp->conn_unspec_src; 9319 do_outrequests = B_TRUE; 9320 if (mp->b_flag & MSGHASREF) { 9321 mp->b_flag &= ~MSGHASREF; 9322 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9323 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9324 need_decref = B_TRUE; 9325 } 9326 9327 /* 9328 * If there is a policy, try to attach an ipsec_out in 9329 * the front. At the end, first_mp either points to a 9330 * M_DATA message or IPSEC_OUT message linked to a 9331 * M_DATA message. 
We have to do it now as we might 9332 * lose the "conn" if we go through ip_newroute. 9333 */ 9334 if (!mctl_present && 9335 (connp->conn_out_enforce_policy || 9336 connp->conn_latch != NULL)) { 9337 ASSERT(first_mp == mp); 9338 /* XXX Any better way to get the protocol fast ? */ 9339 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9340 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9341 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9342 if (need_decref) 9343 CONN_DEC_REF(connp); 9344 return; 9345 } else { 9346 ASSERT(mp->b_datap->db_type == M_CTL); 9347 first_mp = mp; 9348 mp = mp->b_cont; 9349 mctl_present = B_TRUE; 9350 io = (ipsec_out_t *)first_mp->b_rptr; 9351 } 9352 } 9353 } 9354 9355 /* check for alignment and full IPv6 header */ 9356 if (!OK_32PTR((uchar_t *)ip6h) || 9357 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9358 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9359 if (do_outrequests) 9360 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9361 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9362 freemsg(first_mp); 9363 if (ill != NULL) 9364 ill_refrele(ill); 9365 if (need_decref) 9366 CONN_DEC_REF(connp); 9367 return; 9368 } 9369 v6dstp = &ip6h->ip6_dst; 9370 cksum_request = -1; 9371 ip6i = NULL; 9372 9373 /* 9374 * Once neighbor discovery has completed, ndp_process() will provide 9375 * locally generated packets for which processing can be reattempted. 9376 * In these cases, connp is NULL and the original zone is part of a 9377 * prepended ipsec_out_t. 9378 */ 9379 if (io != NULL) { 9380 /* 9381 * When coming from icmp_input_v6, the zoneid might not match 9382 * for the loopback case, because inside icmp_input_v6 the 9383 * queue_t is a conn queue from the sending side. 9384 */ 9385 zoneid = io->ipsec_out_zoneid; 9386 ASSERT(zoneid != ALL_ZONES); 9387 } 9388 9389 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9390 /* 9391 * This is an ip6i_t header followed by an ip6_hdr. 9392 * Check which fields are set. 
9393 * 9394 * When the packet comes from a transport we should have 9395 * all needed headers in the first mblk. However, when 9396 * going through ip_newroute*_v6 the ip6i might be in 9397 * a separate mblk when we return here. In that case 9398 * we pullup everything to ensure that extension and transport 9399 * headers "stay" in the first mblk. 9400 */ 9401 ip6i = (ip6i_t *)ip6h; 9402 ip6i_flags = ip6i->ip6i_flags; 9403 9404 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9405 ((mp->b_wptr - (uchar_t *)ip6i) >= 9406 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9407 9408 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9409 if (!pullupmsg(mp, -1)) { 9410 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9411 if (do_outrequests) { 9412 BUMP_MIB(mibptr, 9413 ipIfStatsHCOutRequests); 9414 } 9415 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9416 freemsg(first_mp); 9417 if (ill != NULL) 9418 ill_refrele(ill); 9419 if (need_decref) 9420 CONN_DEC_REF(connp); 9421 return; 9422 } 9423 ip6h = (ip6_t *)mp->b_rptr; 9424 v6dstp = &ip6h->ip6_dst; 9425 ip6i = (ip6i_t *)ip6h; 9426 } 9427 ip6h = (ip6_t *)&ip6i[1]; 9428 9429 /* 9430 * Advance rptr past the ip6i_t to get ready for 9431 * transmitting the packet. However, if the packet gets 9432 * passed to ip_newroute*_v6 then rptr is moved back so 9433 * that the ip6i_t header can be inspected when the 9434 * packet comes back here after passing through 9435 * ire_add_then_send. 
9436 */ 9437 mp->b_rptr = (uchar_t *)ip6h; 9438 9439 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9440 ASSERT(ip6i->ip6i_ifindex != 0); 9441 if (ill != NULL) 9442 ill_refrele(ill); 9443 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9444 NULL, NULL, NULL, NULL, ipst); 9445 if (ill == NULL) { 9446 if (do_outrequests) { 9447 BUMP_MIB(mibptr, 9448 ipIfStatsHCOutRequests); 9449 } 9450 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9451 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9452 ip6i->ip6i_ifindex)); 9453 if (need_decref) 9454 CONN_DEC_REF(connp); 9455 freemsg(first_mp); 9456 return; 9457 } 9458 mibptr = ill->ill_ip_mib; 9459 /* 9460 * Preserve the index so that when we return from 9461 * IPSEC processing, we know where to send the packet. 9462 */ 9463 if (mctl_present) { 9464 ASSERT(io != NULL); 9465 io->ipsec_out_ill_index = ip6i->ip6i_ifindex; 9466 } 9467 } 9468 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9469 cred_t *cr = msg_getcred(mp, NULL); 9470 9471 /* rpcmod doesn't send down db_credp for UDP packets */ 9472 if (cr == NULL) { 9473 if (connp != NULL) 9474 cr = connp->conn_cred; 9475 else 9476 cr = ill->ill_credp; 9477 } 9478 9479 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9480 if (secpolicy_net_rawaccess(cr) != 0) { 9481 /* 9482 * Use IPCL_ZONEID to honor SO_ALLZONES. 9483 */ 9484 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9485 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9486 NULL, connp != NULL ? 
9487 IPCL_ZONEID(connp) : zoneid, NULL, 9488 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9489 if (ire == NULL) { 9490 if (do_outrequests) 9491 BUMP_MIB(mibptr, 9492 ipIfStatsHCOutRequests); 9493 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9494 ip1dbg(("ip_wput_v6: bad source " 9495 "addr\n")); 9496 freemsg(first_mp); 9497 if (ill != NULL) 9498 ill_refrele(ill); 9499 if (need_decref) 9500 CONN_DEC_REF(connp); 9501 return; 9502 } 9503 ire_refrele(ire); 9504 } 9505 /* No need to verify again when using ip_newroute */ 9506 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9507 } 9508 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9509 /* 9510 * Make sure they match since ip_newroute*_v6 etc might 9511 * (unknown to them) inspect ip6i_nexthop when 9512 * they think they access ip6_dst. 9513 */ 9514 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9515 } 9516 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9517 cksum_request = 1; 9518 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9519 cksum_request = ip6i->ip6i_checksum_off; 9520 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9521 unspec_src = 1; 9522 9523 if (do_outrequests && ill != NULL) { 9524 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9525 do_outrequests = B_FALSE; 9526 } 9527 /* 9528 * Store ip6i_t info that we need after we come back 9529 * from IPSEC processing. 9530 */ 9531 if (mctl_present) { 9532 ASSERT(io != NULL); 9533 io->ipsec_out_unspec_src = unspec_src; 9534 } 9535 } 9536 if (connp != NULL && connp->conn_dontroute) 9537 ip6h->ip6_hops = 1; 9538 9539 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9540 goto ipv6multicast; 9541 9542 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9543 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9544 ASSERT(ill != NULL); 9545 goto send_from_ill; 9546 } 9547 9548 /* 9549 * 2. If q is an ill queue and there's a link-local destination 9550 * then use that ill. 9551 */ 9552 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) 9553 goto send_from_ill; 9554 9555 /* 3. If IPV6_BOUND_IF has been set use that ill. 
*/ 9556 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9557 ill_t *conn_outgoing_ill; 9558 9559 conn_outgoing_ill = conn_get_held_ill(connp, 9560 &connp->conn_outgoing_ill, &err); 9561 if (err == ILL_LOOKUP_FAILED) { 9562 if (ill != NULL) 9563 ill_refrele(ill); 9564 if (need_decref) 9565 CONN_DEC_REF(connp); 9566 freemsg(first_mp); 9567 return; 9568 } 9569 if (ill != NULL) 9570 ill_refrele(ill); 9571 ill = conn_outgoing_ill; 9572 mibptr = ill->ill_ip_mib; 9573 goto send_from_ill; 9574 } 9575 9576 /* 9577 * 4. For unicast: Just do an IRE lookup for the best match. 9578 * If we get here for a link-local address it is rather random 9579 * what interface we pick on a multihomed host. 9580 * *If* there is an IRE_CACHE (and the link-local address 9581 * isn't duplicated on multi links) this will find the IRE_CACHE. 9582 * Otherwise it will use one of the matching IRE_INTERFACE routes 9583 * for the link-local prefix. Hence, applications 9584 * *should* be encouraged to specify an outgoing interface when sending 9585 * to a link local address. 9586 */ 9587 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9588 !connp->conn_fully_bound)) { 9589 /* 9590 * We cache IRE_CACHEs to avoid lookups. We don't do 9591 * this for the tcp global queue and listen end point 9592 * as it does not really have a real destination to 9593 * talk to. 9594 */ 9595 ire = ire_cache_lookup_v6(v6dstp, zoneid, msg_getlabel(mp), 9596 ipst); 9597 } else { 9598 /* 9599 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9600 * grab a lock here to check for CONDEMNED as it is okay 9601 * to send a packet or two with the IRE_CACHE that is going 9602 * away. 9603 */ 9604 mutex_enter(&connp->conn_lock); 9605 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9606 if (ire != NULL && 9607 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9608 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9609 9610 IRE_REFHOLD(ire); 9611 mutex_exit(&connp->conn_lock); 9612 9613 } else { 9614 boolean_t cached = B_FALSE; 9615 9616 connp->conn_ire_cache = NULL; 9617 mutex_exit(&connp->conn_lock); 9618 /* Release the old ire */ 9619 if (ire != NULL && sctp_ire == NULL) 9620 IRE_REFRELE_NOTR(ire); 9621 9622 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9623 msg_getlabel(mp), ipst); 9624 if (ire != NULL) { 9625 IRE_REFHOLD_NOTR(ire); 9626 9627 mutex_enter(&connp->conn_lock); 9628 if (CONN_CACHE_IRE(connp) && 9629 (connp->conn_ire_cache == NULL)) { 9630 rw_enter(&ire->ire_bucket->irb_lock, 9631 RW_READER); 9632 if (!(ire->ire_marks & 9633 IRE_MARK_CONDEMNED)) { 9634 connp->conn_ire_cache = ire; 9635 cached = B_TRUE; 9636 } 9637 rw_exit(&ire->ire_bucket->irb_lock); 9638 } 9639 mutex_exit(&connp->conn_lock); 9640 9641 /* 9642 * We can continue to use the ire but since it 9643 * was not cached, we should drop the extra 9644 * reference. 9645 */ 9646 if (!cached) 9647 IRE_REFRELE_NOTR(ire); 9648 } 9649 } 9650 } 9651 9652 if (ire != NULL) { 9653 if (do_outrequests) { 9654 /* Handle IRE_LOCAL's that might appear here */ 9655 if (ire->ire_type == IRE_CACHE) { 9656 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9657 ill_ip_mib; 9658 } else { 9659 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9660 } 9661 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9662 } 9663 9664 /* 9665 * Check if the ire has the RTF_MULTIRT flag, inherited 9666 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9667 */ 9668 if (ire->ire_flags & RTF_MULTIRT) { 9669 /* 9670 * Force hop limit of multirouted packets if required. 9671 * The hop limit of such packets is bounded by the 9672 * ip_multirt_ttl ndd variable. 9673 * NDP packets must have a hop limit of 255; don't 9674 * change the hop limit in that case. 
9675 */ 9676 if ((ipst->ips_ip_multirt_ttl > 0) && 9677 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9678 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9679 if (ip_debug > 3) { 9680 ip2dbg(("ip_wput_v6: forcing multirt " 9681 "hop limit to %d (was %d) ", 9682 ipst->ips_ip_multirt_ttl, 9683 ip6h->ip6_hops)); 9684 pr_addr_dbg("v6dst %s\n", AF_INET6, 9685 &ire->ire_addr_v6); 9686 } 9687 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9688 } 9689 9690 /* 9691 * We look at this point if there are pending 9692 * unresolved routes. ire_multirt_need_resolve_v6() 9693 * checks in O(n) that all IRE_OFFSUBNET ire 9694 * entries for the packet's destination and 9695 * flagged RTF_MULTIRT are currently resolved. 9696 * If some remain unresolved, we do a copy 9697 * of the current message. It will be used 9698 * to initiate additional route resolutions. 9699 */ 9700 multirt_need_resolve = 9701 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9702 msg_getlabel(first_mp), ipst); 9703 ip2dbg(("ip_wput_v6: ire %p, " 9704 "multirt_need_resolve %d, first_mp %p\n", 9705 (void *)ire, multirt_need_resolve, 9706 (void *)first_mp)); 9707 if (multirt_need_resolve) { 9708 copy_mp = copymsg(first_mp); 9709 if (copy_mp != NULL) { 9710 MULTIRT_DEBUG_TAG(copy_mp); 9711 } 9712 } 9713 } 9714 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9715 connp, caller, ip6i_flags, zoneid); 9716 if (need_decref) { 9717 CONN_DEC_REF(connp); 9718 connp = NULL; 9719 } 9720 IRE_REFRELE(ire); 9721 9722 /* 9723 * Try to resolve another multiroute if 9724 * ire_multirt_need_resolve_v6() deemed it necessary. 9725 * copy_mp will be consumed (sent or freed) by 9726 * ip_newroute_v6(). 
9727 */ 9728 if (copy_mp != NULL) { 9729 if (mctl_present) { 9730 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9731 } else { 9732 ip6h = (ip6_t *)copy_mp->b_rptr; 9733 } 9734 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9735 &ip6h->ip6_src, NULL, zoneid, ipst); 9736 } 9737 if (ill != NULL) 9738 ill_refrele(ill); 9739 return; 9740 } 9741 9742 /* 9743 * No full IRE for this destination. Send it to 9744 * ip_newroute_v6 to see if anything else matches. 9745 * Mark this packet as having originated on this 9746 * machine. 9747 * Update rptr if there was an ip6i_t header. 9748 */ 9749 mp->b_prev = NULL; 9750 mp->b_next = NULL; 9751 if (ip6i != NULL) 9752 mp->b_rptr -= sizeof (ip6i_t); 9753 9754 if (unspec_src) { 9755 if (ip6i == NULL) { 9756 /* 9757 * Add ip6i_t header to carry unspec_src 9758 * until the packet comes back in ip_wput_v6. 9759 */ 9760 mp = ip_add_info_v6(mp, NULL, v6dstp); 9761 if (mp == NULL) { 9762 if (do_outrequests) 9763 BUMP_MIB(mibptr, 9764 ipIfStatsHCOutRequests); 9765 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9766 if (mctl_present) 9767 freeb(first_mp); 9768 if (ill != NULL) 9769 ill_refrele(ill); 9770 if (need_decref) 9771 CONN_DEC_REF(connp); 9772 return; 9773 } 9774 ip6i = (ip6i_t *)mp->b_rptr; 9775 9776 if (mctl_present) { 9777 ASSERT(first_mp != mp); 9778 first_mp->b_cont = mp; 9779 } else { 9780 first_mp = mp; 9781 } 9782 9783 if ((mp->b_wptr - (uchar_t *)ip6i) == 9784 sizeof (ip6i_t)) { 9785 /* 9786 * ndp_resolver called from ip_newroute_v6 9787 * expects pulled up message. 
9788 */ 9789 if (!pullupmsg(mp, -1)) { 9790 ip1dbg(("ip_wput_v6: pullupmsg" 9791 " failed\n")); 9792 if (do_outrequests) { 9793 BUMP_MIB(mibptr, 9794 ipIfStatsHCOutRequests); 9795 } 9796 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9797 freemsg(first_mp); 9798 if (ill != NULL) 9799 ill_refrele(ill); 9800 if (need_decref) 9801 CONN_DEC_REF(connp); 9802 return; 9803 } 9804 ip6i = (ip6i_t *)mp->b_rptr; 9805 } 9806 ip6h = (ip6_t *)&ip6i[1]; 9807 v6dstp = &ip6h->ip6_dst; 9808 } 9809 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9810 if (mctl_present) { 9811 ASSERT(io != NULL); 9812 io->ipsec_out_unspec_src = unspec_src; 9813 } 9814 } 9815 if (do_outrequests) 9816 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9817 if (need_decref) 9818 CONN_DEC_REF(connp); 9819 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 9820 if (ill != NULL) 9821 ill_refrele(ill); 9822 return; 9823 9824 9825 /* 9826 * Handle multicast packets with or without an conn. 9827 * Assumes that the transports set ip6_hops taking 9828 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9829 * into account. 9830 */ 9831 ipv6multicast: 9832 ip2dbg(("ip_wput_v6: multicast\n")); 9833 9834 /* 9835 * Hold the conn_lock till we refhold the ill of interest that is 9836 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9837 * while holding any locks, postpone the refrele until after the 9838 * conn_lock is dropped. 9839 */ 9840 if (connp != NULL) { 9841 mutex_enter(&connp->conn_lock); 9842 conn_lock_held = B_TRUE; 9843 } else { 9844 conn_lock_held = B_FALSE; 9845 } 9846 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9847 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9848 ASSERT(ill != NULL); 9849 } else if (ill != NULL) { 9850 /* 9851 * 2. If q is an ill queue and (link local or multicast 9852 * destination) then use that ill. 9853 * We don't need the ipif initialization here. 
9854 * This useless assert below is just to prevent lint from 9855 * reporting a null body if statement. 9856 */ 9857 ASSERT(ill != NULL); 9858 } else if (connp != NULL) { 9859 /* 9860 * 3. If IPV6_BOUND_IF has been set use that ill. 9861 * 9862 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. 9863 * Otherwise look for the best IRE match for the unspecified 9864 * group to determine the ill. 9865 * 9866 * conn_multicast_ill is used for only IPv6 packets. 9867 * conn_multicast_ipif is used for only IPv4 packets. 9868 * Thus a PF_INET6 socket send both IPv4 and IPv6 9869 * multicast packets using different IP*_MULTICAST_IF 9870 * interfaces. 9871 */ 9872 if (connp->conn_outgoing_ill != NULL) { 9873 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9874 if (err == ILL_LOOKUP_FAILED) { 9875 ip1dbg(("ip_output_v6: multicast" 9876 " conn_outgoing_ill no ipif\n")); 9877 multicast_discard: 9878 ASSERT(saved_ill == NULL); 9879 if (conn_lock_held) 9880 mutex_exit(&connp->conn_lock); 9881 if (ill != NULL) 9882 ill_refrele(ill); 9883 freemsg(first_mp); 9884 if (do_outrequests) 9885 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9886 if (need_decref) 9887 CONN_DEC_REF(connp); 9888 return; 9889 } 9890 ill = connp->conn_outgoing_ill; 9891 } else if (connp->conn_multicast_ill != NULL) { 9892 err = ill_check_and_refhold(connp->conn_multicast_ill); 9893 if (err == ILL_LOOKUP_FAILED) { 9894 ip1dbg(("ip_output_v6: multicast" 9895 " conn_multicast_ill no ipif\n")); 9896 goto multicast_discard; 9897 } 9898 ill = connp->conn_multicast_ill; 9899 } else { 9900 mutex_exit(&connp->conn_lock); 9901 conn_lock_held = B_FALSE; 9902 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 9903 if (ipif == NULL) { 9904 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9905 goto multicast_discard; 9906 } 9907 /* 9908 * We have a ref to this ipif, so we can safely 9909 * access ipif_ill. 
9910 */ 9911 ill = ipif->ipif_ill; 9912 mutex_enter(&ill->ill_lock); 9913 if (!ILL_CAN_LOOKUP(ill)) { 9914 mutex_exit(&ill->ill_lock); 9915 ipif_refrele(ipif); 9916 ill = NULL; 9917 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9918 goto multicast_discard; 9919 } 9920 ill_refhold_locked(ill); 9921 mutex_exit(&ill->ill_lock); 9922 ipif_refrele(ipif); 9923 /* 9924 * Save binding until IPV6_MULTICAST_IF 9925 * changes it 9926 */ 9927 mutex_enter(&connp->conn_lock); 9928 connp->conn_multicast_ill = ill; 9929 mutex_exit(&connp->conn_lock); 9930 } 9931 } 9932 if (conn_lock_held) 9933 mutex_exit(&connp->conn_lock); 9934 9935 if (saved_ill != NULL) 9936 ill_refrele(saved_ill); 9937 9938 ASSERT(ill != NULL); 9939 /* 9940 * For multicast loopback interfaces replace the multicast address 9941 * with a unicast address for the ire lookup. 9942 */ 9943 if (IS_LOOPBACK(ill)) 9944 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9945 9946 mibptr = ill->ill_ip_mib; 9947 if (do_outrequests) { 9948 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9949 do_outrequests = B_FALSE; 9950 } 9951 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 9952 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 9953 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 9954 9955 /* 9956 * As we may lose the conn by the time we reach ip_wput_ire_v6 9957 * we copy conn_multicast_loop and conn_dontroute on to an 9958 * ipsec_out. In case if this datagram goes out secure, 9959 * we need the ill_index also. Copy that also into the 9960 * ipsec_out. 
9961 */ 9962 if (mctl_present) { 9963 io = (ipsec_out_t *)first_mp->b_rptr; 9964 ASSERT(first_mp->b_datap->db_type == M_CTL); 9965 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9966 } else { 9967 ASSERT(mp == first_mp); 9968 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 9969 NULL) { 9970 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9971 freemsg(mp); 9972 if (ill != NULL) 9973 ill_refrele(ill); 9974 if (need_decref) 9975 CONN_DEC_REF(connp); 9976 return; 9977 } 9978 io = (ipsec_out_t *)first_mp->b_rptr; 9979 /* This is not a secure packet */ 9980 io->ipsec_out_secure = B_FALSE; 9981 io->ipsec_out_use_global_policy = B_TRUE; 9982 io->ipsec_out_zoneid = 9983 (zoneid != ALL_ZONES ? zoneid : GLOBAL_ZONEID); 9984 first_mp->b_cont = mp; 9985 mctl_present = B_TRUE; 9986 } 9987 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9988 io->ipsec_out_unspec_src = unspec_src; 9989 if (connp != NULL) 9990 io->ipsec_out_dontroute = connp->conn_dontroute; 9991 9992 send_from_ill: 9993 ASSERT(ill != NULL); 9994 ASSERT(mibptr == ill->ill_ip_mib); 9995 9996 if (do_outrequests) { 9997 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9998 do_outrequests = B_FALSE; 9999 } 10000 10001 /* 10002 * Because nce_xmit() calls ip_output_v6() and NCEs are always tied to 10003 * an underlying interface, IS_UNDER_IPMP() may be true even when 10004 * building IREs that will be used for data traffic. As such, use the 10005 * packet's source address to determine whether the traffic is test 10006 * traffic, and set MATCH_IRE_MARK_TESTHIDDEN if so. 10007 * 10008 * Separately, we also need to mark probe packets so that ND can 10009 * process them specially; see the comments in nce_queue_mp_common(). 
10010 */ 10011 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10012 ipif_lookup_testaddr_v6(ill, &ip6h->ip6_src, NULL)) { 10013 if (ip6i == NULL) { 10014 if ((mp = ip_add_info_v6(mp, NULL, v6dstp)) == NULL) { 10015 if (mctl_present) 10016 freeb(first_mp); 10017 goto discard; 10018 } 10019 10020 if (mctl_present) 10021 first_mp->b_cont = mp; 10022 else 10023 first_mp = mp; 10024 10025 /* ndp_resolver() expects a pulled-up message */ 10026 if (MBLKL(mp) == sizeof (ip6i_t) && 10027 pullupmsg(mp, -1) == 0) { 10028 ip1dbg(("ip_output_v6: pullupmsg failed\n")); 10029 discard: BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10030 ill_refrele(ill); 10031 if (need_decref) 10032 CONN_DEC_REF(connp); 10033 return; 10034 } 10035 ip6i = (ip6i_t *)mp->b_rptr; 10036 ip6h = (ip6_t *)&ip6i[1]; 10037 v6dstp = &ip6h->ip6_dst; 10038 mp->b_rptr = (uchar_t *)ip6h; /* rewound below */ 10039 } 10040 ip6i->ip6i_flags |= IP6I_IPMP_PROBE; 10041 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 10042 } 10043 10044 if (io != NULL) 10045 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10046 10047 /* 10048 * When a specific ill is specified (using IPV6_PKTINFO, 10049 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10050 * on routing entries (ftable and ctable) that have a matching 10051 * ire->ire_ipif->ipif_ill. Thus this can only be used 10052 * for destinations that are on-link for the specific ill 10053 * and that can appear on multiple links. Thus it is useful 10054 * for multicast destinations, link-local destinations, and 10055 * at some point perhaps for site-local destinations (if the 10056 * node sits at a site boundary). 10057 * We create the cache entries in the regular ctable since 10058 * it can not "confuse" things for other destinations. 10059 * table. 10060 * 10061 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10062 * It is used only when ire_cache_lookup is used above. 
10063 */ 10064 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10065 zoneid, msg_getlabel(mp), match_flags, ipst); 10066 if (ire != NULL) { 10067 /* 10068 * Check if the ire has the RTF_MULTIRT flag, inherited 10069 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10070 */ 10071 if (ire->ire_flags & RTF_MULTIRT) { 10072 /* 10073 * Force hop limit of multirouted packets if required. 10074 * The hop limit of such packets is bounded by the 10075 * ip_multirt_ttl ndd variable. 10076 * NDP packets must have a hop limit of 255; don't 10077 * change the hop limit in that case. 10078 */ 10079 if ((ipst->ips_ip_multirt_ttl > 0) && 10080 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10081 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10082 if (ip_debug > 3) { 10083 ip2dbg(("ip_wput_v6: forcing multirt " 10084 "hop limit to %d (was %d) ", 10085 ipst->ips_ip_multirt_ttl, 10086 ip6h->ip6_hops)); 10087 pr_addr_dbg("v6dst %s\n", AF_INET6, 10088 &ire->ire_addr_v6); 10089 } 10090 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10091 } 10092 10093 /* 10094 * We look at this point if there are pending 10095 * unresolved routes. ire_multirt_need_resolve_v6() 10096 * checks in O(n) that all IRE_OFFSUBNET ire 10097 * entries for the packet's destination and 10098 * flagged RTF_MULTIRT are currently resolved. 10099 * If some remain unresolved, we make a copy 10100 * of the current message. It will be used 10101 * to initiate additional route resolutions. 
10102 */ 10103 multirt_need_resolve = 10104 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10105 msg_getlabel(first_mp), ipst); 10106 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10107 "multirt_need_resolve %d, first_mp %p\n", 10108 (void *)ire, multirt_need_resolve, 10109 (void *)first_mp)); 10110 if (multirt_need_resolve) { 10111 copy_mp = copymsg(first_mp); 10112 if (copy_mp != NULL) { 10113 MULTIRT_DEBUG_TAG(copy_mp); 10114 } 10115 } 10116 } 10117 10118 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10119 ill->ill_name, (void *)ire, 10120 ill->ill_phyint->phyint_ifindex)); 10121 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10122 connp, caller, ip6i_flags, zoneid); 10123 ire_refrele(ire); 10124 if (need_decref) { 10125 CONN_DEC_REF(connp); 10126 connp = NULL; 10127 } 10128 10129 /* 10130 * Try to resolve another multiroute if 10131 * ire_multirt_need_resolve_v6() deemed it necessary. 10132 * copy_mp will be consumed (sent or freed) by 10133 * ip_newroute_[ipif_]v6(). 10134 */ 10135 if (copy_mp != NULL) { 10136 if (mctl_present) { 10137 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10138 } else { 10139 ip6h = (ip6_t *)copy_mp->b_rptr; 10140 } 10141 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10142 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10143 zoneid, ipst); 10144 if (ipif == NULL) { 10145 ip1dbg(("ip_wput_v6: No ipif for " 10146 "multicast\n")); 10147 MULTIRT_DEBUG_UNTAG(copy_mp); 10148 freemsg(copy_mp); 10149 return; 10150 } 10151 ip_newroute_ipif_v6(q, copy_mp, ipif, 10152 &ip6h->ip6_dst, &ip6h->ip6_src, unspec_src, 10153 zoneid); 10154 ipif_refrele(ipif); 10155 } else { 10156 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10157 &ip6h->ip6_src, ill, zoneid, ipst); 10158 } 10159 } 10160 ill_refrele(ill); 10161 return; 10162 } 10163 if (need_decref) { 10164 CONN_DEC_REF(connp); 10165 connp = NULL; 10166 } 10167 10168 /* Update rptr if there was an ip6i_t header. 
*/ 10169 if (ip6i != NULL) 10170 mp->b_rptr -= sizeof (ip6i_t); 10171 if (unspec_src) { 10172 if (ip6i == NULL) { 10173 /* 10174 * Add ip6i_t header to carry unspec_src 10175 * until the packet comes back in ip_wput_v6. 10176 */ 10177 if (mctl_present) { 10178 first_mp->b_cont = 10179 ip_add_info_v6(mp, NULL, v6dstp); 10180 mp = first_mp->b_cont; 10181 if (mp == NULL) 10182 freeb(first_mp); 10183 } else { 10184 first_mp = mp = ip_add_info_v6(mp, NULL, 10185 v6dstp); 10186 } 10187 if (mp == NULL) { 10188 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10189 ill_refrele(ill); 10190 return; 10191 } 10192 ip6i = (ip6i_t *)mp->b_rptr; 10193 if ((mp->b_wptr - (uchar_t *)ip6i) == 10194 sizeof (ip6i_t)) { 10195 /* 10196 * ndp_resolver called from ip_newroute_v6 10197 * expects a pulled up message. 10198 */ 10199 if (!pullupmsg(mp, -1)) { 10200 ip1dbg(("ip_wput_v6: pullupmsg" 10201 " failed\n")); 10202 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10203 freemsg(first_mp); 10204 return; 10205 } 10206 ip6i = (ip6i_t *)mp->b_rptr; 10207 } 10208 ip6h = (ip6_t *)&ip6i[1]; 10209 v6dstp = &ip6h->ip6_dst; 10210 } 10211 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10212 if (mctl_present) { 10213 ASSERT(io != NULL); 10214 io->ipsec_out_unspec_src = unspec_src; 10215 } 10216 } 10217 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10218 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, v6dstp, 10219 &ip6h->ip6_src, unspec_src, zoneid); 10220 } else { 10221 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10222 zoneid, ipst); 10223 } 10224 ill_refrele(ill); 10225 return; 10226 10227 notv6: 10228 /* FIXME?: assume the caller calls the right version of ip_output? */ 10229 if (q->q_next == NULL) { 10230 connp = Q_TO_CONN(q); 10231 10232 /* 10233 * We can change conn_send for all types of conn, even 10234 * though only TCP uses it right now. 10235 * FIXME: sctp could use conn_send but doesn't currently. 
10236 */ 10237 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10238 } 10239 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10240 (void) ip_output(arg, first_mp, arg2, caller); 10241 if (ill != NULL) 10242 ill_refrele(ill); 10243 } 10244 10245 /* 10246 * If this is a conn_t queue, then we pass in the conn. This includes the 10247 * zoneid. 10248 * Otherwise, this is a message for an ill_t queue, 10249 * in which case we use the global zoneid since those are all part of 10250 * the global zone. 10251 */ 10252 void 10253 ip_wput_v6(queue_t *q, mblk_t *mp) 10254 { 10255 if (CONN_Q(q)) 10256 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10257 else 10258 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10259 } 10260 10261 /* 10262 * NULL send-to queue - packet is to be delivered locally. 10263 */ 10264 void 10265 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10266 ire_t *ire, int fanout_flags, zoneid_t zoneid) 10267 { 10268 uint32_t ports; 10269 mblk_t *mp = first_mp, *first_mp1; 10270 boolean_t mctl_present; 10271 uint8_t nexthdr; 10272 uint16_t hdr_length; 10273 ipsec_out_t *io; 10274 mib2_ipIfStatsEntry_t *mibptr; 10275 ilm_t *ilm; 10276 uint_t nexthdr_offset; 10277 ip_stack_t *ipst = ill->ill_ipst; 10278 10279 if (DB_TYPE(mp) == M_CTL) { 10280 io = (ipsec_out_t *)mp->b_rptr; 10281 if (!io->ipsec_out_secure) { 10282 mp = mp->b_cont; 10283 freeb(first_mp); 10284 first_mp = mp; 10285 mctl_present = B_FALSE; 10286 } else { 10287 mctl_present = B_TRUE; 10288 mp = first_mp->b_cont; 10289 ipsec_out_to_in(first_mp); 10290 } 10291 } else { 10292 mctl_present = B_FALSE; 10293 } 10294 10295 /* 10296 * Remove reachability confirmation bit from version field 10297 * before passing the packet on to any firewall hooks or 10298 * looping back the packet. 
10299 */ 10300 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10301 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10302 10303 DTRACE_PROBE4(ip6__loopback__in__start, 10304 ill_t *, ill, ill_t *, NULL, 10305 ip6_t *, ip6h, mblk_t *, first_mp); 10306 10307 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10308 ipst->ips_ipv6firewall_loopback_in, 10309 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10310 10311 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10312 10313 if (first_mp == NULL) 10314 return; 10315 10316 if (ipst->ips_ipobs_enabled) { 10317 zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; 10318 zoneid_t stackzoneid = netstackid_to_zoneid( 10319 ipst->ips_netstack->netstack_stackid); 10320 10321 szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; 10322 /* 10323 * ::1 is special, as we cannot lookup its zoneid by 10324 * address. For this case, restrict the lookup to the 10325 * source zone. 10326 */ 10327 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) 10328 lookup_zoneid = zoneid; 10329 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 10330 lookup_zoneid); 10331 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 10332 IPV6_VERSION, 0, ipst); 10333 } 10334 10335 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10336 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10337 int, 1); 10338 10339 nexthdr = ip6h->ip6_nxt; 10340 mibptr = ill->ill_ip_mib; 10341 10342 /* Fastpath */ 10343 switch (nexthdr) { 10344 case IPPROTO_TCP: 10345 case IPPROTO_UDP: 10346 case IPPROTO_ICMPV6: 10347 case IPPROTO_SCTP: 10348 hdr_length = IPV6_HDR_LEN; 10349 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10350 (uchar_t *)ip6h); 10351 break; 10352 default: { 10353 uint8_t *nexthdrp; 10354 10355 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10356 &hdr_length, &nexthdrp)) { 10357 /* Malformed packet */ 10358 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10359 freemsg(first_mp); 10360 return; 10361 } 10362 nexthdr = *nexthdrp; 10363 nexthdr_offset = nexthdrp - 
(uint8_t *)ip6h; 10364 break; 10365 } 10366 } 10367 10368 UPDATE_OB_PKT_COUNT(ire); 10369 ire->ire_last_used_time = lbolt; 10370 10371 switch (nexthdr) { 10372 case IPPROTO_TCP: 10373 if (DB_TYPE(mp) == M_DATA) { 10374 /* 10375 * M_DATA mblk, so init mblk (chain) for 10376 * no struio(). 10377 */ 10378 mblk_t *mp1 = mp; 10379 10380 do { 10381 mp1->b_datap->db_struioflag = 0; 10382 } while ((mp1 = mp1->b_cont) != NULL); 10383 } 10384 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10385 TCP_PORTS_OFFSET); 10386 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10387 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10388 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10389 hdr_length, mctl_present, ire->ire_zoneid); 10390 return; 10391 10392 case IPPROTO_UDP: 10393 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10394 UDP_PORTS_OFFSET); 10395 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10396 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10397 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10398 return; 10399 10400 case IPPROTO_SCTP: 10401 { 10402 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10403 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10404 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10405 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10406 return; 10407 } 10408 case IPPROTO_ICMPV6: { 10409 icmp6_t *icmp6; 10410 10411 /* check for full IPv6+ICMPv6 header */ 10412 if ((mp->b_wptr - mp->b_rptr) < 10413 (hdr_length + ICMP6_MINLEN)) { 10414 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10415 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10416 " failed\n")); 10417 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10418 freemsg(first_mp); 10419 return; 10420 } 10421 ip6h = (ip6_t *)mp->b_rptr; 10422 } 10423 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10424 10425 /* Update output mib stats */ 10426 icmp_update_out_mib_v6(ill, icmp6); 10427 10428 /* Check variable for testing applications */ 10429 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10430 
freemsg(first_mp); 10431 return; 10432 } 10433 /* 10434 * Assume that there is always at least one conn for 10435 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10436 * where there is no conn. 10437 */ 10438 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10439 !IS_LOOPBACK(ill)) { 10440 ilm_walker_t ilw; 10441 10442 /* 10443 * In the multicast case, applications may have 10444 * joined the group from different zones, so we 10445 * need to deliver the packet to each of them. 10446 * Loop through the multicast memberships 10447 * structures (ilm) on the receive ill and send 10448 * a copy of the packet up each matching one. 10449 * However, we don't do this for multicasts sent 10450 * on the loopback interface (PHYI_LOOPBACK flag 10451 * set) as they must stay in the sender's zone. 10452 */ 10453 ilm = ilm_walker_start(&ilw, ill); 10454 for (; ilm != NULL; 10455 ilm = ilm_walker_step(&ilw, ilm)) { 10456 if (!IN6_ARE_ADDR_EQUAL( 10457 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10458 continue; 10459 if ((fanout_flags & 10460 IP_FF_NO_MCAST_LOOP) && 10461 ilm->ilm_zoneid == ire->ire_zoneid) 10462 continue; 10463 if (!ipif_lookup_zoneid( 10464 ilw.ilw_walk_ill, ilm->ilm_zoneid, 10465 IPIF_UP, NULL)) 10466 continue; 10467 10468 first_mp1 = ip_copymsg(first_mp); 10469 if (first_mp1 == NULL) 10470 continue; 10471 icmp_inbound_v6(q, first_mp1, 10472 ilw.ilw_walk_ill, ill, hdr_length, 10473 mctl_present, IP6_NO_IPPOLICY, 10474 ilm->ilm_zoneid, NULL); 10475 } 10476 ilm_walker_finish(&ilw); 10477 } else { 10478 first_mp1 = ip_copymsg(first_mp); 10479 if (first_mp1 != NULL) 10480 icmp_inbound_v6(q, first_mp1, ill, ill, 10481 hdr_length, mctl_present, 10482 IP6_NO_IPPOLICY, ire->ire_zoneid, 10483 NULL); 10484 } 10485 } 10486 /* FALLTHRU */ 10487 default: { 10488 /* 10489 * Handle protocols with which IPv6 is less intimate. 10490 */ 10491 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10492 10493 /* 10494 * Enable sending ICMP for "Unknown" nexthdr 10495 * case. i.e. 
where we did not FALLTHRU from 10496 * IPPROTO_ICMPV6 processing case above. 10497 */ 10498 if (nexthdr != IPPROTO_ICMPV6) 10499 fanout_flags |= IP_FF_SEND_ICMP; 10500 /* 10501 * Note: There can be more than one stream bound 10502 * to a particular protocol. When this is the case, 10503 * each one gets a copy of any incoming packets. 10504 */ 10505 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10506 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10507 mctl_present, ire->ire_zoneid); 10508 return; 10509 } 10510 } 10511 } 10512 10513 /* 10514 * Send packet using IRE. 10515 * Checksumming is controlled by cksum_request: 10516 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10517 * 1 => Skip TCP/UDP/SCTP checksum 10518 * Otherwise => checksum_request contains insert offset for checksum 10519 * 10520 * Assumes that the following set of headers appear in the first 10521 * mblk: 10522 * ip6_t 10523 * Any extension headers 10524 * TCP/UDP/SCTP header (if present) 10525 * The routine can handle an ICMPv6 header that is not in the first mblk. 10526 * 10527 * NOTE : This function does not ire_refrele the ire passed in as the 10528 * argument unlike ip_wput_ire where the REFRELE is done. 10529 * Refer to ip_wput_ire for more on this. 10530 */ 10531 static void 10532 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10533 int cksum_request, conn_t *connp, int caller, int flags, zoneid_t zoneid) 10534 { 10535 ip6_t *ip6h; 10536 uint8_t nexthdr; 10537 uint16_t hdr_length; 10538 uint_t reachable = 0x0; 10539 ill_t *ill; 10540 mib2_ipIfStatsEntry_t *mibptr; 10541 mblk_t *first_mp; 10542 boolean_t mctl_present; 10543 ipsec_out_t *io; 10544 boolean_t conn_dontroute; /* conn value for multicast */ 10545 boolean_t conn_multicast_loop; /* conn value for multicast */ 10546 boolean_t multicast_forward; /* Should we forward ? 
*/ 10547 int max_frag; 10548 ip_stack_t *ipst = ire->ire_ipst; 10549 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10550 10551 ill = ire_to_ill(ire); 10552 first_mp = mp; 10553 multicast_forward = B_FALSE; 10554 10555 if (mp->b_datap->db_type != M_CTL) { 10556 ip6h = (ip6_t *)first_mp->b_rptr; 10557 } else { 10558 io = (ipsec_out_t *)first_mp->b_rptr; 10559 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10560 /* 10561 * Grab the zone id now because the M_CTL can be discarded by 10562 * ip_wput_ire_parse_ipsec_out() below. 10563 */ 10564 ASSERT(zoneid == io->ipsec_out_zoneid); 10565 ASSERT(zoneid != ALL_ZONES); 10566 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10567 /* 10568 * For the multicast case, ipsec_out carries conn_dontroute and 10569 * conn_multicast_loop as conn may not be available here. We 10570 * need this for multicast loopback and forwarding which is done 10571 * later in the code. 10572 */ 10573 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10574 conn_dontroute = io->ipsec_out_dontroute; 10575 conn_multicast_loop = io->ipsec_out_multicast_loop; 10576 /* 10577 * If conn_dontroute is not set or conn_multicast_loop 10578 * is set, we need to do forwarding/loopback. For 10579 * datagrams from ip_wput_multicast, conn_dontroute is 10580 * set to B_TRUE and conn_multicast_loop is set to 10581 * B_FALSE so that we neither do forwarding nor 10582 * loopback. 10583 */ 10584 if (!conn_dontroute || conn_multicast_loop) 10585 multicast_forward = B_TRUE; 10586 } 10587 } 10588 10589 /* 10590 * If the sender didn't supply the hop limit and there is a default 10591 * unicast hop limit associated with the output interface, we use 10592 * that if the packet is unicast. Interface specific unicast hop 10593 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10594 */ 10595 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10596 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10597 ip6h->ip6_hops = ill->ill_max_hops; 10598 } 10599 10600 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10601 ire->ire_zoneid != ALL_ZONES) { 10602 /* 10603 * When a zone sends a packet to another zone, we try to deliver 10604 * the packet under the same conditions as if the destination 10605 * was a real node on the network. To do so, we look for a 10606 * matching route in the forwarding table. 10607 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10608 * ip_newroute_v6() does. 10609 * Note that IRE_LOCAL are special, since they are used 10610 * when the zoneid doesn't match in some cases. This means that 10611 * we need to handle ipha_src differently since ire_src_addr 10612 * belongs to the receiving zone instead of the sending zone. 10613 * When ip_restrict_interzone_loopback is set, then 10614 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10615 * for loopback between zones when the logical "Ethernet" would 10616 * have looped them back. 
10617 */ 10618 ire_t *src_ire; 10619 10620 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10621 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10622 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10623 if (src_ire != NULL && 10624 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10625 (!ipst->ips_ip_restrict_interzone_loopback || 10626 ire_local_same_lan(ire, src_ire))) { 10627 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10628 !unspec_src) { 10629 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10630 } 10631 ire_refrele(src_ire); 10632 } else { 10633 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10634 if (src_ire != NULL) { 10635 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10636 ire_refrele(src_ire); 10637 freemsg(first_mp); 10638 return; 10639 } 10640 ire_refrele(src_ire); 10641 } 10642 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10643 /* Failed */ 10644 freemsg(first_mp); 10645 return; 10646 } 10647 icmp_unreachable_v6(q, first_mp, 10648 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10649 zoneid, ipst); 10650 return; 10651 } 10652 } 10653 10654 if (mp->b_datap->db_type == M_CTL || 10655 ipss->ipsec_outbound_v6_policy_present) { 10656 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10657 connp, unspec_src, zoneid); 10658 if (mp == NULL) { 10659 return; 10660 } 10661 } 10662 10663 first_mp = mp; 10664 if (mp->b_datap->db_type == M_CTL) { 10665 io = (ipsec_out_t *)mp->b_rptr; 10666 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10667 mp = mp->b_cont; 10668 mctl_present = B_TRUE; 10669 } else { 10670 mctl_present = B_FALSE; 10671 } 10672 10673 ip6h = (ip6_t *)mp->b_rptr; 10674 nexthdr = ip6h->ip6_nxt; 10675 mibptr = ill->ill_ip_mib; 10676 10677 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10678 ipif_t *ipif; 10679 10680 /* 10681 * Select the source address using ipif_select_source_v6. 
10682 */ 10683 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, B_FALSE, 10684 IPV6_PREFER_SRC_DEFAULT, zoneid); 10685 if (ipif == NULL) { 10686 if (ip_debug > 2) { 10687 /* ip1dbg */ 10688 pr_addr_dbg("ip_wput_ire_v6: no src for " 10689 "dst %s\n", AF_INET6, &ip6h->ip6_dst); 10690 printf("through interface %s\n", ill->ill_name); 10691 } 10692 freemsg(first_mp); 10693 return; 10694 } 10695 ip6h->ip6_src = ipif->ipif_v6src_addr; 10696 ipif_refrele(ipif); 10697 } 10698 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10699 if ((connp != NULL && connp->conn_multicast_loop) || 10700 !IS_LOOPBACK(ill)) { 10701 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 10702 ALL_ZONES) != NULL) { 10703 mblk_t *nmp; 10704 int fanout_flags = 0; 10705 10706 if (connp != NULL && 10707 !connp->conn_multicast_loop) { 10708 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10709 } 10710 ip1dbg(("ip_wput_ire_v6: " 10711 "Loopback multicast\n")); 10712 nmp = ip_copymsg(first_mp); 10713 if (nmp != NULL) { 10714 ip6_t *nip6h; 10715 mblk_t *mp_ip6h; 10716 10717 if (mctl_present) { 10718 nip6h = (ip6_t *) 10719 nmp->b_cont->b_rptr; 10720 mp_ip6h = nmp->b_cont; 10721 } else { 10722 nip6h = (ip6_t *)nmp->b_rptr; 10723 mp_ip6h = nmp; 10724 } 10725 10726 DTRACE_PROBE4( 10727 ip6__loopback__out__start, 10728 ill_t *, NULL, 10729 ill_t *, ill, 10730 ip6_t *, nip6h, 10731 mblk_t *, nmp); 10732 10733 FW_HOOKS6( 10734 ipst->ips_ip6_loopback_out_event, 10735 ipst->ips_ipv6firewall_loopback_out, 10736 NULL, ill, nip6h, nmp, mp_ip6h, 10737 0, ipst); 10738 10739 DTRACE_PROBE1( 10740 ip6__loopback__out__end, 10741 mblk_t *, nmp); 10742 10743 /* 10744 * DTrace this as ip:::send. A blocked 10745 * packet will fire the send probe, but 10746 * not the receive probe. 
10747 */ 10748 DTRACE_IP7(send, mblk_t *, nmp, 10749 conn_t *, NULL, void_ip_t *, nip6h, 10750 __dtrace_ipsr_ill_t *, ill, 10751 ipha_t *, NULL, ip6_t *, nip6h, 10752 int, 1); 10753 10754 if (nmp != NULL) { 10755 /* 10756 * Deliver locally and to 10757 * every local zone, except 10758 * the sending zone when 10759 * IPV6_MULTICAST_LOOP is 10760 * disabled. 10761 */ 10762 ip_wput_local_v6(RD(q), ill, 10763 nip6h, nmp, ire, 10764 fanout_flags, zoneid); 10765 } 10766 } else { 10767 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10768 ip1dbg(("ip_wput_ire_v6: " 10769 "copymsg failed\n")); 10770 } 10771 } 10772 } 10773 if (ip6h->ip6_hops == 0 || 10774 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10775 IS_LOOPBACK(ill)) { 10776 /* 10777 * Local multicast or just loopback on loopback 10778 * interface. 10779 */ 10780 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10781 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10782 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10783 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10784 freemsg(first_mp); 10785 return; 10786 } 10787 } 10788 10789 if (ire->ire_stq != NULL) { 10790 uint32_t sum; 10791 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10792 ill_phyint->phyint_ifindex; 10793 queue_t *dev_q = ire->ire_stq->q_next; 10794 10795 /* 10796 * non-NULL send-to queue - packet is to be sent 10797 * out an interface. 10798 */ 10799 10800 /* Driver is flow-controlling? */ 10801 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10802 DEV_Q_FLOW_BLOCKED(dev_q)) { 10803 /* 10804 * Queue packet if we have an conn to give back 10805 * pressure. We can't queue packets intended for 10806 * hardware acceleration since we've tossed that 10807 * state already. If the packet is being fed back 10808 * from ire_send_v6, we don't know the position in 10809 * the queue to enqueue the packet and we discard 10810 * the packet. 
10811 */ 10812 if (ipst->ips_ip_output_queue && connp != NULL && 10813 !mctl_present && caller != IRE_SEND) { 10814 if (caller == IP_WSRV) { 10815 idl_tx_list_t *idl_txl; 10816 10817 idl_txl = &ipst->ips_idl_tx_list[0]; 10818 connp->conn_did_putbq = 1; 10819 (void) putbq(connp->conn_wq, mp); 10820 conn_drain_insert(connp, idl_txl); 10821 /* 10822 * caller == IP_WSRV implies we are 10823 * the service thread, and the 10824 * queue is already noenabled. 10825 * The check for canput and 10826 * the putbq is not atomic. 10827 * So we need to check again. 10828 */ 10829 if (canput(dev_q)) 10830 connp->conn_did_putbq = 0; 10831 } else { 10832 (void) putq(connp->conn_wq, mp); 10833 } 10834 return; 10835 } 10836 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10837 freemsg(first_mp); 10838 return; 10839 } 10840 10841 /* 10842 * Look for reachability confirmations from the transport. 10843 */ 10844 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10845 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10846 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10847 if (mctl_present) 10848 io->ipsec_out_reachable = B_TRUE; 10849 } 10850 /* Fastpath */ 10851 switch (nexthdr) { 10852 case IPPROTO_TCP: 10853 case IPPROTO_UDP: 10854 case IPPROTO_ICMPV6: 10855 case IPPROTO_SCTP: 10856 hdr_length = IPV6_HDR_LEN; 10857 break; 10858 default: { 10859 uint8_t *nexthdrp; 10860 10861 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10862 &hdr_length, &nexthdrp)) { 10863 /* Malformed packet */ 10864 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10865 freemsg(first_mp); 10866 return; 10867 } 10868 nexthdr = *nexthdrp; 10869 break; 10870 } 10871 } 10872 10873 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10874 uint16_t *up; 10875 uint16_t *insp; 10876 10877 /* 10878 * The packet header is processed once for all, even 10879 * in the multirouting case. We disable hardware 10880 * checksum if the packet is multirouted, as it will be 10881 * replicated via several interfaces, and not all of 10882 * them may have this capability. 
10883 */ 10884 if (cksum_request == 1 && 10885 !(ire->ire_flags & RTF_MULTIRT)) { 10886 /* Skip the transport checksum */ 10887 goto cksum_done; 10888 } 10889 /* 10890 * Do user-configured raw checksum. 10891 * Compute checksum and insert at offset "cksum_request" 10892 */ 10893 10894 /* check for enough headers for checksum */ 10895 cksum_request += hdr_length; /* offset from rptr */ 10896 if ((mp->b_wptr - mp->b_rptr) < 10897 (cksum_request + sizeof (int16_t))) { 10898 if (!pullupmsg(mp, 10899 cksum_request + sizeof (int16_t))) { 10900 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10901 " failed\n")); 10902 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10903 freemsg(first_mp); 10904 return; 10905 } 10906 ip6h = (ip6_t *)mp->b_rptr; 10907 } 10908 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10909 ASSERT(((uintptr_t)insp & 0x1) == 0); 10910 up = (uint16_t *)&ip6h->ip6_src; 10911 /* 10912 * icmp has placed length and routing 10913 * header adjustment in *insp. 10914 */ 10915 sum = htons(nexthdr) + 10916 up[0] + up[1] + up[2] + up[3] + 10917 up[4] + up[5] + up[6] + up[7] + 10918 up[8] + up[9] + up[10] + up[11] + 10919 up[12] + up[13] + up[14] + up[15]; 10920 sum = (sum & 0xffff) + (sum >> 16); 10921 *insp = IP_CSUM(mp, hdr_length, sum); 10922 } else if (nexthdr == IPPROTO_TCP) { 10923 uint16_t *up; 10924 10925 /* 10926 * Check for full IPv6 header + enough TCP header 10927 * to get at the checksum field. 
10928 */ 10929 if ((mp->b_wptr - mp->b_rptr) < 10930 (hdr_length + TCP_CHECKSUM_OFFSET + 10931 TCP_CHECKSUM_SIZE)) { 10932 if (!pullupmsg(mp, hdr_length + 10933 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10934 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10935 " failed\n")); 10936 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10937 freemsg(first_mp); 10938 return; 10939 } 10940 ip6h = (ip6_t *)mp->b_rptr; 10941 } 10942 10943 up = (uint16_t *)&ip6h->ip6_src; 10944 /* 10945 * Note: The TCP module has stored the length value 10946 * into the tcp checksum field, so we don't 10947 * need to explicitly sum it in here. 10948 */ 10949 sum = up[0] + up[1] + up[2] + up[3] + 10950 up[4] + up[5] + up[6] + up[7] + 10951 up[8] + up[9] + up[10] + up[11] + 10952 up[12] + up[13] + up[14] + up[15]; 10953 10954 /* Fold the initial sum */ 10955 sum = (sum & 0xffff) + (sum >> 16); 10956 10957 up = (uint16_t *)(((uchar_t *)ip6h) + 10958 hdr_length + TCP_CHECKSUM_OFFSET); 10959 10960 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10961 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10962 ire->ire_max_frag, mctl_present, sum); 10963 10964 /* Software checksum? 
*/ 10965 if (DB_CKSUMFLAGS(mp) == 0) { 10966 IP6_STAT(ipst, ip6_out_sw_cksum); 10967 IP6_STAT_UPDATE(ipst, 10968 ip6_tcp_out_sw_cksum_bytes, 10969 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10970 hdr_length); 10971 } 10972 } else if (nexthdr == IPPROTO_UDP) { 10973 uint16_t *up; 10974 10975 /* 10976 * check for full IPv6 header + enough UDP header 10977 * to get at the UDP checksum field 10978 */ 10979 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10980 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10981 if (!pullupmsg(mp, hdr_length + 10982 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10983 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10984 " failed\n")); 10985 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10986 freemsg(first_mp); 10987 return; 10988 } 10989 ip6h = (ip6_t *)mp->b_rptr; 10990 } 10991 up = (uint16_t *)&ip6h->ip6_src; 10992 /* 10993 * Note: The UDP module has stored the length value 10994 * into the udp checksum field, so we don't 10995 * need to explicitly sum it in here. 10996 */ 10997 sum = up[0] + up[1] + up[2] + up[3] + 10998 up[4] + up[5] + up[6] + up[7] + 10999 up[8] + up[9] + up[10] + up[11] + 11000 up[12] + up[13] + up[14] + up[15]; 11001 11002 /* Fold the initial sum */ 11003 sum = (sum & 0xffff) + (sum >> 16); 11004 11005 up = (uint16_t *)(((uchar_t *)ip6h) + 11006 hdr_length + UDP_CHECKSUM_OFFSET); 11007 11008 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11009 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11010 ire->ire_max_frag, mctl_present, sum); 11011 11012 /* Software checksum? 
*/ 11013 if (DB_CKSUMFLAGS(mp) == 0) { 11014 IP6_STAT(ipst, ip6_out_sw_cksum); 11015 IP6_STAT_UPDATE(ipst, 11016 ip6_udp_out_sw_cksum_bytes, 11017 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11018 hdr_length); 11019 } 11020 } else if (nexthdr == IPPROTO_ICMPV6) { 11021 uint16_t *up; 11022 icmp6_t *icmp6; 11023 11024 /* check for full IPv6+ICMPv6 header */ 11025 if ((mp->b_wptr - mp->b_rptr) < 11026 (hdr_length + ICMP6_MINLEN)) { 11027 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11028 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11029 " failed\n")); 11030 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11031 freemsg(first_mp); 11032 return; 11033 } 11034 ip6h = (ip6_t *)mp->b_rptr; 11035 } 11036 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11037 up = (uint16_t *)&ip6h->ip6_src; 11038 /* 11039 * icmp has placed length and routing 11040 * header adjustment in icmp6_cksum. 11041 */ 11042 sum = htons(IPPROTO_ICMPV6) + 11043 up[0] + up[1] + up[2] + up[3] + 11044 up[4] + up[5] + up[6] + up[7] + 11045 up[8] + up[9] + up[10] + up[11] + 11046 up[12] + up[13] + up[14] + up[15]; 11047 sum = (sum & 0xffff) + (sum >> 16); 11048 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11049 11050 /* Update output mib stats */ 11051 icmp_update_out_mib_v6(ill, icmp6); 11052 } else if (nexthdr == IPPROTO_SCTP) { 11053 sctp_hdr_t *sctph; 11054 11055 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11056 if (!pullupmsg(mp, hdr_length + 11057 sizeof (*sctph))) { 11058 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11059 " failed\n")); 11060 BUMP_MIB(ill->ill_ip_mib, 11061 ipIfStatsOutDiscards); 11062 freemsg(mp); 11063 return; 11064 } 11065 ip6h = (ip6_t *)mp->b_rptr; 11066 } 11067 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11068 sctph->sh_chksum = 0; 11069 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11070 } 11071 11072 cksum_done: 11073 /* 11074 * We force the insertion of a fragment header using the 11075 * IPH_FRAG_HDR flag in two cases: 11076 * - after reception of an ICMPv6 "packet too 
big" message 11077 * with a MTU < 1280 (cf. RFC 2460 section 5) 11078 * - for multirouted IPv6 packets, so that the receiver can 11079 * discard duplicates according to their fragment identifier 11080 * 11081 * Two flags modifed from the API can modify this behavior. 11082 * The first is IPV6_USE_MIN_MTU. With this API the user 11083 * can specify how to manage PMTUD for unicast and multicast. 11084 * 11085 * IPV6_DONTFRAG disallows fragmentation. 11086 */ 11087 max_frag = ire->ire_max_frag; 11088 switch (IP6I_USE_MIN_MTU_API(flags)) { 11089 case IPV6_USE_MIN_MTU_DEFAULT: 11090 case IPV6_USE_MIN_MTU_UNICAST: 11091 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11092 max_frag = IPV6_MIN_MTU; 11093 } 11094 break; 11095 11096 case IPV6_USE_MIN_MTU_NEVER: 11097 max_frag = IPV6_MIN_MTU; 11098 break; 11099 } 11100 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11101 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11102 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11103 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11104 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11105 return; 11106 } 11107 11108 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11109 (mp->b_cont ? 
msgdsize(mp) : 11110 mp->b_wptr - (uchar_t *)ip6h)) { 11111 ip0dbg(("Packet length mismatch: %d, %ld\n", 11112 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11113 msgdsize(mp))); 11114 freemsg(first_mp); 11115 return; 11116 } 11117 /* Do IPSEC processing first */ 11118 if (mctl_present) { 11119 ipsec_out_process(q, first_mp, ire, ill_index); 11120 return; 11121 } 11122 ASSERT(mp->b_prev == NULL); 11123 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11124 ntohs(ip6h->ip6_plen) + 11125 IPV6_HDR_LEN, max_frag)); 11126 ASSERT(mp == first_mp); 11127 /* Initiate IPPF processing */ 11128 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11129 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11130 if (mp == NULL) { 11131 return; 11132 } 11133 } 11134 ip_wput_frag_v6(mp, ire, reachable, connp, 11135 caller, max_frag); 11136 return; 11137 } 11138 /* Do IPSEC processing first */ 11139 if (mctl_present) { 11140 int extra_len = ipsec_out_extra_length(first_mp); 11141 11142 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11143 max_frag) { 11144 /* 11145 * IPsec headers will push the packet over the 11146 * MTU limit. Issue an ICMPv6 Packet Too Big 11147 * message for this packet if the upper-layer 11148 * that issued this packet will be able to 11149 * react to the icmp_pkt2big_v6() that we'll 11150 * generate. 11151 */ 11152 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11153 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11154 return; 11155 } 11156 ipsec_out_process(q, first_mp, ire, ill_index); 11157 return; 11158 } 11159 /* 11160 * XXX multicast: add ip_mforward_v6() here. 11161 * Check conn_dontroute 11162 */ 11163 #ifdef lint 11164 /* 11165 * XXX The only purpose of this statement is to avoid lint 11166 * errors. See the above "XXX multicast". When that gets 11167 * fixed, remove this whole #ifdef lint section. 11168 */ 11169 ip3dbg(("multicast forward is %s.\n", 11170 (multicast_forward ? 
"TRUE" : "FALSE"))); 11171 #endif 11172 11173 UPDATE_OB_PKT_COUNT(ire); 11174 ire->ire_last_used_time = lbolt; 11175 ASSERT(mp == first_mp); 11176 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11177 } else { 11178 /* 11179 * DTrace this as ip:::send. A blocked packet will fire the 11180 * send probe, but not the receive probe. 11181 */ 11182 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11183 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11184 NULL, ip6_t *, ip6h, int, 1); 11185 DTRACE_PROBE4(ip6__loopback__out__start, 11186 ill_t *, NULL, ill_t *, ill, 11187 ip6_t *, ip6h, mblk_t *, first_mp); 11188 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11189 ipst->ips_ipv6firewall_loopback_out, 11190 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11191 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11192 if (first_mp != NULL) { 11193 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, 11194 zoneid); 11195 } 11196 } 11197 } 11198 11199 /* 11200 * Outbound IPv6 fragmentation routine using MDT. 11201 */ 11202 static void 11203 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11204 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11205 { 11206 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11207 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11208 mblk_t *hdr_mp, *md_mp = NULL; 11209 int i1; 11210 multidata_t *mmd; 11211 unsigned char *hdr_ptr, *pld_ptr; 11212 ip_pdescinfo_t pdi; 11213 uint32_t ident; 11214 size_t len; 11215 uint16_t offset; 11216 queue_t *stq = ire->ire_stq; 11217 ill_t *ill = (ill_t *)stq->q_ptr; 11218 ip_stack_t *ipst = ill->ill_ipst; 11219 11220 ASSERT(DB_TYPE(mp) == M_DATA); 11221 ASSERT(MBLKL(mp) > unfragmentable_len); 11222 11223 /* 11224 * Move read ptr past unfragmentable portion, we don't want this part 11225 * of the data in our fragments. 11226 */ 11227 mp->b_rptr += unfragmentable_len; 11228 11229 /* Calculate how many packets we will send out */ 11230 i1 = (mp->b_cont == NULL) ? 
MBLKL(mp) : msgsize(mp); 11231 pkts = (i1 + max_chunk - 1) / max_chunk; 11232 ASSERT(pkts > 1); 11233 11234 /* Allocate a message block which will hold all the IP Headers. */ 11235 wroff = ipst->ips_ip_wroff_extra; 11236 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11237 11238 i1 = pkts * hdr_chunk_len; 11239 /* 11240 * Create the header buffer, Multidata and destination address 11241 * and SAP attribute that should be associated with it. 11242 */ 11243 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11244 ((hdr_mp->b_wptr += i1), 11245 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11246 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11247 freemsg(mp); 11248 if (md_mp == NULL) { 11249 freemsg(hdr_mp); 11250 } else { 11251 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11252 freemsg(md_mp); 11253 } 11254 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11255 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11256 return; 11257 } 11258 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11259 11260 /* 11261 * Add a payload buffer to the Multidata; this operation must not 11262 * fail, or otherwise our logic in this routine is broken. There 11263 * is no memory allocation done by the routine, so any returned 11264 * failure simply tells us that we've done something wrong. 11265 * 11266 * A failure tells us that either we're adding the same payload 11267 * buffer more than once, or we're trying to add more buffers than 11268 * allowed. None of the above cases should happen, and we panic 11269 * because either there's horrible heap corruption, and/or 11270 * programming mistake. 11271 */ 11272 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11273 goto pbuf_panic; 11274 } 11275 11276 hdr_ptr = hdr_mp->b_rptr; 11277 pld_ptr = mp->b_rptr; 11278 11279 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11280 11281 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11282 11283 /* 11284 * len is the total length of the fragmentable data in this 11285 * datagram. 
For each fragment sent, we will decrement len 11286 * by the amount of fragmentable data sent in that fragment 11287 * until len reaches zero. 11288 */ 11289 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11290 11291 offset = 0; 11292 prev_nexthdr_offset += wroff; 11293 11294 while (len != 0) { 11295 size_t mlen; 11296 ip6_t *fip6h; 11297 ip6_frag_t *fraghdr; 11298 int error; 11299 11300 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11301 mlen = MIN(len, max_chunk); 11302 len -= mlen; 11303 11304 fip6h = (ip6_t *)(hdr_ptr + wroff); 11305 ASSERT(OK_32PTR(fip6h)); 11306 bcopy(ip6h, fip6h, unfragmentable_len); 11307 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11308 11309 fip6h->ip6_plen = htons((uint16_t)(mlen + 11310 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11311 11312 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11313 unfragmentable_len); 11314 fraghdr->ip6f_nxt = nexthdr; 11315 fraghdr->ip6f_reserved = 0; 11316 fraghdr->ip6f_offlg = htons(offset) | 11317 ((len != 0) ? IP6F_MORE_FRAG : 0); 11318 fraghdr->ip6f_ident = ident; 11319 11320 /* 11321 * Record offset and size of header and data of the next packet 11322 * in the multidata message. 11323 */ 11324 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11325 unfragmentable_len + sizeof (ip6_frag_t), 0); 11326 PDESC_PLD_INIT(&pdi); 11327 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11328 ASSERT(i1 > 0); 11329 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11330 if (i1 == mlen) { 11331 pld_ptr += mlen; 11332 } else { 11333 i1 = mlen - i1; 11334 mp = mp->b_cont; 11335 ASSERT(mp != NULL); 11336 ASSERT(MBLKL(mp) >= i1); 11337 /* 11338 * Attach the next payload message block to the 11339 * multidata message. 
11340 */ 11341 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11342 goto pbuf_panic; 11343 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11344 pld_ptr = mp->b_rptr + i1; 11345 } 11346 11347 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11348 KM_NOSLEEP)) == NULL) { 11349 /* 11350 * Any failure other than ENOMEM indicates that we 11351 * have passed in invalid pdesc info or parameters 11352 * to mmd_addpdesc, which must not happen. 11353 * 11354 * EINVAL is a result of failure on boundary checks 11355 * against the pdesc info contents. It should not 11356 * happen, and we panic because either there's 11357 * horrible heap corruption, and/or programming 11358 * mistake. 11359 */ 11360 if (error != ENOMEM) { 11361 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11362 "pdesc logic error detected for " 11363 "mmd %p pinfo %p (%d)\n", 11364 (void *)mmd, (void *)&pdi, error); 11365 /* NOTREACHED */ 11366 } 11367 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11368 /* Free unattached payload message blocks as well */ 11369 md_mp->b_cont = mp->b_cont; 11370 goto free_mmd; 11371 } 11372 11373 /* Advance fragment offset. */ 11374 offset += mlen; 11375 11376 /* Advance to location for next header in the buffer. */ 11377 hdr_ptr += hdr_chunk_len; 11378 11379 /* Did we reach the next payload message block? */ 11380 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11381 mp = mp->b_cont; 11382 /* 11383 * Attach the next message block with payload 11384 * data to the multidata message. 
11385 */ 11386 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11387 goto pbuf_panic; 11388 pld_ptr = mp->b_rptr; 11389 } 11390 } 11391 11392 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11393 ASSERT(mp->b_wptr == pld_ptr); 11394 11395 /* Update IP statistics */ 11396 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11397 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11398 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11399 /* 11400 * The ipv6 header len is accounted for in unfragmentable_len so 11401 * when calculating the fragmentation overhead just add the frag 11402 * header len. 11403 */ 11404 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11405 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11406 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11407 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11408 11409 ire->ire_ob_pkt_count += pkts; 11410 if (ire->ire_ipif != NULL) 11411 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11412 11413 ire->ire_last_used_time = lbolt; 11414 /* Send it down */ 11415 putnext(stq, md_mp); 11416 return; 11417 11418 pbuf_panic: 11419 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11420 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11421 pbuf_idx); 11422 /* NOTREACHED */ 11423 } 11424 11425 /* 11426 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11427 * We have not optimized this in terms of number of mblks 11428 * allocated. For instance, for each fragment sent we always allocate a 11429 * mblk to hold the IPv6 header and fragment header. 11430 * 11431 * Assumes that all the extension headers are contained in the first mblk. 11432 * 11433 * The fragment header is inserted after an hop-by-hop options header 11434 * and after [an optional destinations header followed by] a routing header. 11435 * 11436 * NOTE : This function does not ire_refrele the ire passed in as 11437 * the argument. 
11438 */ 11439 void 11440 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11441 int caller, int max_frag) 11442 { 11443 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11444 ip6_t *fip6h; 11445 mblk_t *hmp; 11446 mblk_t *hmp0; 11447 mblk_t *dmp; 11448 ip6_frag_t *fraghdr; 11449 size_t unfragmentable_len; 11450 size_t len; 11451 size_t mlen; 11452 size_t max_chunk; 11453 uint32_t ident; 11454 uint16_t off_flags; 11455 uint16_t offset = 0; 11456 ill_t *ill; 11457 uint8_t nexthdr; 11458 uint_t prev_nexthdr_offset; 11459 uint8_t *ptr; 11460 ip_stack_t *ipst = ire->ire_ipst; 11461 11462 ASSERT(ire->ire_type == IRE_CACHE); 11463 ill = (ill_t *)ire->ire_stq->q_ptr; 11464 11465 if (max_frag <= 0) { 11466 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11467 freemsg(mp); 11468 return; 11469 } 11470 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11471 11472 /* 11473 * Determine the length of the unfragmentable portion of this 11474 * datagram. This consists of the IPv6 header, a potential 11475 * hop-by-hop options header, a potential pre-routing-header 11476 * destination options header, and a potential routing header. 
11477 */ 11478 nexthdr = ip6h->ip6_nxt; 11479 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11480 ptr = (uint8_t *)&ip6h[1]; 11481 11482 if (nexthdr == IPPROTO_HOPOPTS) { 11483 ip6_hbh_t *hbh_hdr; 11484 uint_t hdr_len; 11485 11486 hbh_hdr = (ip6_hbh_t *)ptr; 11487 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11488 nexthdr = hbh_hdr->ip6h_nxt; 11489 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11490 - (uint8_t *)ip6h; 11491 ptr += hdr_len; 11492 } 11493 if (nexthdr == IPPROTO_DSTOPTS) { 11494 ip6_dest_t *dest_hdr; 11495 uint_t hdr_len; 11496 11497 dest_hdr = (ip6_dest_t *)ptr; 11498 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11499 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11500 nexthdr = dest_hdr->ip6d_nxt; 11501 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11502 - (uint8_t *)ip6h; 11503 ptr += hdr_len; 11504 } 11505 } 11506 if (nexthdr == IPPROTO_ROUTING) { 11507 ip6_rthdr_t *rthdr; 11508 uint_t hdr_len; 11509 11510 rthdr = (ip6_rthdr_t *)ptr; 11511 nexthdr = rthdr->ip6r_nxt; 11512 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11513 - (uint8_t *)ip6h; 11514 hdr_len = 8 * (rthdr->ip6r_len + 1); 11515 ptr += hdr_len; 11516 } 11517 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11518 11519 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11520 sizeof (ip6_frag_t)) & ~7; 11521 11522 /* Check if we can use MDT to send out the frags. */ 11523 ASSERT(!IRE_IS_LOCAL(ire)); 11524 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11525 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11526 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11527 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11528 nexthdr, prev_nexthdr_offset); 11529 return; 11530 } 11531 11532 /* 11533 * Allocate an mblk with enough room for the link-layer 11534 * header, the unfragmentable part of the datagram, and the 11535 * fragment header. 
This (or a copy) will be used as the 11536 * first mblk for each fragment we send. 11537 */ 11538 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 11539 ipst->ips_ip_wroff_extra, mp); 11540 if (hmp == NULL) { 11541 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11542 freemsg(mp); 11543 return; 11544 } 11545 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11546 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11547 11548 fip6h = (ip6_t *)hmp->b_rptr; 11549 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11550 11551 bcopy(ip6h, fip6h, unfragmentable_len); 11552 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11553 11554 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11555 11556 fraghdr->ip6f_nxt = nexthdr; 11557 fraghdr->ip6f_reserved = 0; 11558 fraghdr->ip6f_offlg = 0; 11559 fraghdr->ip6f_ident = htonl(ident); 11560 11561 /* 11562 * len is the total length of the fragmentable data in this 11563 * datagram. For each fragment sent, we will decrement len 11564 * by the amount of fragmentable data sent in that fragment 11565 * until len reaches zero. 11566 */ 11567 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11568 11569 /* 11570 * Move read ptr past unfragmentable portion, we don't want this part 11571 * of the data in our fragments. 
11572 */ 11573 mp->b_rptr += unfragmentable_len; 11574 11575 while (len != 0) { 11576 mlen = MIN(len, max_chunk); 11577 len -= mlen; 11578 if (len != 0) { 11579 /* Not last */ 11580 hmp0 = copyb(hmp); 11581 if (hmp0 == NULL) { 11582 freeb(hmp); 11583 freemsg(mp); 11584 BUMP_MIB(ill->ill_ip_mib, 11585 ipIfStatsOutFragFails); 11586 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11587 return; 11588 } 11589 off_flags = IP6F_MORE_FRAG; 11590 } else { 11591 /* Last fragment */ 11592 hmp0 = hmp; 11593 hmp = NULL; 11594 off_flags = 0; 11595 } 11596 fip6h = (ip6_t *)(hmp0->b_rptr); 11597 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11598 11599 fip6h->ip6_plen = htons((uint16_t)(mlen + 11600 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11601 /* 11602 * Note: Optimization alert. 11603 * In IPv6 (and IPv4) protocol header, Fragment Offset 11604 * ("offset") is 13 bits wide and in 8-octet units. 11605 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11606 * it occupies the most significant 13 bits. 11607 * (least significant 13 bits in IPv4). 11608 * We do not do any shifts here. Not shifting is same effect 11609 * as taking offset value in octet units, dividing by 8 and 11610 * then shifting 3 bits left to line it up in place in proper 11611 * place protocol header. 
11612 */ 11613 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11614 11615 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11616 /* mp has already been freed by ip_carve_mp() */ 11617 if (hmp != NULL) 11618 freeb(hmp); 11619 freeb(hmp0); 11620 ip1dbg(("ip_carve_mp: failed\n")); 11621 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11622 return; 11623 } 11624 hmp0->b_cont = dmp; 11625 /* Get the priority marking, if any */ 11626 hmp0->b_band = dmp->b_band; 11627 UPDATE_OB_PKT_COUNT(ire); 11628 ire->ire_last_used_time = lbolt; 11629 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11630 caller, NULL); 11631 reachable = 0; /* No need to redo state machine in loop */ 11632 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11633 offset += mlen; 11634 } 11635 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11636 } 11637 11638 /* 11639 * Determine if the ill and multicast aspects of that packets 11640 * "matches" the conn. 11641 */ 11642 boolean_t 11643 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11644 zoneid_t zoneid) 11645 { 11646 ill_t *bound_ill; 11647 boolean_t wantpacket; 11648 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11649 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11650 11651 /* 11652 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11653 * unicast and multicast reception to conn_incoming_ill. 11654 * conn_wantpacket_v6 is called both for unicast and 11655 * multicast. 11656 */ 11657 bound_ill = connp->conn_incoming_ill; 11658 if (bound_ill != NULL) { 11659 if (IS_IPMP(bound_ill)) { 11660 if (bound_ill->ill_grp != ill->ill_grp) 11661 return (B_FALSE); 11662 } else { 11663 if (bound_ill != ill) 11664 return (B_FALSE); 11665 } 11666 } 11667 11668 if (connp->conn_multi_router) 11669 return (B_TRUE); 11670 11671 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11672 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11673 /* 11674 * Unicast case: we match the conn only if it's in the specified 11675 * zone. 
11676 */ 11677 return (IPCL_ZONE_MATCH(connp, zoneid)); 11678 } 11679 11680 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11681 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11682 /* 11683 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11684 * disabled, therefore we don't dispatch the multicast packet to 11685 * the sending zone. 11686 */ 11687 return (B_FALSE); 11688 } 11689 11690 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11691 zoneid != ALL_ZONES) { 11692 /* 11693 * Multicast packet on the loopback interface: we only match 11694 * conns who joined the group in the specified zone. 11695 */ 11696 return (B_FALSE); 11697 } 11698 11699 mutex_enter(&connp->conn_lock); 11700 wantpacket = 11701 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11702 mutex_exit(&connp->conn_lock); 11703 11704 return (wantpacket); 11705 } 11706 11707 11708 /* 11709 * Transmit a packet and update any NUD state based on the flags 11710 * XXX need to "recover" any ip6i_t when doing putq! 11711 * 11712 * NOTE : This function does not ire_refrele the ire passed in as the 11713 * argument. 
11714 */ 11715 void 11716 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11717 int caller, ipsec_out_t *io) 11718 { 11719 mblk_t *mp1; 11720 nce_t *nce = ire->ire_nce; 11721 ill_t *ill; 11722 ill_t *out_ill; 11723 uint64_t delta; 11724 ip6_t *ip6h; 11725 queue_t *stq = ire->ire_stq; 11726 ire_t *ire1 = NULL; 11727 ire_t *save_ire = ire; 11728 boolean_t multirt_send = B_FALSE; 11729 mblk_t *next_mp = NULL; 11730 ip_stack_t *ipst = ire->ire_ipst; 11731 boolean_t fp_prepend = B_FALSE; 11732 uint32_t hlen; 11733 11734 ip6h = (ip6_t *)mp->b_rptr; 11735 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11736 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11737 ASSERT(nce != NULL); 11738 ASSERT(mp->b_datap->db_type == M_DATA); 11739 ASSERT(stq != NULL); 11740 11741 ill = ire_to_ill(ire); 11742 if (!ill) { 11743 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11744 freemsg(mp); 11745 return; 11746 } 11747 11748 /* 11749 * If a packet is to be sent out an interface that is a 6to4 11750 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11751 * destination, must be checked to have a 6to4 prefix 11752 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11753 * address configured on the sending interface. Otherwise, 11754 * the packet was delivered to this interface in error and the 11755 * packet must be dropped. 
11756 */ 11757 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11758 ipif_t *ipif = ill->ill_ipif; 11759 11760 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11761 &ip6h->ip6_dst)) { 11762 if (ip_debug > 2) { 11763 /* ip1dbg */ 11764 pr_addr_dbg("ip_xmit_v6: attempting to " 11765 "send 6to4 addressed IPv6 " 11766 "destination (%s) out the wrong " 11767 "interface.\n", AF_INET6, 11768 &ip6h->ip6_dst); 11769 } 11770 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 11771 freemsg(mp); 11772 return; 11773 } 11774 } 11775 11776 /* Flow-control check has been done in ip_wput_ire_v6 */ 11777 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11778 caller == IP_WSRV || canput(stq->q_next)) { 11779 uint32_t ill_index; 11780 11781 /* 11782 * In most cases, the emission loop below is entered only 11783 * once. Only in the case where the ire holds the 11784 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11785 * flagged ires in the bucket, and send the packet 11786 * through all crossed RTF_MULTIRT routes. 11787 */ 11788 if (ire->ire_flags & RTF_MULTIRT) { 11789 /* 11790 * Multirouting case. The bucket where ire is stored 11791 * probably holds other RTF_MULTIRT flagged ires 11792 * to the destination. In this call to ip_xmit_v6, 11793 * we attempt to send the packet through all 11794 * those ires. Thus, we first ensure that ire is the 11795 * first RTF_MULTIRT ire in the bucket, 11796 * before walking the ire list. 11797 */ 11798 ire_t *first_ire; 11799 irb_t *irb = ire->ire_bucket; 11800 ASSERT(irb != NULL); 11801 multirt_send = B_TRUE; 11802 11803 /* Make sure we do not omit any multiroute ire. 
*/ 11804 IRB_REFHOLD(irb); 11805 for (first_ire = irb->irb_ire; 11806 first_ire != NULL; 11807 first_ire = first_ire->ire_next) { 11808 if ((first_ire->ire_flags & RTF_MULTIRT) && 11809 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11810 &ire->ire_addr_v6)) && 11811 !(first_ire->ire_marks & 11812 (IRE_MARK_CONDEMNED | IRE_MARK_TESTHIDDEN))) 11813 break; 11814 } 11815 11816 if ((first_ire != NULL) && (first_ire != ire)) { 11817 IRE_REFHOLD(first_ire); 11818 /* ire will be released by the caller */ 11819 ire = first_ire; 11820 nce = ire->ire_nce; 11821 stq = ire->ire_stq; 11822 ill = ire_to_ill(ire); 11823 } 11824 IRB_REFRELE(irb); 11825 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11826 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11827 ILL_MDT_USABLE(ill)) { 11828 /* 11829 * This tcp connection was marked as MDT-capable, but 11830 * it has been turned off due changes in the interface. 11831 * Now that the interface support is back, turn it on 11832 * by notifying tcp. We don't directly modify tcp_mdt, 11833 * since we leave all the details to the tcp code that 11834 * knows better. 11835 */ 11836 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11837 11838 if (mdimp == NULL) { 11839 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11840 "connp %p (ENOMEM)\n", (void *)connp)); 11841 } else { 11842 CONN_INC_REF(connp); 11843 SQUEUE_ENTER_ONE(connp->conn_sqp, mdimp, 11844 tcp_input, connp, SQ_FILL, 11845 SQTAG_TCP_INPUT_MCTL); 11846 } 11847 } 11848 11849 do { 11850 mblk_t *mp_ip6h; 11851 11852 if (multirt_send) { 11853 irb_t *irb; 11854 /* 11855 * We are in a multiple send case, need to get 11856 * the next ire and make a duplicate of the 11857 * packet. ire1 holds here the next ire to 11858 * process in the bucket. If multirouting is 11859 * expected, any non-RTF_MULTIRT ire that has 11860 * the right destination address is ignored. 
11861 */ 11862 irb = ire->ire_bucket; 11863 ASSERT(irb != NULL); 11864 11865 IRB_REFHOLD(irb); 11866 for (ire1 = ire->ire_next; 11867 ire1 != NULL; 11868 ire1 = ire1->ire_next) { 11869 if (!(ire1->ire_flags & RTF_MULTIRT)) 11870 continue; 11871 if (!IN6_ARE_ADDR_EQUAL( 11872 &ire1->ire_addr_v6, 11873 &ire->ire_addr_v6)) 11874 continue; 11875 if (ire1->ire_marks & 11876 IRE_MARK_CONDEMNED) 11877 continue; 11878 11879 /* Got one */ 11880 if (ire1 != save_ire) { 11881 IRE_REFHOLD(ire1); 11882 } 11883 break; 11884 } 11885 IRB_REFRELE(irb); 11886 11887 if (ire1 != NULL) { 11888 next_mp = copyb(mp); 11889 if ((next_mp == NULL) || 11890 ((mp->b_cont != NULL) && 11891 ((next_mp->b_cont = 11892 dupmsg(mp->b_cont)) == NULL))) { 11893 freemsg(next_mp); 11894 next_mp = NULL; 11895 ire_refrele(ire1); 11896 ire1 = NULL; 11897 } 11898 } 11899 11900 /* Last multiroute ire; don't loop anymore. */ 11901 if (ire1 == NULL) { 11902 multirt_send = B_FALSE; 11903 } 11904 } 11905 11906 ill_index = 11907 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11908 11909 /* Initiate IPPF processing */ 11910 if (IP6_OUT_IPP(flags, ipst)) { 11911 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11912 if (mp == NULL) { 11913 BUMP_MIB(ill->ill_ip_mib, 11914 ipIfStatsOutDiscards); 11915 if (next_mp != NULL) 11916 freemsg(next_mp); 11917 if (ire != save_ire) { 11918 ire_refrele(ire); 11919 } 11920 return; 11921 } 11922 ip6h = (ip6_t *)mp->b_rptr; 11923 } 11924 mp_ip6h = mp; 11925 11926 /* 11927 * Check for fastpath, we need to hold nce_lock to 11928 * prevent fastpath update from chaining nce_fp_mp. 
11929 */ 11930 11931 ASSERT(nce->nce_ipversion != IPV4_VERSION); 11932 mutex_enter(&nce->nce_lock); 11933 if ((mp1 = nce->nce_fp_mp) != NULL) { 11934 uchar_t *rptr; 11935 11936 hlen = MBLKL(mp1); 11937 rptr = mp->b_rptr - hlen; 11938 /* 11939 * make sure there is room for the fastpath 11940 * datalink header 11941 */ 11942 if (rptr < mp->b_datap->db_base) { 11943 mp1 = copyb(mp1); 11944 mutex_exit(&nce->nce_lock); 11945 if (mp1 == NULL) { 11946 BUMP_MIB(ill->ill_ip_mib, 11947 ipIfStatsOutDiscards); 11948 freemsg(mp); 11949 if (next_mp != NULL) 11950 freemsg(next_mp); 11951 if (ire != save_ire) { 11952 ire_refrele(ire); 11953 } 11954 return; 11955 } 11956 mp1->b_cont = mp; 11957 11958 /* Get the priority marking, if any */ 11959 mp1->b_band = mp->b_band; 11960 mp = mp1; 11961 } else { 11962 mp->b_rptr = rptr; 11963 /* 11964 * fastpath - pre-pend datalink 11965 * header 11966 */ 11967 bcopy(mp1->b_rptr, rptr, hlen); 11968 mutex_exit(&nce->nce_lock); 11969 fp_prepend = B_TRUE; 11970 } 11971 } else { 11972 /* 11973 * Get the DL_UNITDATA_REQ. 11974 */ 11975 mp1 = nce->nce_res_mp; 11976 if (mp1 == NULL) { 11977 mutex_exit(&nce->nce_lock); 11978 ip1dbg(("ip_xmit_v6: No resolution " 11979 "block ire = %p\n", (void *)ire)); 11980 freemsg(mp); 11981 if (next_mp != NULL) 11982 freemsg(next_mp); 11983 if (ire != save_ire) { 11984 ire_refrele(ire); 11985 } 11986 return; 11987 } 11988 /* 11989 * Prepend the DL_UNITDATA_REQ. 
11990 */ 11991 mp1 = copyb(mp1); 11992 mutex_exit(&nce->nce_lock); 11993 if (mp1 == NULL) { 11994 BUMP_MIB(ill->ill_ip_mib, 11995 ipIfStatsOutDiscards); 11996 freemsg(mp); 11997 if (next_mp != NULL) 11998 freemsg(next_mp); 11999 if (ire != save_ire) { 12000 ire_refrele(ire); 12001 } 12002 return; 12003 } 12004 mp1->b_cont = mp; 12005 12006 /* Get the priority marking, if any */ 12007 mp1->b_band = mp->b_band; 12008 mp = mp1; 12009 } 12010 12011 out_ill = (ill_t *)stq->q_ptr; 12012 12013 DTRACE_PROBE4(ip6__physical__out__start, 12014 ill_t *, NULL, ill_t *, out_ill, 12015 ip6_t *, ip6h, mblk_t *, mp); 12016 12017 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12018 ipst->ips_ipv6firewall_physical_out, 12019 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 12020 12021 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12022 12023 if (mp == NULL) { 12024 if (multirt_send) { 12025 ASSERT(ire1 != NULL); 12026 if (ire != save_ire) { 12027 ire_refrele(ire); 12028 } 12029 /* 12030 * Proceed with the next RTF_MULTIRT 12031 * ire, also set up the send-to queue 12032 * accordingly. 12033 */ 12034 ire = ire1; 12035 ire1 = NULL; 12036 stq = ire->ire_stq; 12037 nce = ire->ire_nce; 12038 ill = ire_to_ill(ire); 12039 mp = next_mp; 12040 next_mp = NULL; 12041 continue; 12042 } else { 12043 ASSERT(next_mp == NULL); 12044 ASSERT(ire1 == NULL); 12045 break; 12046 } 12047 } 12048 12049 if (ipst->ips_ipobs_enabled) { 12050 zoneid_t szone; 12051 12052 szone = ip_get_zoneid_v6(&ip6h->ip6_src, 12053 mp_ip6h, out_ill, ipst, ALL_ZONES); 12054 ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, 12055 ALL_ZONES, out_ill, IPV6_VERSION, 12056 fp_prepend ? hlen : 0, ipst); 12057 } 12058 12059 /* 12060 * Update ire and MIB counters; for save_ire, this has 12061 * been done by the caller. 
12062 */ 12063 if (ire != save_ire) { 12064 UPDATE_OB_PKT_COUNT(ire); 12065 ire->ire_last_used_time = lbolt; 12066 12067 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12068 BUMP_MIB(ill->ill_ip_mib, 12069 ipIfStatsHCOutMcastPkts); 12070 UPDATE_MIB(ill->ill_ip_mib, 12071 ipIfStatsHCOutMcastOctets, 12072 ntohs(ip6h->ip6_plen) + 12073 IPV6_HDR_LEN); 12074 } 12075 } 12076 12077 /* 12078 * Send it down. XXX Do we want to flow control AH/ESP 12079 * packets that carry TCP payloads? We don't flow 12080 * control TCP packets, but we should also not 12081 * flow-control TCP packets that have been protected. 12082 * We don't have an easy way to find out if an AH/ESP 12083 * packet was originally TCP or not currently. 12084 */ 12085 if (io == NULL) { 12086 BUMP_MIB(ill->ill_ip_mib, 12087 ipIfStatsHCOutTransmits); 12088 UPDATE_MIB(ill->ill_ip_mib, 12089 ipIfStatsHCOutOctets, 12090 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12091 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 12092 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 12093 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 12094 int, 0); 12095 12096 putnext(stq, mp); 12097 } else { 12098 /* 12099 * Safety Pup says: make sure this is 12100 * going to the right interface! 
12101 */ 12102 if (io->ipsec_out_capab_ill_index != 12103 ill_index) { 12104 /* IPsec kstats: bump lose counter */ 12105 freemsg(mp1); 12106 } else { 12107 BUMP_MIB(ill->ill_ip_mib, 12108 ipIfStatsHCOutTransmits); 12109 UPDATE_MIB(ill->ill_ip_mib, 12110 ipIfStatsHCOutOctets, 12111 ntohs(ip6h->ip6_plen) + 12112 IPV6_HDR_LEN); 12113 DTRACE_IP7(send, mblk_t *, mp, 12114 conn_t *, NULL, void_ip_t *, ip6h, 12115 __dtrace_ipsr_ill_t *, out_ill, 12116 ipha_t *, NULL, ip6_t *, ip6h, int, 12117 0); 12118 ipsec_hw_putnext(stq, mp); 12119 } 12120 } 12121 12122 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12123 if (ire != save_ire) { 12124 ire_refrele(ire); 12125 } 12126 if (multirt_send) { 12127 ASSERT(ire1 != NULL); 12128 /* 12129 * Proceed with the next RTF_MULTIRT 12130 * ire, also set up the send-to queue 12131 * accordingly. 12132 */ 12133 ire = ire1; 12134 ire1 = NULL; 12135 stq = ire->ire_stq; 12136 nce = ire->ire_nce; 12137 ill = ire_to_ill(ire); 12138 mp = next_mp; 12139 next_mp = NULL; 12140 continue; 12141 } 12142 ASSERT(next_mp == NULL); 12143 ASSERT(ire1 == NULL); 12144 return; 12145 } 12146 12147 ASSERT(nce->nce_state != ND_INCOMPLETE); 12148 12149 /* 12150 * Check for upper layer advice 12151 */ 12152 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12153 /* 12154 * It should be o.k. to check the state without 12155 * a lock here, at most we lose an advice. 
12156 */ 12157 nce->nce_last = TICK_TO_MSEC(lbolt64); 12158 if (nce->nce_state != ND_REACHABLE) { 12159 12160 mutex_enter(&nce->nce_lock); 12161 nce->nce_state = ND_REACHABLE; 12162 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12163 mutex_exit(&nce->nce_lock); 12164 (void) untimeout(nce->nce_timeout_id); 12165 if (ip_debug > 2) { 12166 /* ip1dbg */ 12167 pr_addr_dbg("ip_xmit_v6: state" 12168 " for %s changed to" 12169 " REACHABLE\n", AF_INET6, 12170 &ire->ire_addr_v6); 12171 } 12172 } 12173 if (ire != save_ire) { 12174 ire_refrele(ire); 12175 } 12176 if (multirt_send) { 12177 ASSERT(ire1 != NULL); 12178 /* 12179 * Proceed with the next RTF_MULTIRT 12180 * ire, also set up the send-to queue 12181 * accordingly. 12182 */ 12183 ire = ire1; 12184 ire1 = NULL; 12185 stq = ire->ire_stq; 12186 nce = ire->ire_nce; 12187 ill = ire_to_ill(ire); 12188 mp = next_mp; 12189 next_mp = NULL; 12190 continue; 12191 } 12192 ASSERT(next_mp == NULL); 12193 ASSERT(ire1 == NULL); 12194 return; 12195 } 12196 12197 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12198 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12199 " ill_reachable_time = %d \n", delta, 12200 ill->ill_reachable_time)); 12201 if (delta > (uint64_t)ill->ill_reachable_time) { 12202 nce = ire->ire_nce; 12203 mutex_enter(&nce->nce_lock); 12204 switch (nce->nce_state) { 12205 case ND_REACHABLE: 12206 case ND_STALE: 12207 /* 12208 * ND_REACHABLE is identical to 12209 * ND_STALE in this specific case. If 12210 * reachable time has expired for this 12211 * neighbor (delta is greater than 12212 * reachable time), conceptually, the 12213 * neighbor cache is no longer in 12214 * REACHABLE state, but already in 12215 * STALE state. So the correct 12216 * transition here is to ND_DELAY. 
12217 */ 12218 nce->nce_state = ND_DELAY; 12219 mutex_exit(&nce->nce_lock); 12220 NDP_RESTART_TIMER(nce, 12221 ipst->ips_delay_first_probe_time); 12222 if (ip_debug > 3) { 12223 /* ip2dbg */ 12224 pr_addr_dbg("ip_xmit_v6: state" 12225 " for %s changed to" 12226 " DELAY\n", AF_INET6, 12227 &ire->ire_addr_v6); 12228 } 12229 break; 12230 case ND_DELAY: 12231 case ND_PROBE: 12232 mutex_exit(&nce->nce_lock); 12233 /* Timers have already started */ 12234 break; 12235 case ND_UNREACHABLE: 12236 /* 12237 * ndp timer has detected that this nce 12238 * is unreachable and initiated deleting 12239 * this nce and all its associated IREs. 12240 * This is a race where we found the 12241 * ire before it was deleted and have 12242 * just sent out a packet using this 12243 * unreachable nce. 12244 */ 12245 mutex_exit(&nce->nce_lock); 12246 break; 12247 default: 12248 ASSERT(0); 12249 } 12250 } 12251 12252 if (multirt_send) { 12253 ASSERT(ire1 != NULL); 12254 /* 12255 * Proceed with the next RTF_MULTIRT ire, 12256 * Also set up the send-to queue accordingly. 12257 */ 12258 if (ire != save_ire) { 12259 ire_refrele(ire); 12260 } 12261 ire = ire1; 12262 ire1 = NULL; 12263 stq = ire->ire_stq; 12264 nce = ire->ire_nce; 12265 ill = ire_to_ill(ire); 12266 mp = next_mp; 12267 next_mp = NULL; 12268 } 12269 } while (multirt_send); 12270 /* 12271 * In the multirouting case, release the last ire used for 12272 * emission. save_ire will be released by the caller. 12273 */ 12274 if (ire != save_ire) { 12275 ire_refrele(ire); 12276 } 12277 } else { 12278 /* 12279 * Can't apply backpressure, just discard the packet. 12280 */ 12281 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12282 freemsg(mp); 12283 return; 12284 } 12285 } 12286 12287 /* 12288 * pr_addr_dbg function provides the needed buffer space to call 12289 * inet_ntop() function's 3rd argument. This function should be 12290 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12291 * stack buffer space in it's own stack frame. 
This function uses 12292 * a buffer from it's own stack and prints the information. 12293 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12294 * 12295 * Note: This function can call inet_ntop() once. 12296 */ 12297 void 12298 pr_addr_dbg(char *fmt1, int af, const void *addr) 12299 { 12300 char buf[INET6_ADDRSTRLEN]; 12301 12302 if (fmt1 == NULL) { 12303 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12304 return; 12305 } 12306 12307 /* 12308 * This does not compare debug level and just prints 12309 * out. Thus it is the responsibility of the caller 12310 * to check the appropriate debug-level before calling 12311 * this function. 12312 */ 12313 if (ip_debug > 0) { 12314 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12315 } 12316 12317 12318 } 12319 12320 12321 /* 12322 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12323 * if needed and extension headers) that will be needed based on the 12324 * ip6_pkt_t structure passed by the caller. 12325 * 12326 * The returned length does not include the length of the upper level 12327 * protocol (ULP) header. 
12328 */ 12329 int 12330 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12331 { 12332 int len; 12333 12334 len = IPV6_HDR_LEN; 12335 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12336 len += sizeof (ip6i_t); 12337 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12338 ASSERT(ipp->ipp_hopoptslen != 0); 12339 len += ipp->ipp_hopoptslen; 12340 } 12341 if (ipp->ipp_fields & IPPF_RTHDR) { 12342 ASSERT(ipp->ipp_rthdrlen != 0); 12343 len += ipp->ipp_rthdrlen; 12344 } 12345 /* 12346 * En-route destination options 12347 * Only do them if there's a routing header as well 12348 */ 12349 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12350 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12351 ASSERT(ipp->ipp_rtdstoptslen != 0); 12352 len += ipp->ipp_rtdstoptslen; 12353 } 12354 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12355 ASSERT(ipp->ipp_dstoptslen != 0); 12356 len += ipp->ipp_dstoptslen; 12357 } 12358 return (len); 12359 } 12360 12361 /* 12362 * All-purpose routine to build a header chain of an IPv6 header 12363 * followed by any required extension headers and a proto header, 12364 * preceeded (where necessary) by an ip6i_t private header. 12365 * 12366 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12367 * will be filled in appropriately. 12368 * Thus the caller must fill in the rest of the IPv6 header, such as 12369 * traffic class/flowid, source address (if not set here), hoplimit (if not 12370 * set here) and destination address. 12371 * 12372 * The extension headers and ip6i_t header will all be fully filled in. 12373 */ 12374 void 12375 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12376 ip6_pkt_t *ipp, uint8_t protocol) 12377 { 12378 uint8_t *nxthdr_ptr; 12379 uint8_t *cp; 12380 ip6i_t *ip6i; 12381 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12382 12383 /* 12384 * If sending private ip6i_t header down (checksum info, nexthop, 12385 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12386 * then fill it in. (The checksum info will be filled in by icmp). 
12387 */ 12388 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12389 ip6i = (ip6i_t *)ip6h; 12390 ip6h = (ip6_t *)&ip6i[1]; 12391 12392 ip6i->ip6i_flags = 0; 12393 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12394 if (ipp->ipp_fields & IPPF_IFINDEX || 12395 ipp->ipp_fields & IPPF_SCOPE_ID) { 12396 ASSERT(ipp->ipp_ifindex != 0); 12397 ip6i->ip6i_flags |= IP6I_IFINDEX; 12398 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12399 } 12400 if (ipp->ipp_fields & IPPF_ADDR) { 12401 /* 12402 * Enable per-packet source address verification if 12403 * IPV6_PKTINFO specified the source address. 12404 * ip6_src is set in the transport's _wput function. 12405 */ 12406 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12407 &ipp->ipp_addr)); 12408 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12409 } 12410 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12411 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12412 /* 12413 * We need to set this flag so that IP doesn't 12414 * rewrite the IPv6 header's hoplimit with the 12415 * current default value. 12416 */ 12417 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12418 } 12419 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12420 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12421 &ipp->ipp_nexthop)); 12422 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12423 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12424 } 12425 /* 12426 * tell IP this is an ip6i_t private header 12427 */ 12428 ip6i->ip6i_nxt = IPPROTO_RAW; 12429 } 12430 /* Initialize IPv6 header */ 12431 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12432 if (ipp->ipp_fields & IPPF_TCLASS) { 12433 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12434 (ipp->ipp_tclass << 20); 12435 } 12436 if (ipp->ipp_fields & IPPF_ADDR) 12437 ip6h->ip6_src = ipp->ipp_addr; 12438 12439 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12440 cp = (uint8_t *)&ip6h[1]; 12441 /* 12442 * Here's where we have to start stringing together 12443 * any extension headers in the right order: 12444 * Hop-by-hop, destination, routing, and final destination opts. 
12445 */ 12446 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12447 /* Hop-by-hop options */ 12448 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12449 12450 *nxthdr_ptr = IPPROTO_HOPOPTS; 12451 nxthdr_ptr = &hbh->ip6h_nxt; 12452 12453 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12454 cp += ipp->ipp_hopoptslen; 12455 } 12456 /* 12457 * En-route destination options 12458 * Only do them if there's a routing header as well 12459 */ 12460 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12461 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12462 ip6_dest_t *dst = (ip6_dest_t *)cp; 12463 12464 *nxthdr_ptr = IPPROTO_DSTOPTS; 12465 nxthdr_ptr = &dst->ip6d_nxt; 12466 12467 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12468 cp += ipp->ipp_rtdstoptslen; 12469 } 12470 /* 12471 * Routing header next 12472 */ 12473 if (ipp->ipp_fields & IPPF_RTHDR) { 12474 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12475 12476 *nxthdr_ptr = IPPROTO_ROUTING; 12477 nxthdr_ptr = &rt->ip6r_nxt; 12478 12479 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12480 cp += ipp->ipp_rthdrlen; 12481 } 12482 /* 12483 * Do ultimate destination options 12484 */ 12485 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12486 ip6_dest_t *dest = (ip6_dest_t *)cp; 12487 12488 *nxthdr_ptr = IPPROTO_DSTOPTS; 12489 nxthdr_ptr = &dest->ip6d_nxt; 12490 12491 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12492 cp += ipp->ipp_dstoptslen; 12493 } 12494 /* 12495 * Now set the last header pointer to the proto passed in 12496 */ 12497 *nxthdr_ptr = protocol; 12498 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12499 } 12500 12501 /* 12502 * Return a pointer to the routing header extension header 12503 * in the IPv6 header(s) chain passed in. 
12504 * If none found, return NULL 12505 * Assumes that all extension headers are in same mblk as the v6 header 12506 */ 12507 ip6_rthdr_t * 12508 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12509 { 12510 ip6_dest_t *desthdr; 12511 ip6_frag_t *fraghdr; 12512 uint_t hdrlen; 12513 uint8_t nexthdr; 12514 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12515 12516 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12517 return ((ip6_rthdr_t *)ptr); 12518 12519 /* 12520 * The routing header will precede all extension headers 12521 * other than the hop-by-hop and destination options 12522 * extension headers, so if we see anything other than those, 12523 * we're done and didn't find it. 12524 * We could see a destination options header alone but no 12525 * routing header, in which case we'll return NULL as soon as 12526 * we see anything after that. 12527 * Hop-by-hop and destination option headers are identical, 12528 * so we can use either one we want as a template. 12529 */ 12530 nexthdr = ip6h->ip6_nxt; 12531 while (ptr < endptr) { 12532 /* Is there enough left for len + nexthdr? */ 12533 if (ptr + MIN_EHDR_LEN > endptr) 12534 return (NULL); 12535 12536 switch (nexthdr) { 12537 case IPPROTO_HOPOPTS: 12538 case IPPROTO_DSTOPTS: 12539 /* Assumes the headers are identical for hbh and dst */ 12540 desthdr = (ip6_dest_t *)ptr; 12541 hdrlen = 8 * (desthdr->ip6d_len + 1); 12542 nexthdr = desthdr->ip6d_nxt; 12543 break; 12544 12545 case IPPROTO_ROUTING: 12546 return ((ip6_rthdr_t *)ptr); 12547 12548 case IPPROTO_FRAGMENT: 12549 fraghdr = (ip6_frag_t *)ptr; 12550 hdrlen = sizeof (ip6_frag_t); 12551 nexthdr = fraghdr->ip6f_nxt; 12552 break; 12553 12554 default: 12555 return (NULL); 12556 } 12557 ptr += hdrlen; 12558 } 12559 return (NULL); 12560 } 12561 12562 /* 12563 * Called for source-routed packets originating on this node. 
12564 * Manipulates the original routing header by moving every entry up 12565 * one slot, placing the first entry in the v6 header's v6_dst field, 12566 * and placing the ultimate destination in the routing header's last 12567 * slot. 12568 * 12569 * Returns the checksum diference between the ultimate destination 12570 * (last hop in the routing header when the packet is sent) and 12571 * the first hop (ip6_dst when the packet is sent) 12572 */ 12573 /* ARGSUSED2 */ 12574 uint32_t 12575 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12576 { 12577 uint_t numaddr; 12578 uint_t i; 12579 in6_addr_t *addrptr; 12580 in6_addr_t tmp; 12581 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12582 uint32_t cksm; 12583 uint32_t addrsum = 0; 12584 uint16_t *ptr; 12585 12586 /* 12587 * Perform any processing needed for source routing. 12588 * We know that all extension headers will be in the same mblk 12589 * as the IPv6 header. 12590 */ 12591 12592 /* 12593 * If no segments left in header, or the header length field is zero, 12594 * don't move hop addresses around; 12595 * Checksum difference is zero. 12596 */ 12597 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12598 return (0); 12599 12600 ptr = (uint16_t *)&ip6h->ip6_dst; 12601 cksm = 0; 12602 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12603 cksm += ptr[i]; 12604 } 12605 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12606 12607 /* 12608 * Here's where the fun begins - we have to 12609 * move all addresses up one spot, take the 12610 * first hop and make it our first ip6_dst, 12611 * and place the ultimate destination in the 12612 * newly-opened last slot. 
12613 */ 12614 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12615 numaddr = rthdr->ip6r0_len / 2; 12616 tmp = *addrptr; 12617 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12618 *addrptr = addrptr[1]; 12619 } 12620 *addrptr = ip6h->ip6_dst; 12621 ip6h->ip6_dst = tmp; 12622 12623 /* 12624 * From the checksummed ultimate destination subtract the checksummed 12625 * current ip6_dst (the first hop address). Return that number. 12626 * (In the v4 case, the second part of this is done in each routine 12627 * that calls ip_massage_options(). We do it all in this one place 12628 * for v6). 12629 */ 12630 ptr = (uint16_t *)&ip6h->ip6_dst; 12631 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12632 addrsum += ptr[i]; 12633 } 12634 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12635 if ((int)cksm < 0) 12636 cksm--; 12637 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12638 12639 return (cksm); 12640 } 12641 12642 /* 12643 * Propagate a multicast group membership operation (join/leave) (*fn) on 12644 * all interfaces crossed by the related multirt routes. 12645 * The call is considered successful if the operation succeeds 12646 * on at least one interface. 12647 * The function is called if the destination address in the packet to send 12648 * is multirouted. 
12649 */ 12650 int 12651 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12652 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12653 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12654 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12655 { 12656 ire_t *ire_gw; 12657 irb_t *irb; 12658 int index, error = 0; 12659 opt_restart_t *or; 12660 ip_stack_t *ipst = ire->ire_ipst; 12661 12662 irb = ire->ire_bucket; 12663 ASSERT(irb != NULL); 12664 12665 ASSERT(DB_TYPE(first_mp) == M_CTL); 12666 or = (opt_restart_t *)first_mp->b_rptr; 12667 12668 IRB_REFHOLD(irb); 12669 for (; ire != NULL; ire = ire->ire_next) { 12670 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12671 continue; 12672 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12673 continue; 12674 12675 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12676 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12677 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12678 /* No resolver exists for the gateway; skip this ire. */ 12679 if (ire_gw == NULL) 12680 continue; 12681 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12682 /* 12683 * A resolver exists: we can get the interface on which we have 12684 * to apply the operation. 12685 */ 12686 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12687 first_mp); 12688 if (error == 0) 12689 or->or_private = CGTP_MCAST_SUCCESS; 12690 12691 if (ip_debug > 0) { 12692 ulong_t off; 12693 char *ksym; 12694 12695 ksym = kobj_getsymname((uintptr_t)fn, &off); 12696 ip2dbg(("ip_multirt_apply_membership_v6: " 12697 "called %s, multirt group 0x%08x via itf 0x%08x, " 12698 "error %d [success %u]\n", 12699 ksym ? 
ksym : "?", 12700 ntohl(V4_PART_OF_V6((*v6grp))), 12701 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12702 error, or->or_private)); 12703 } 12704 12705 ire_refrele(ire_gw); 12706 if (error == EINPROGRESS) { 12707 IRB_REFRELE(irb); 12708 return (error); 12709 } 12710 } 12711 IRB_REFRELE(irb); 12712 /* 12713 * Consider the call as successful if we succeeded on at least 12714 * one interface. Otherwise, return the last encountered error. 12715 */ 12716 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12717 } 12718 12719 void 12720 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12721 { 12722 kstat_t *ksp; 12723 12724 ip6_stat_t template = { 12725 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12726 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12727 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12728 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12729 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12730 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12731 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12732 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12733 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12734 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12735 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12736 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12737 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12738 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12739 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 12740 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 12741 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 12742 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 12743 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 12744 }; 12745 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 12746 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 12747 KSTAT_FLAG_VIRTUAL, stackid); 12748 12749 if (ksp == NULL) 12750 return (NULL); 12751 12752 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 12753 ksp->ks_data = (void *)ip6_statisticsp; 12754 ksp->ks_private = (void *)(uintptr_t)stackid; 12755 12756 kstat_install(ksp); 12757 return (ksp); 12758 } 12759 12760 void 12761 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 12762 { 12763 if (ksp != NULL) { 12764 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 12765 kstat_delete_netstack(ksp, stackid); 12766 } 12767 } 12768 12769 /* 12770 * The following two functions set and get the value for the 12771 * IPV6_SRC_PREFERENCES socket option. 12772 */ 12773 int 12774 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12775 { 12776 /* 12777 * We only support preferences that are covered by 12778 * IPV6_PREFER_SRC_MASK. 12779 */ 12780 if (prefs & ~IPV6_PREFER_SRC_MASK) 12781 return (EINVAL); 12782 12783 /* 12784 * Look for conflicting preferences or default preferences. If 12785 * both bits of a related pair are clear, the application wants the 12786 * system's default value for that pair. Both bits in a pair can't 12787 * be set. 
12788 */ 12789 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12790 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12791 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12792 IPV6_PREFER_SRC_MIPMASK) { 12793 return (EINVAL); 12794 } 12795 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12796 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12797 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12798 IPV6_PREFER_SRC_TMPMASK) { 12799 return (EINVAL); 12800 } 12801 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12802 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12803 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12804 IPV6_PREFER_SRC_CGAMASK) { 12805 return (EINVAL); 12806 } 12807 12808 connp->conn_src_preferences = prefs; 12809 return (0); 12810 } 12811 12812 size_t 12813 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12814 { 12815 *val = connp->conn_src_preferences; 12816 return (sizeof (connp->conn_src_preferences)); 12817 } 12818 12819 int 12820 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti) 12821 { 12822 ire_t *ire; 12823 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12824 12825 /* 12826 * Verify the source address and ifindex. Privileged users can use 12827 * any source address. For ancillary data the source address is 12828 * checked in ip_wput_v6. 
12829 */ 12830 if (pkti->ipi6_ifindex != 0) { 12831 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 12832 if (!phyint_exists(pkti->ipi6_ifindex, ipst)) { 12833 rw_exit(&ipst->ips_ill_g_lock); 12834 return (ENXIO); 12835 } 12836 rw_exit(&ipst->ips_ill_g_lock); 12837 } 12838 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12839 secpolicy_net_rawaccess(cr) != 0) { 12840 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12841 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12842 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 12843 if (ire != NULL) 12844 ire_refrele(ire); 12845 else 12846 return (ENXIO); 12847 } 12848 return (0); 12849 } 12850 12851 /* 12852 * Get the size of the IP options (including the IP headers size) 12853 * without including the AH header's size. If till_ah is B_FALSE, 12854 * and if AH header is present, dest options beyond AH header will 12855 * also be included in the returned size. 12856 */ 12857 int 12858 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12859 { 12860 ip6_t *ip6h; 12861 uint8_t nexthdr; 12862 uint8_t *whereptr; 12863 ip6_hbh_t *hbhhdr; 12864 ip6_dest_t *dsthdr; 12865 ip6_rthdr_t *rthdr; 12866 int ehdrlen; 12867 int size; 12868 ah_t *ah; 12869 12870 ip6h = (ip6_t *)mp->b_rptr; 12871 size = IPV6_HDR_LEN; 12872 nexthdr = ip6h->ip6_nxt; 12873 whereptr = (uint8_t *)&ip6h[1]; 12874 for (;;) { 12875 /* Assume IP has already stripped it */ 12876 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12877 switch (nexthdr) { 12878 case IPPROTO_HOPOPTS: 12879 hbhhdr = (ip6_hbh_t *)whereptr; 12880 nexthdr = hbhhdr->ip6h_nxt; 12881 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12882 break; 12883 case IPPROTO_DSTOPTS: 12884 dsthdr = (ip6_dest_t *)whereptr; 12885 nexthdr = dsthdr->ip6d_nxt; 12886 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12887 break; 12888 case IPPROTO_ROUTING: 12889 rthdr = (ip6_rthdr_t *)whereptr; 12890 nexthdr = rthdr->ip6r_nxt; 12891 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12892 break; 12893 default : 12894 if (till_ah) { 12895 
ASSERT(nexthdr == IPPROTO_AH); 12896 return (size); 12897 } 12898 /* 12899 * If we don't have a AH header to traverse, 12900 * return now. This happens normally for 12901 * outbound datagrams where we have not inserted 12902 * the AH header. 12903 */ 12904 if (nexthdr != IPPROTO_AH) { 12905 return (size); 12906 } 12907 12908 /* 12909 * We don't include the AH header's size 12910 * to be symmetrical with other cases where 12911 * we either don't have a AH header (outbound) 12912 * or peek into the AH header yet (inbound and 12913 * not pulled up yet). 12914 */ 12915 ah = (ah_t *)whereptr; 12916 nexthdr = ah->ah_nexthdr; 12917 ehdrlen = (ah->ah_length << 2) + 8; 12918 12919 if (nexthdr == IPPROTO_DSTOPTS) { 12920 if (whereptr + ehdrlen >= mp->b_wptr) { 12921 /* 12922 * The destination options header 12923 * is not part of the first mblk. 12924 */ 12925 whereptr = mp->b_cont->b_rptr; 12926 } else { 12927 whereptr += ehdrlen; 12928 } 12929 12930 dsthdr = (ip6_dest_t *)whereptr; 12931 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12932 size += ehdrlen; 12933 } 12934 return (size); 12935 } 12936 whereptr += ehdrlen; 12937 size += ehdrlen; 12938 } 12939 } 12940 12941 /* 12942 * Utility routine that checks if `v6srcp' is a valid address on underlying 12943 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 12944 * associated with `v6srcp' on success. NOTE: if this is not called from 12945 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 12946 * group during or after this lookup. 
12947 */ 12948 static boolean_t 12949 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 12950 { 12951 ipif_t *ipif; 12952 12953 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 12954 if (ipif != NULL) { 12955 if (ipifp != NULL) 12956 *ipifp = ipif; 12957 else 12958 ipif_refrele(ipif); 12959 return (B_TRUE); 12960 } 12961 12962 if (ip_debug > 2) { 12963 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 12964 "src %s\n", AF_INET6, v6srcp); 12965 } 12966 return (B_FALSE); 12967 } 12968