1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 #include <inet/sadb.h> 92 #include <inet/ipsec_impl.h> 93 #include <inet/tun.h> 94 #include <inet/sctp_ip.h> 95 #include <sys/pattr.h> 96 #include <inet/ipclassifier.h> 97 #include <inet/ipsecah.h> 98 #include <inet/rawip_impl.h> 99 #include <inet/rts_impl.h> 100 #include <sys/squeue_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include 
<rpc/pmap_prot.h> 107 108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern int ip_squeue_flag; 112 113 /* 114 * Naming conventions: 115 * These rules should be judiciously applied 116 * if there is a need to identify something as IPv6 versus IPv4 117 * IPv6 functions will end with _v6 in the ip module. 118 * IPv6 functions will end with _ipv6 in the transport modules. 119 * IPv6 macros: 120 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 121 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 122 * And then there are ..V4_PART_OF_V6. 123 * The intent is that macros in the ip module end with _V6. 124 * IPv6 global variables will start with ipv6_ 125 * IPv6 structures will start with ipv6 126 * IPv6 defined constants should start with IPV6_ 127 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 128 */ 129 130 /* 131 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 132 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 133 * from IANA. This mechanism will remain in effect until an official 134 * number is obtained. 
135 */ 136 uchar_t ip6opt_ls; 137 138 const in6_addr_t ipv6_all_ones = 139 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 140 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 141 142 #ifdef _BIG_ENDIAN 143 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 144 #else /* _BIG_ENDIAN */ 145 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 146 #endif /* _BIG_ENDIAN */ 147 148 #ifdef _BIG_ENDIAN 149 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 150 #else /* _BIG_ENDIAN */ 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 152 #endif /* _BIG_ENDIAN */ 153 154 #ifdef _BIG_ENDIAN 155 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 156 #else /* _BIG_ENDIAN */ 157 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 158 #endif /* _BIG_ENDIAN */ 159 160 #ifdef _BIG_ENDIAN 161 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 162 #else /* _BIG_ENDIAN */ 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 164 #endif /* _BIG_ENDIAN */ 165 166 #ifdef _BIG_ENDIAN 167 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 168 #else /* _BIG_ENDIAN */ 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 170 #endif /* _BIG_ENDIAN */ 171 172 #ifdef _BIG_ENDIAN 173 const in6_addr_t ipv6_solicited_node_mcast = 174 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 175 #else /* _BIG_ENDIAN */ 176 const in6_addr_t ipv6_solicited_node_mcast = 177 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 178 #endif /* _BIG_ENDIAN */ 179 180 /* Leave room for ip_newroute to tack on the src and target addresses */ 181 #define OK_RESOLVER_MP_V6(mp) \ 182 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 183 184 #define IP6_MBLK_OK 0 185 #define IP6_MBLK_HDR_ERR 1 186 #define IP6_MBLK_LEN_ERR 2 187 188 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *, ill_t *, 189 
boolean_t, zoneid_t); 190 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 191 const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 192 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 193 static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *, 194 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 195 boolean_t, boolean_t); 196 static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *, 197 iulp_t *, ip_stack_t *); 198 static void ip_bind_post_handling_v6(conn_t *, mblk_t *, boolean_t, 199 boolean_t, ip_stack_t *); 200 static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t, 201 const in6_addr_t *, uint16_t, boolean_t); 202 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 203 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 204 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 205 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 206 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 207 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 208 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 209 uint8_t *, uint_t, uint8_t, ip_stack_t *); 210 static mblk_t *ip_rput_frag_v6(ill_t *, ill_t *, mblk_t *, ip6_t *, 211 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 212 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 213 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 214 conn_t *, int, int, zoneid_t); 215 static boolean_t ipif_lookup_testaddr_v6(ill_t *, const in6_addr_t *, 216 ipif_t **); 217 218 /* 219 * A template for an IPv6 AR_ENTRY_QUERY 220 */ 221 static areq_t ipv6_areq_template = { 222 AR_ENTRY_QUERY, /* cmd */ 223 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 224 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 225 IP6_DL_SAP, /* protocol, from arps perspective */ 226 sizeof (areq_t), /* target addr offset */ 227 IPV6_ADDR_LEN, /* target addr_length */ 228 0, 
/* flags */ 229 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 230 IPV6_ADDR_LEN, /* sender addr length */ 231 6, /* xmit_count */ 232 1000, /* (re)xmit_interval in milliseconds */ 233 4 /* max # of requests to buffer */ 234 /* anything else filled in by the code */ 235 }; 236 237 /* 238 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 239 * The message has already been checksummed and if needed, 240 * a copy has been made to be sent any interested ICMP client (conn) 241 * Note that this is different than icmp_inbound() which does the fanout 242 * to conn's as well as local processing of the ICMP packets. 243 * 244 * All error messages are passed to the matching transport stream. 245 * 246 * Zones notes: 247 * The packet is only processed in the context of the specified zone: typically 248 * only this zone will reply to an echo request. This means that the caller must 249 * call icmp_inbound_v6() for each relevant zone. 250 */ 251 static void 252 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 253 uint_t hdr_length, boolean_t mctl_present, uint_t flags, zoneid_t zoneid, 254 mblk_t *dl_mp) 255 { 256 icmp6_t *icmp6; 257 ip6_t *ip6h; 258 boolean_t interested; 259 in6_addr_t origsrc; 260 mblk_t *first_mp; 261 ipsec_in_t *ii; 262 ip_stack_t *ipst = ill->ill_ipst; 263 264 ASSERT(ill != NULL); 265 first_mp = mp; 266 if (mctl_present) { 267 mp = first_mp->b_cont; 268 ASSERT(mp != NULL); 269 270 ii = (ipsec_in_t *)first_mp->b_rptr; 271 ASSERT(ii->ipsec_in_type == IPSEC_IN); 272 } 273 274 ip6h = (ip6_t *)mp->b_rptr; 275 276 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 277 278 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 279 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 280 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 281 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 282 freemsg(first_mp); 283 return; 284 } 285 ip6h = (ip6_t *)mp->b_rptr; 286 } 287 if (ipst->ips_icmp_accept_clear_messages == 0) { 
288 first_mp = ipsec_check_global_policy(first_mp, NULL, 289 NULL, ip6h, mctl_present, ipst->ips_netstack); 290 if (first_mp == NULL) 291 return; 292 } 293 294 /* 295 * On a labeled system, we have to check whether the zone itself is 296 * permitted to receive raw traffic. 297 */ 298 if (is_system_labeled()) { 299 if (zoneid == ALL_ZONES) 300 zoneid = tsol_packet_to_zoneid(mp); 301 if (!tsol_can_accept_raw(mp, B_FALSE)) { 302 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 303 zoneid)); 304 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 305 freemsg(first_mp); 306 return; 307 } 308 } 309 310 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 311 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 312 icmp6->icmp6_code)); 313 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 314 315 /* Initiate IPPF processing here */ 316 if (IP6_IN_IPP(flags, ipst)) { 317 318 /* 319 * If the ifindex changes due to SIOCSLIFINDEX 320 * packet may return to IP on the wrong ill. 321 */ 322 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 323 if (mp == NULL) { 324 if (mctl_present) { 325 freeb(first_mp); 326 } 327 return; 328 } 329 } 330 331 switch (icmp6->icmp6_type) { 332 case ICMP6_DST_UNREACH: 333 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 334 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 335 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 336 break; 337 338 case ICMP6_TIME_EXCEEDED: 339 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 340 break; 341 342 case ICMP6_PARAM_PROB: 343 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 344 break; 345 346 case ICMP6_PACKET_TOO_BIG: 347 icmp_inbound_too_big_v6(q, first_mp, ill, inill, mctl_present, 348 zoneid); 349 return; 350 case ICMP6_ECHO_REQUEST: 351 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 352 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 353 !ipst->ips_ipv6_resp_echo_mcast) 354 break; 355 356 /* 357 * We must have exclusive use of the mblk to convert it to 
358 * a response. 359 * If not, we copy it. 360 */ 361 if (mp->b_datap->db_ref > 1) { 362 mblk_t *mp1; 363 364 mp1 = copymsg(mp); 365 freemsg(mp); 366 if (mp1 == NULL) { 367 BUMP_MIB(ill->ill_icmp6_mib, 368 ipv6IfIcmpInErrors); 369 if (mctl_present) 370 freeb(first_mp); 371 return; 372 } 373 mp = mp1; 374 ip6h = (ip6_t *)mp->b_rptr; 375 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 376 if (mctl_present) 377 first_mp->b_cont = mp; 378 else 379 first_mp = mp; 380 } 381 382 /* 383 * Turn the echo into an echo reply. 384 * Remove any extension headers (do not reverse a source route) 385 * and clear the flow id (keep traffic class for now). 386 */ 387 if (hdr_length != IPV6_HDR_LEN) { 388 int i; 389 390 for (i = 0; i < IPV6_HDR_LEN; i++) 391 mp->b_rptr[hdr_length - i - 1] = 392 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 393 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 394 ip6h = (ip6_t *)mp->b_rptr; 395 ip6h->ip6_nxt = IPPROTO_ICMPV6; 396 hdr_length = IPV6_HDR_LEN; 397 } 398 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 399 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 400 401 ip6h->ip6_plen = 402 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 403 origsrc = ip6h->ip6_src; 404 /* 405 * Reverse the source and destination addresses. 406 * If the return address is a multicast, zero out the source 407 * (ip_wput_v6 will set an address). 408 */ 409 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 410 ip6h->ip6_src = ipv6_all_zeros; 411 ip6h->ip6_dst = origsrc; 412 } else { 413 ip6h->ip6_src = ip6h->ip6_dst; 414 ip6h->ip6_dst = origsrc; 415 } 416 417 /* set the hop limit */ 418 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 419 420 /* 421 * Prepare for checksum by putting icmp length in the icmp 422 * checksum field. The checksum is calculated in ip_wput_v6. 423 */ 424 icmp6->icmp6_cksum = ip6h->ip6_plen; 425 426 if (!mctl_present) { 427 /* 428 * This packet should go out the same way as it 429 * came in i.e in clear. 
To make sure that global 430 * policy will not be applied to this in ip_wput, 431 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 432 */ 433 ASSERT(first_mp == mp); 434 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 435 if (first_mp == NULL) { 436 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 437 freemsg(mp); 438 return; 439 } 440 ii = (ipsec_in_t *)first_mp->b_rptr; 441 442 /* This is not a secure packet */ 443 ii->ipsec_in_secure = B_FALSE; 444 first_mp->b_cont = mp; 445 } 446 ii->ipsec_in_zoneid = zoneid; 447 ASSERT(zoneid != ALL_ZONES); 448 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 449 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 450 return; 451 } 452 put(WR(q), first_mp); 453 return; 454 455 case ICMP6_ECHO_REPLY: 456 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 457 break; 458 459 case ND_ROUTER_SOLICIT: 460 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 461 break; 462 463 case ND_ROUTER_ADVERT: 464 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 465 break; 466 467 case ND_NEIGHBOR_SOLICIT: 468 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 469 if (mctl_present) 470 freeb(first_mp); 471 /* XXX may wish to pass first_mp up to ndp_input someday. */ 472 ndp_input(inill, mp, dl_mp); 473 return; 474 475 case ND_NEIGHBOR_ADVERT: 476 BUMP_MIB(ill->ill_icmp6_mib, 477 ipv6IfIcmpInNeighborAdvertisements); 478 if (mctl_present) 479 freeb(first_mp); 480 /* XXX may wish to pass first_mp up to ndp_input someday. */ 481 ndp_input(inill, mp, dl_mp); 482 return; 483 484 case ND_REDIRECT: { 485 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 486 487 if (ipst->ips_ipv6_ignore_redirect) 488 break; 489 490 /* 491 * As there is no upper client to deliver, we don't 492 * need the first_mp any more. 
493 */ 494 if (mctl_present) 495 freeb(first_mp); 496 if (!pullupmsg(mp, -1)) { 497 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 498 break; 499 } 500 icmp_redirect_v6(q, mp, ill); 501 return; 502 } 503 504 /* 505 * The next three icmp messages will be handled by MLD. 506 * Pass all valid MLD packets up to any process(es) 507 * listening on a raw ICMP socket. MLD messages are 508 * freed by mld_input function. 509 */ 510 case MLD_LISTENER_QUERY: 511 case MLD_LISTENER_REPORT: 512 case MLD_LISTENER_REDUCTION: 513 if (mctl_present) 514 freeb(first_mp); 515 mld_input(q, mp, ill); 516 return; 517 default: 518 break; 519 } 520 if (interested) { 521 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 522 inill, mctl_present, zoneid); 523 } else { 524 freemsg(first_mp); 525 } 526 } 527 528 /* 529 * Process received IPv6 ICMP Packet too big. 530 * After updating any IRE it does the fanout to any matching transport streams. 531 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 532 */ 533 /* ARGSUSED */ 534 static void 535 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 536 boolean_t mctl_present, zoneid_t zoneid) 537 { 538 ip6_t *ip6h; 539 ip6_t *inner_ip6h; 540 icmp6_t *icmp6; 541 uint16_t hdr_length; 542 uint32_t mtu; 543 ire_t *ire, *first_ire; 544 mblk_t *first_mp; 545 ip_stack_t *ipst = ill->ill_ipst; 546 547 first_mp = mp; 548 if (mctl_present) 549 mp = first_mp->b_cont; 550 /* 551 * We must have exclusive use of the mblk to update the MTU 552 * in the packet. 553 * If not, we copy it. 554 * 555 * If there's an M_CTL present, we know that allocated first_mp 556 * earlier in this function, so we know first_mp has refcnt of one. 
557 */ 558 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 559 if (mp->b_datap->db_ref > 1) { 560 mblk_t *mp1; 561 562 mp1 = copymsg(mp); 563 freemsg(mp); 564 if (mp1 == NULL) { 565 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 566 if (mctl_present) 567 freeb(first_mp); 568 return; 569 } 570 mp = mp1; 571 if (mctl_present) 572 first_mp->b_cont = mp; 573 else 574 first_mp = mp; 575 } 576 ip6h = (ip6_t *)mp->b_rptr; 577 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 578 hdr_length = ip_hdr_length_v6(mp, ip6h); 579 else 580 hdr_length = IPV6_HDR_LEN; 581 582 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 583 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 584 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 585 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 586 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 587 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 588 freemsg(first_mp); 589 return; 590 } 591 ip6h = (ip6_t *)mp->b_rptr; 592 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 593 inner_ip6h = (ip6_t *)&icmp6[1]; 594 } 595 596 /* 597 * For link local destinations matching simply on IRE type is not 598 * sufficient. Same link local addresses for different ILL's is 599 * possible. 
600 */ 601 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 602 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 603 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 604 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 605 606 if (first_ire == NULL) { 607 if (ip_debug > 2) { 608 /* ip1dbg */ 609 pr_addr_dbg("icmp_inbound_too_big_v6:" 610 "no ire for dst %s\n", AF_INET6, 611 &inner_ip6h->ip6_dst); 612 } 613 freemsg(first_mp); 614 return; 615 } 616 617 mtu = ntohl(icmp6->icmp6_mtu); 618 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 619 for (ire = first_ire; ire != NULL && 620 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 621 ire = ire->ire_next) { 622 mutex_enter(&ire->ire_lock); 623 if (mtu < IPV6_MIN_MTU) { 624 ip1dbg(("Received mtu less than IPv6 " 625 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 626 mtu = IPV6_MIN_MTU; 627 /* 628 * If an mtu less than IPv6 min mtu is received, 629 * we must include a fragment header in 630 * subsequent packets. 631 */ 632 ire->ire_frag_flag |= IPH_FRAG_HDR; 633 } 634 ip1dbg(("Received mtu from router: %d\n", mtu)); 635 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 636 /* Record the new max frag size for the ULP. */ 637 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 638 /* 639 * If we need a fragment header in every packet 640 * (above case or multirouting), make sure the 641 * ULP takes it into account when computing the 642 * payload size. 
643 */ 644 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 645 sizeof (ip6_frag_t)); 646 } else { 647 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 648 } 649 mutex_exit(&ire->ire_lock); 650 } 651 rw_exit(&first_ire->ire_bucket->irb_lock); 652 ire_refrele(first_ire); 653 } else { 654 irb_t *irb = NULL; 655 /* 656 * for non-link local destinations we match only on the IRE type 657 */ 658 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 659 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 660 ipst); 661 if (ire == NULL) { 662 if (ip_debug > 2) { 663 /* ip1dbg */ 664 pr_addr_dbg("icmp_inbound_too_big_v6:" 665 "no ire for dst %s\n", 666 AF_INET6, &inner_ip6h->ip6_dst); 667 } 668 freemsg(first_mp); 669 return; 670 } 671 irb = ire->ire_bucket; 672 ire_refrele(ire); 673 rw_enter(&irb->irb_lock, RW_READER); 674 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 675 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 676 &inner_ip6h->ip6_dst)) { 677 mtu = ntohl(icmp6->icmp6_mtu); 678 mutex_enter(&ire->ire_lock); 679 if (mtu < IPV6_MIN_MTU) { 680 ip1dbg(("Received mtu less than IPv6" 681 "min mtu %d: %d\n", 682 IPV6_MIN_MTU, mtu)); 683 mtu = IPV6_MIN_MTU; 684 /* 685 * If an mtu less than IPv6 min mtu is 686 * received, we must include a fragment 687 * header in subsequent packets. 688 */ 689 ire->ire_frag_flag |= IPH_FRAG_HDR; 690 } 691 692 ip1dbg(("Received mtu from router: %d\n", mtu)); 693 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 694 /* Record the new max frag size for the ULP. */ 695 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 696 /* 697 * If we need a fragment header in 698 * every packet (above case or 699 * multirouting), make sure the ULP 700 * takes it into account when computing 701 * the payload size. 
702 */ 703 icmp6->icmp6_mtu = 704 htonl(ire->ire_max_frag - 705 sizeof (ip6_frag_t)); 706 } else { 707 icmp6->icmp6_mtu = 708 htonl(ire->ire_max_frag); 709 } 710 mutex_exit(&ire->ire_lock); 711 } 712 } 713 rw_exit(&irb->irb_lock); 714 } 715 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, inill, 716 mctl_present, zoneid); 717 } 718 719 /* 720 * Fanout received ICMPv6 error packets to the transports. 721 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 722 */ 723 void 724 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 725 icmp6_t *icmp6, ill_t *ill, ill_t *inill, boolean_t mctl_present, 726 zoneid_t zoneid) 727 { 728 uint16_t *up; /* Pointer to ports in ULP header */ 729 uint32_t ports; /* reversed ports for fanout */ 730 ip6_t rip6h; /* With reversed addresses */ 731 uint16_t hdr_length; 732 uint8_t *nexthdrp; 733 uint8_t nexthdr; 734 mblk_t *first_mp; 735 ipsec_in_t *ii; 736 tcpha_t *tcpha; 737 conn_t *connp; 738 ip_stack_t *ipst = ill->ill_ipst; 739 740 first_mp = mp; 741 if (mctl_present) { 742 mp = first_mp->b_cont; 743 ASSERT(mp != NULL); 744 745 ii = (ipsec_in_t *)first_mp->b_rptr; 746 ASSERT(ii->ipsec_in_type == IPSEC_IN); 747 } else { 748 ii = NULL; 749 } 750 751 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 752 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 753 754 /* 755 * Need to pullup everything in order to use 756 * ip_hdr_length_nexthdr_v6() 757 */ 758 if (mp->b_cont != NULL) { 759 if (!pullupmsg(mp, -1)) { 760 ip1dbg(("icmp_inbound_error_fanout_v6: " 761 "pullupmsg failed\n")); 762 goto drop_pkt; 763 } 764 ip6h = (ip6_t *)mp->b_rptr; 765 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 766 } 767 768 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 769 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 770 goto drop_pkt; 771 772 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 773 goto drop_pkt; 774 nexthdr = *nexthdrp; 775 776 /* Set message type, must be done 
after pullups */ 777 mp->b_datap->db_type = M_CTL; 778 779 /* Try to pass the ICMP message to clients who need it */ 780 switch (nexthdr) { 781 case IPPROTO_UDP: { 782 /* 783 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 784 * UDP header to get the port information. 785 */ 786 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 787 mp->b_wptr) { 788 break; 789 } 790 /* 791 * Attempt to find a client stream based on port. 792 * Note that we do a reverse lookup since the header is 793 * in the form we sent it out. 794 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 795 * and we only set the src and dst addresses and nexthdr. 796 */ 797 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 798 rip6h.ip6_src = ip6h->ip6_dst; 799 rip6h.ip6_dst = ip6h->ip6_src; 800 rip6h.ip6_nxt = nexthdr; 801 ((uint16_t *)&ports)[0] = up[1]; 802 ((uint16_t *)&ports)[1] = up[0]; 803 804 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, inill, 805 IP6_NO_IPPOLICY, mctl_present, zoneid); 806 return; 807 } 808 case IPPROTO_TCP: { 809 /* 810 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 811 * the TCP header to get the port information. 812 */ 813 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 814 mp->b_wptr) { 815 break; 816 } 817 818 /* 819 * Attempt to find a client stream based on port. 820 * Note that we do a reverse lookup since the header is 821 * in the form we sent it out. 822 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 823 * we only set the src and dst addresses and nexthdr. 
824 */ 825 826 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 827 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 828 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 829 if (connp == NULL) { 830 goto drop_pkt; 831 } 832 833 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp, 834 SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR); 835 return; 836 837 } 838 case IPPROTO_SCTP: 839 /* 840 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 841 * the SCTP header to get the port information. 842 */ 843 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 844 mp->b_wptr) { 845 break; 846 } 847 848 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 849 ((uint16_t *)&ports)[0] = up[1]; 850 ((uint16_t *)&ports)[1] = up[0]; 851 ip_fanout_sctp(first_mp, inill, (ipha_t *)ip6h, ports, 0, 852 mctl_present, IP6_NO_IPPOLICY, zoneid); 853 return; 854 case IPPROTO_ESP: 855 case IPPROTO_AH: { 856 int ipsec_rc; 857 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 858 859 /* 860 * We need a IPSEC_IN in the front to fanout to AH/ESP. 861 * We will re-use the IPSEC_IN if it is already present as 862 * AH/ESP will not affect any fields in the IPSEC_IN for 863 * ICMP errors. If there is no IPSEC_IN, allocate a new 864 * one and attach it in the front. 865 */ 866 if (ii != NULL) { 867 /* 868 * ip_fanout_proto_again converts the ICMP errors 869 * that come back from AH/ESP to M_DATA so that 870 * if it is non-AH/ESP and we do a pullupmsg in 871 * this function, it would work. Convert it back 872 * to M_CTL before we send up as this is a ICMP 873 * error. This could have been generated locally or 874 * by some router. Validate the inner IPSEC 875 * headers. 876 * 877 * NOTE : ill_index is used by ip_fanout_proto_again 878 * to locate the ill. 
879 */ 880 ASSERT(ill != NULL); 881 ii->ipsec_in_ill_index = 882 ill->ill_phyint->phyint_ifindex; 883 ii->ipsec_in_rill_index = 884 inill->ill_phyint->phyint_ifindex; 885 first_mp->b_cont->b_datap->db_type = M_CTL; 886 } else { 887 /* 888 * IPSEC_IN is not present. We attach a ipsec_in 889 * message and send up to IPSEC for validating 890 * and removing the IPSEC headers. Clear 891 * ipsec_in_secure so that when we return 892 * from IPSEC, we don't mistakenly think that this 893 * is a secure packet came from the network. 894 * 895 * NOTE : ill_index is used by ip_fanout_proto_again 896 * to locate the ill. 897 */ 898 ASSERT(first_mp == mp); 899 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 900 ASSERT(ill != NULL); 901 if (first_mp == NULL) { 902 freemsg(mp); 903 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 904 return; 905 } 906 ii = (ipsec_in_t *)first_mp->b_rptr; 907 908 /* This is not a secure packet */ 909 ii->ipsec_in_secure = B_FALSE; 910 first_mp->b_cont = mp; 911 mp->b_datap->db_type = M_CTL; 912 ii->ipsec_in_ill_index = 913 ill->ill_phyint->phyint_ifindex; 914 ii->ipsec_in_rill_index = 915 inill->ill_phyint->phyint_ifindex; 916 } 917 918 if (!ipsec_loaded(ipss)) { 919 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 920 return; 921 } 922 923 if (nexthdr == IPPROTO_ESP) 924 ipsec_rc = ipsecesp_icmp_error(first_mp); 925 else 926 ipsec_rc = ipsecah_icmp_error(first_mp); 927 if (ipsec_rc == IPSEC_STATUS_FAILED) 928 return; 929 930 ip_fanout_proto_again(first_mp, ill, inill, NULL); 931 return; 932 } 933 case IPPROTO_ENCAP: 934 case IPPROTO_IPV6: 935 if ((uint8_t *)ip6h + hdr_length + 936 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 937 sizeof (ip6_t)) > mp->b_wptr) { 938 goto drop_pkt; 939 } 940 941 if (nexthdr == IPPROTO_ENCAP || 942 !IN6_ARE_ADDR_EQUAL( 943 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 944 &ip6h->ip6_src) || 945 !IN6_ARE_ADDR_EQUAL( 946 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 947 &ip6h->ip6_dst)) { 948 /* 949 * For tunnels that have used IPsec protection, 950 * we need to adjust the MTU to take into account 951 * the IPsec overhead. 952 */ 953 if (ii != NULL) 954 icmp6->icmp6_mtu = htonl( 955 ntohl(icmp6->icmp6_mtu) - 956 ipsec_in_extra_length(first_mp)); 957 } else { 958 /* 959 * Self-encapsulated case. As in the ipv4 case, 960 * we need to strip the 2nd IP header. Since mp 961 * is already pulled-up, we can simply bcopy 962 * the 3rd header + data over the 2nd header. 963 */ 964 uint16_t unused_len; 965 ip6_t *inner_ip6h = (ip6_t *) 966 ((uchar_t *)ip6h + hdr_length); 967 968 /* 969 * Make sure we don't do recursion more than once. 970 */ 971 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 972 &unused_len, &nexthdrp) || 973 *nexthdrp == IPPROTO_IPV6) { 974 goto drop_pkt; 975 } 976 977 /* 978 * We are about to modify the packet. Make a copy if 979 * someone else has a reference to it. 980 */ 981 if (DB_REF(mp) > 1) { 982 mblk_t *mp1; 983 uint16_t icmp6_offset; 984 985 mp1 = copymsg(mp); 986 if (mp1 == NULL) { 987 goto drop_pkt; 988 } 989 icmp6_offset = (uint16_t) 990 ((uchar_t *)icmp6 - mp->b_rptr); 991 freemsg(mp); 992 mp = mp1; 993 994 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 995 ip6h = (ip6_t *)&icmp6[1]; 996 inner_ip6h = (ip6_t *) 997 ((uchar_t *)ip6h + hdr_length); 998 999 if (mctl_present) 1000 first_mp->b_cont = mp; 1001 else 1002 first_mp = mp; 1003 } 1004 1005 /* 1006 * Need to set db_type back to M_DATA before 1007 * refeeding mp into this function. 1008 */ 1009 DB_TYPE(mp) = M_DATA; 1010 1011 /* 1012 * Copy the 3rd header + remaining data on top 1013 * of the 2nd header. 
1014 */ 1015 bcopy(inner_ip6h, ip6h, 1016 mp->b_wptr - (uchar_t *)inner_ip6h); 1017 1018 /* 1019 * Subtract length of the 2nd header. 1020 */ 1021 mp->b_wptr -= hdr_length; 1022 1023 /* 1024 * Now recurse, and see what I _really_ should be 1025 * doing here. 1026 */ 1027 icmp_inbound_error_fanout_v6(q, first_mp, 1028 (ip6_t *)mp->b_rptr, icmp6, ill, inill, 1029 mctl_present, zoneid); 1030 return; 1031 } 1032 /* FALLTHRU */ 1033 default: 1034 /* 1035 * The rip6h header is only used for the lookup and we 1036 * only set the src and dst addresses and nexthdr. 1037 */ 1038 rip6h.ip6_src = ip6h->ip6_dst; 1039 rip6h.ip6_dst = ip6h->ip6_src; 1040 rip6h.ip6_nxt = nexthdr; 1041 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, inill, nexthdr, 0, 1042 IP6_NO_IPPOLICY, mctl_present, zoneid); 1043 return; 1044 } 1045 /* NOTREACHED */ 1046 drop_pkt: 1047 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1048 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1049 freemsg(first_mp); 1050 } 1051 1052 /* 1053 * Process received IPv6 ICMP Redirect messages. 
1054 */ 1055 /* ARGSUSED */ 1056 static void 1057 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1058 { 1059 ip6_t *ip6h; 1060 uint16_t hdr_length; 1061 nd_redirect_t *rd; 1062 ire_t *ire; 1063 ire_t *prev_ire; 1064 ire_t *redir_ire; 1065 in6_addr_t *src, *dst, *gateway; 1066 nd_opt_hdr_t *opt; 1067 nce_t *nce; 1068 int nce_flags = 0; 1069 int err = 0; 1070 boolean_t redirect_to_router = B_FALSE; 1071 int len; 1072 int optlen; 1073 iulp_t ulp_info = { 0 }; 1074 ill_t *prev_ire_ill; 1075 ipif_t *ipif; 1076 ip_stack_t *ipst = ill->ill_ipst; 1077 1078 ip6h = (ip6_t *)mp->b_rptr; 1079 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1080 hdr_length = ip_hdr_length_v6(mp, ip6h); 1081 else 1082 hdr_length = IPV6_HDR_LEN; 1083 1084 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1085 len = mp->b_wptr - mp->b_rptr - hdr_length; 1086 src = &ip6h->ip6_src; 1087 dst = &rd->nd_rd_dst; 1088 gateway = &rd->nd_rd_target; 1089 1090 /* Verify if it is a valid redirect */ 1091 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1092 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1093 (rd->nd_rd_code != 0) || 1094 (len < sizeof (nd_redirect_t)) || 1095 (IN6_IS_ADDR_V4MAPPED(dst)) || 1096 (IN6_IS_ADDR_MULTICAST(dst))) { 1097 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1098 freemsg(mp); 1099 return; 1100 } 1101 1102 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1103 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1104 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1105 freemsg(mp); 1106 return; 1107 } 1108 1109 if (len > sizeof (nd_redirect_t)) { 1110 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1111 len - sizeof (nd_redirect_t))) { 1112 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1113 freemsg(mp); 1114 return; 1115 } 1116 } 1117 1118 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1119 redirect_to_router = B_TRUE; 1120 nce_flags |= NCE_F_ISROUTER; 1121 } 1122 1123 /* ipif will be refreleased afterwards */ 1124 ipif = ipif_get_next_ipif(NULL, ill); 1125 if (ipif == NULL) { 1126 freemsg(mp); 1127 return; 1128 
} 1129 1130 /* 1131 * Verify that the IP source address of the redirect is 1132 * the same as the current first-hop router for the specified 1133 * ICMP destination address. 1134 * Also, Make sure we had a route for the dest in question and 1135 * that route was pointing to the old gateway (the source of the 1136 * redirect packet.) 1137 */ 1138 1139 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, ALL_ZONES, 1140 NULL, MATCH_IRE_GW | MATCH_IRE_ILL | MATCH_IRE_DEFAULT, ipst); 1141 1142 /* 1143 * Check that 1144 * the redirect was not from ourselves 1145 * old gateway is still directly reachable 1146 */ 1147 if (prev_ire == NULL || 1148 prev_ire->ire_type == IRE_LOCAL) { 1149 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1150 ipif_refrele(ipif); 1151 goto fail_redirect; 1152 } 1153 prev_ire_ill = ire_to_ill(prev_ire); 1154 ASSERT(prev_ire_ill != NULL); 1155 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1156 nce_flags |= NCE_F_NONUD; 1157 1158 /* 1159 * Should we use the old ULP info to create the new gateway? From 1160 * a user's perspective, we should inherit the info so that it 1161 * is a "smooth" transition. If we do not do that, then new 1162 * connections going thru the new gateway will have no route metrics, 1163 * which is counter-intuitive to user. From a network point of 1164 * view, this may or may not make sense even though the new gateway 1165 * is still directly connected to us so the route metrics should not 1166 * change much. 1167 * 1168 * But if the old ire_uinfo is not initialized, we do another 1169 * recursive lookup on the dest using the new gateway. There may 1170 * be a route to that. If so, use it to initialize the redirect 1171 * route. 1172 */ 1173 if (prev_ire->ire_uinfo.iulp_set) { 1174 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1175 } else if (redirect_to_router) { 1176 /* 1177 * Only do the following if the redirection is really to 1178 * a router. 
1179 */ 1180 ire_t *tmp_ire; 1181 ire_t *sire; 1182 1183 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1184 ALL_ZONES, 0, NULL, 1185 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1186 ipst); 1187 if (sire != NULL) { 1188 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1189 ASSERT(tmp_ire != NULL); 1190 ire_refrele(tmp_ire); 1191 ire_refrele(sire); 1192 } else if (tmp_ire != NULL) { 1193 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1194 sizeof (iulp_t)); 1195 ire_refrele(tmp_ire); 1196 } 1197 } 1198 1199 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1200 opt = (nd_opt_hdr_t *)&rd[1]; 1201 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1202 if (opt != NULL) { 1203 err = ndp_lookup_then_add_v6(ill, 1204 B_FALSE, /* don't match across illgrp */ 1205 (uchar_t *)&opt[1], /* Link layer address */ 1206 gateway, 1207 &ipv6_all_ones, /* prefix mask */ 1208 &ipv6_all_zeros, /* Mapping mask */ 1209 0, 1210 nce_flags, 1211 ND_STALE, 1212 &nce); 1213 switch (err) { 1214 case 0: 1215 NCE_REFRELE(nce); 1216 break; 1217 case EEXIST: 1218 /* 1219 * Check to see if link layer address has changed and 1220 * process the nce_state accordingly. 1221 */ 1222 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1223 NCE_REFRELE(nce); 1224 break; 1225 default: 1226 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1227 err)); 1228 ipif_refrele(ipif); 1229 goto fail_redirect; 1230 } 1231 } 1232 if (redirect_to_router) { 1233 /* icmp_redirect_ok_v6() must have already verified this */ 1234 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1235 1236 /* 1237 * Create a Route Association. This will allow us to remember 1238 * a router told us to use the particular gateway. 
1239 */ 1240 ire = ire_create_v6( 1241 dst, 1242 &ipv6_all_ones, /* mask */ 1243 &prev_ire->ire_src_addr_v6, /* source addr */ 1244 gateway, /* gateway addr */ 1245 &prev_ire->ire_max_frag, /* max frag */ 1246 NULL, /* no src nce */ 1247 NULL, /* no rfq */ 1248 NULL, /* no stq */ 1249 IRE_HOST, 1250 prev_ire->ire_ipif, 1251 NULL, 1252 0, 1253 0, 1254 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1255 &ulp_info, 1256 NULL, 1257 NULL, 1258 ipst); 1259 } else { 1260 queue_t *stq; 1261 1262 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1263 ? ipif->ipif_rq : ipif->ipif_wq; 1264 1265 /* 1266 * Just create an on link entry, i.e. interface route. 1267 */ 1268 ire = ire_create_v6( 1269 dst, /* gateway == dst */ 1270 &ipv6_all_ones, /* mask */ 1271 &prev_ire->ire_src_addr_v6, /* source addr */ 1272 &ipv6_all_zeros, /* gateway addr */ 1273 &prev_ire->ire_max_frag, /* max frag */ 1274 NULL, /* no src nce */ 1275 NULL, /* ire rfq */ 1276 stq, /* ire stq */ 1277 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1278 prev_ire->ire_ipif, 1279 &ipv6_all_ones, 1280 0, 1281 0, 1282 (RTF_DYNAMIC | RTF_HOST), 1283 &ulp_info, 1284 NULL, 1285 NULL, 1286 ipst); 1287 } 1288 1289 /* Release reference from earlier ipif_get_next_ipif() */ 1290 ipif_refrele(ipif); 1291 1292 if (ire == NULL) 1293 goto fail_redirect; 1294 1295 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1296 1297 /* tell routing sockets that we received a redirect */ 1298 ip_rts_change_v6(RTM_REDIRECT, 1299 &rd->nd_rd_dst, 1300 &rd->nd_rd_target, 1301 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1302 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1303 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1304 1305 /* 1306 * Delete any existing IRE_HOST type ires for this destination. 1307 * This together with the added IRE has the effect of 1308 * modifying an existing redirect. 
1309 */ 1310 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1311 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1312 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), ipst); 1313 1314 ire_refrele(ire); /* Held in ire_add_v6 */ 1315 1316 if (redir_ire != NULL) { 1317 if (redir_ire->ire_flags & RTF_DYNAMIC) 1318 ire_delete(redir_ire); 1319 ire_refrele(redir_ire); 1320 } 1321 } 1322 1323 if (prev_ire->ire_type == IRE_CACHE) 1324 ire_delete(prev_ire); 1325 ire_refrele(prev_ire); 1326 prev_ire = NULL; 1327 1328 fail_redirect: 1329 if (prev_ire != NULL) 1330 ire_refrele(prev_ire); 1331 freemsg(mp); 1332 } 1333 1334 static ill_t * 1335 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1336 { 1337 ill_t *ill; 1338 1339 ASSERT(WR(q) == q); 1340 1341 if (q->q_next != NULL) { 1342 ill = (ill_t *)q->q_ptr; 1343 if (ILL_CAN_LOOKUP(ill)) 1344 ill_refhold(ill); 1345 else 1346 ill = NULL; 1347 } else { 1348 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1349 NULL, NULL, NULL, NULL, NULL, ipst); 1350 } 1351 if (ill == NULL) 1352 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1353 return (ill); 1354 } 1355 1356 /* 1357 * Assigns an appropriate source address to the packet. 1358 * If origdst is one of our IP addresses that use it as the source. 1359 * If the queue is an ill queue then select a source from that ill. 1360 * Otherwise pick a source based on a route lookup back to the origsrc. 1361 * 1362 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1363 */ 1364 static in6_addr_t * 1365 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1366 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1367 { 1368 ill_t *ill; 1369 ire_t *ire; 1370 ipif_t *ipif; 1371 1372 ASSERT(!(wq->q_flag & QREADR)); 1373 if (wq->q_next != NULL) { 1374 ill = (ill_t *)wq->q_ptr; 1375 } else { 1376 ill = NULL; 1377 } 1378 1379 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1380 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1381 ipst); 1382 if (ire != NULL) { 1383 /* Destined to one of our addresses */ 1384 *src = *origdst; 1385 ire_refrele(ire); 1386 return (src); 1387 } 1388 if (ire != NULL) { 1389 ire_refrele(ire); 1390 ire = NULL; 1391 } 1392 if (ill == NULL) { 1393 /* What is the route back to the original source? */ 1394 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1395 NULL, NULL, zoneid, NULL, 1396 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1397 if (ire == NULL) { 1398 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1399 return (NULL); 1400 } 1401 ASSERT(ire->ire_ipif != NULL); 1402 ill = ire->ire_ipif->ipif_ill; 1403 ire_refrele(ire); 1404 } 1405 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1406 IPV6_PREFER_SRC_DEFAULT, zoneid); 1407 if (ipif != NULL) { 1408 *src = ipif->ipif_v6src_addr; 1409 ipif_refrele(ipif); 1410 return (src); 1411 } 1412 /* 1413 * Unusual case - can't find a usable source address to reach the 1414 * original source. Use what in the route to the source. 
1415 */ 1416 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1417 NULL, NULL, zoneid, NULL, 1418 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1419 if (ire == NULL) { 1420 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1421 return (NULL); 1422 } 1423 ASSERT(ire != NULL); 1424 *src = ire->ire_src_addr_v6; 1425 ire_refrele(ire); 1426 return (src); 1427 } 1428 1429 /* 1430 * Build and ship an IPv6 ICMP message using the packet data in mp, 1431 * and the ICMP header pointed to by "stuff". (May be called as 1432 * writer.) 1433 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1434 * verify that an icmp error packet can be sent. 1435 * 1436 * If q is an ill write side queue (which is the case when packets 1437 * arrive from ip_rput) then ip_wput code will ensure that packets to 1438 * link-local destinations are sent out that ill. 1439 * 1440 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1441 * source address (see above function). 1442 */ 1443 static void 1444 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1445 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1446 ip_stack_t *ipst) 1447 { 1448 ip6_t *ip6h; 1449 in6_addr_t v6dst; 1450 size_t len_needed; 1451 size_t msg_len; 1452 mblk_t *mp1; 1453 icmp6_t *icmp6; 1454 ill_t *ill; 1455 in6_addr_t v6src; 1456 mblk_t *ipsec_mp; 1457 ipsec_out_t *io; 1458 1459 ill = ip_queue_to_ill_v6(q, ipst); 1460 if (ill == NULL) { 1461 freemsg(mp); 1462 return; 1463 } 1464 1465 if (mctl_present) { 1466 /* 1467 * If it is : 1468 * 1469 * 1) a IPSEC_OUT, then this is caused by outbound 1470 * datagram originating on this host. IPSEC processing 1471 * may or may not have been done. Refer to comments above 1472 * icmp_inbound_error_fanout for details. 1473 * 1474 * 2) a IPSEC_IN if we are generating a icmp_message 1475 * for an incoming datagram destined for us i.e called 1476 * from ip_fanout_send_icmp. 
1477 */ 1478 ipsec_info_t *in; 1479 1480 ipsec_mp = mp; 1481 mp = ipsec_mp->b_cont; 1482 1483 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1484 ip6h = (ip6_t *)mp->b_rptr; 1485 1486 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1487 in->ipsec_info_type == IPSEC_IN); 1488 1489 if (in->ipsec_info_type == IPSEC_IN) { 1490 /* 1491 * Convert the IPSEC_IN to IPSEC_OUT. 1492 */ 1493 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1494 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1495 ill_refrele(ill); 1496 return; 1497 } 1498 } else { 1499 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1500 io = (ipsec_out_t *)in; 1501 /* 1502 * Clear out ipsec_out_proc_begin, so we do a fresh 1503 * ire lookup. 1504 */ 1505 io->ipsec_out_proc_begin = B_FALSE; 1506 } 1507 } else { 1508 /* 1509 * This is in clear. The icmp message we are building 1510 * here should go out in clear. 1511 */ 1512 ipsec_in_t *ii; 1513 ASSERT(mp->b_datap->db_type == M_DATA); 1514 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1515 if (ipsec_mp == NULL) { 1516 freemsg(mp); 1517 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1518 ill_refrele(ill); 1519 return; 1520 } 1521 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1522 1523 /* This is not a secure packet */ 1524 ii->ipsec_in_secure = B_FALSE; 1525 /* 1526 * For trusted extensions using a shared IP address we can 1527 * send using any zoneid. 1528 */ 1529 if (zoneid == ALL_ZONES) 1530 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1531 else 1532 ii->ipsec_in_zoneid = zoneid; 1533 ipsec_mp->b_cont = mp; 1534 ip6h = (ip6_t *)mp->b_rptr; 1535 /* 1536 * Convert the IPSEC_IN to IPSEC_OUT. 
1537 */ 1538 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1539 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1540 ill_refrele(ill); 1541 return; 1542 } 1543 } 1544 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1545 1546 if (v6src_ptr != NULL) { 1547 v6src = *v6src_ptr; 1548 } else { 1549 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1550 &v6src, zoneid, ipst) == NULL) { 1551 freemsg(ipsec_mp); 1552 ill_refrele(ill); 1553 return; 1554 } 1555 } 1556 v6dst = ip6h->ip6_src; 1557 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1558 msg_len = msgdsize(mp); 1559 if (msg_len > len_needed) { 1560 if (!adjmsg(mp, len_needed - msg_len)) { 1561 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1562 freemsg(ipsec_mp); 1563 ill_refrele(ill); 1564 return; 1565 } 1566 msg_len = len_needed; 1567 } 1568 mp1 = allocb_cred(IPV6_HDR_LEN + len, DB_CRED(mp)); 1569 if (mp1 == NULL) { 1570 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1571 freemsg(ipsec_mp); 1572 ill_refrele(ill); 1573 return; 1574 } 1575 ill_refrele(ill); 1576 mp1->b_cont = mp; 1577 mp = mp1; 1578 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1579 io->ipsec_out_type == IPSEC_OUT); 1580 ipsec_mp->b_cont = mp; 1581 1582 /* 1583 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1584 * node generates be accepted in peace by all on-host destinations. 1585 * If we do NOT assume that all on-host destinations trust 1586 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1587 * (Look for ipsec_out_icmp_loopback). 
1588 */ 1589 io->ipsec_out_icmp_loopback = B_TRUE; 1590 1591 ip6h = (ip6_t *)mp->b_rptr; 1592 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1593 1594 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1595 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1596 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1597 ip6h->ip6_dst = v6dst; 1598 ip6h->ip6_src = v6src; 1599 msg_len += IPV6_HDR_LEN + len; 1600 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1601 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1602 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1603 } 1604 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1605 icmp6 = (icmp6_t *)&ip6h[1]; 1606 bcopy(stuff, (char *)icmp6, len); 1607 /* 1608 * Prepare for checksum by putting icmp length in the icmp 1609 * checksum field. The checksum is calculated in ip_wput_v6. 1610 */ 1611 icmp6->icmp6_cksum = ip6h->ip6_plen; 1612 if (icmp6->icmp6_type == ND_REDIRECT) { 1613 ip6h->ip6_hops = IPV6_MAX_HOPS; 1614 } 1615 /* Send to V6 writeside put routine */ 1616 put(q, ipsec_mp); 1617 } 1618 1619 /* 1620 * Update the output mib when ICMPv6 packets are sent. 
1621 */ 1622 static void 1623 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1624 { 1625 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1626 1627 switch (icmp6->icmp6_type) { 1628 case ICMP6_DST_UNREACH: 1629 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1630 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1631 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1632 break; 1633 1634 case ICMP6_TIME_EXCEEDED: 1635 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1636 break; 1637 1638 case ICMP6_PARAM_PROB: 1639 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1640 break; 1641 1642 case ICMP6_PACKET_TOO_BIG: 1643 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1644 break; 1645 1646 case ICMP6_ECHO_REQUEST: 1647 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1648 break; 1649 1650 case ICMP6_ECHO_REPLY: 1651 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1652 break; 1653 1654 case ND_ROUTER_SOLICIT: 1655 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1656 break; 1657 1658 case ND_ROUTER_ADVERT: 1659 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1660 break; 1661 1662 case ND_NEIGHBOR_SOLICIT: 1663 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1664 break; 1665 1666 case ND_NEIGHBOR_ADVERT: 1667 BUMP_MIB(ill->ill_icmp6_mib, 1668 ipv6IfIcmpOutNeighborAdvertisements); 1669 break; 1670 1671 case ND_REDIRECT: 1672 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1673 break; 1674 1675 case MLD_LISTENER_QUERY: 1676 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1677 break; 1678 1679 case MLD_LISTENER_REPORT: 1680 case MLD_V2_LISTENER_REPORT: 1681 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1682 break; 1683 1684 case MLD_LISTENER_REDUCTION: 1685 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1686 break; 1687 } 1688 } 1689 1690 /* 1691 * Check if it is ok to send an ICMPv6 error packet in 1692 * response to the IP packet in 
mp. 1693 * Free the message and return null if no 1694 * ICMP error packet should be sent. 1695 */ 1696 static mblk_t * 1697 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1698 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1699 { 1700 ip6_t *ip6h; 1701 1702 if (!mp) 1703 return (NULL); 1704 1705 ip6h = (ip6_t *)mp->b_rptr; 1706 1707 /* Check if source address uniquely identifies the host */ 1708 1709 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1710 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1711 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1712 freemsg(mp); 1713 return (NULL); 1714 } 1715 1716 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1717 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1718 icmp6_t *icmp6; 1719 1720 if (mp->b_wptr - mp->b_rptr < len_needed) { 1721 if (!pullupmsg(mp, len_needed)) { 1722 ill_t *ill; 1723 1724 ill = ip_queue_to_ill_v6(q, ipst); 1725 if (ill == NULL) { 1726 BUMP_MIB(&ipst->ips_icmp6_mib, 1727 ipv6IfIcmpInErrors); 1728 } else { 1729 BUMP_MIB(ill->ill_icmp6_mib, 1730 ipv6IfIcmpInErrors); 1731 ill_refrele(ill); 1732 } 1733 freemsg(mp); 1734 return (NULL); 1735 } 1736 ip6h = (ip6_t *)mp->b_rptr; 1737 } 1738 icmp6 = (icmp6_t *)&ip6h[1]; 1739 /* Explicitly do not generate errors in response to redirects */ 1740 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1741 icmp6->icmp6_type == ND_REDIRECT) { 1742 freemsg(mp); 1743 return (NULL); 1744 } 1745 } 1746 /* 1747 * Check that the destination is not multicast and that the packet 1748 * was not sent on link layer broadcast or multicast. (Exception 1749 * is Packet too big message as per the draft - when mcast_ok is set.) 1750 */ 1751 if (!mcast_ok && 1752 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1753 freemsg(mp); 1754 return (NULL); 1755 } 1756 if (icmp_err_rate_limit(ipst)) { 1757 /* 1758 * Only send ICMP error packets every so often. 1759 * This should be done on a per port/source basis, 1760 * but for now this will suffice. 
1761 */ 1762 freemsg(mp); 1763 return (NULL); 1764 } 1765 return (mp); 1766 } 1767 1768 /* 1769 * Generate an ICMPv6 redirect message. 1770 * Include target link layer address option if it exits. 1771 * Always include redirect header. 1772 */ 1773 static void 1774 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1775 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1776 { 1777 nd_redirect_t *rd; 1778 nd_opt_rd_hdr_t *rdh; 1779 uchar_t *buf; 1780 nce_t *nce = NULL; 1781 nd_opt_hdr_t *opt; 1782 int len; 1783 int ll_opt_len = 0; 1784 int max_redir_hdr_data_len; 1785 int pkt_len; 1786 in6_addr_t *srcp; 1787 ip_stack_t *ipst = ill->ill_ipst; 1788 1789 /* 1790 * We are called from ip_rput where we could 1791 * not have attached an IPSEC_IN. 1792 */ 1793 ASSERT(mp->b_datap->db_type == M_DATA); 1794 1795 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1796 if (mp == NULL) 1797 return; 1798 nce = ndp_lookup_v6(ill, B_TRUE, targetp, B_FALSE); 1799 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1800 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1801 ill->ill_phys_addr_length + 7)/8 * 8; 1802 } 1803 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1804 ASSERT(len % 4 == 0); 1805 buf = kmem_alloc(len, KM_NOSLEEP); 1806 if (buf == NULL) { 1807 if (nce != NULL) 1808 NCE_REFRELE(nce); 1809 freemsg(mp); 1810 return; 1811 } 1812 1813 rd = (nd_redirect_t *)buf; 1814 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1815 rd->nd_rd_code = 0; 1816 rd->nd_rd_reserved = 0; 1817 rd->nd_rd_target = *targetp; 1818 rd->nd_rd_dst = *dest; 1819 1820 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1821 if (nce != NULL && ll_opt_len != 0) { 1822 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1823 opt->nd_opt_len = ll_opt_len/8; 1824 bcopy((char *)nce->nce_res_mp->b_rptr + 1825 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1826 ill->ill_phys_addr_length); 1827 } 1828 if (nce != NULL) 1829 NCE_REFRELE(nce); 1830 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + 
ll_opt_len); 1831 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1832 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1833 max_redir_hdr_data_len = 1834 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1835 pkt_len = msgdsize(mp); 1836 /* Make sure mp is 8 byte aligned */ 1837 if (pkt_len > max_redir_hdr_data_len) { 1838 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1839 sizeof (nd_opt_rd_hdr_t))/8; 1840 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1841 } else { 1842 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1843 (void) adjmsg(mp, -(pkt_len % 8)); 1844 } 1845 rdh->nd_opt_rh_reserved1 = 0; 1846 rdh->nd_opt_rh_reserved2 = 0; 1847 /* ipif_v6src_addr contains the link-local source address */ 1848 srcp = &ill->ill_ipif->ipif_v6src_addr; 1849 1850 /* Redirects sent by router, and router is global zone */ 1851 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1852 kmem_free(buf, len); 1853 } 1854 1855 1856 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1857 void 1858 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1859 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1860 ip_stack_t *ipst) 1861 { 1862 icmp6_t icmp6; 1863 boolean_t mctl_present; 1864 mblk_t *first_mp; 1865 1866 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1867 1868 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1869 if (mp == NULL) { 1870 if (mctl_present) 1871 freeb(first_mp); 1872 return; 1873 } 1874 bzero(&icmp6, sizeof (icmp6_t)); 1875 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1876 icmp6.icmp6_code = code; 1877 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1878 zoneid, ipst); 1879 } 1880 1881 /* 1882 * Generate an ICMP unreachable message. 
1883 */ 1884 void 1885 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1886 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1887 ip_stack_t *ipst) 1888 { 1889 icmp6_t icmp6; 1890 boolean_t mctl_present; 1891 mblk_t *first_mp; 1892 1893 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1894 1895 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1896 if (mp == NULL) { 1897 if (mctl_present) 1898 freeb(first_mp); 1899 return; 1900 } 1901 bzero(&icmp6, sizeof (icmp6_t)); 1902 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1903 icmp6.icmp6_code = code; 1904 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1905 zoneid, ipst); 1906 } 1907 1908 /* 1909 * Generate an ICMP pkt too big message. 1910 */ 1911 static void 1912 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1913 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1914 { 1915 icmp6_t icmp6; 1916 mblk_t *first_mp; 1917 boolean_t mctl_present; 1918 1919 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1920 1921 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1922 if (mp == NULL) { 1923 if (mctl_present) 1924 freeb(first_mp); 1925 return; 1926 } 1927 bzero(&icmp6, sizeof (icmp6_t)); 1928 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1929 icmp6.icmp6_code = 0; 1930 icmp6.icmp6_mtu = htonl(mtu); 1931 1932 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1933 zoneid, ipst); 1934 } 1935 1936 /* 1937 * Generate an ICMP parameter problem message. (May be called as writer.) 1938 * 'offset' is the offset from the beginning of the packet in error. 
1939 */ 1940 static void 1941 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 1942 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1943 ip_stack_t *ipst) 1944 { 1945 icmp6_t icmp6; 1946 boolean_t mctl_present; 1947 mblk_t *first_mp; 1948 1949 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1950 1951 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1952 if (mp == NULL) { 1953 if (mctl_present) 1954 freeb(first_mp); 1955 return; 1956 } 1957 bzero((char *)&icmp6, sizeof (icmp6_t)); 1958 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1959 icmp6.icmp6_code = code; 1960 icmp6.icmp6_pptr = htonl(offset); 1961 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1962 zoneid, ipst); 1963 } 1964 1965 /* 1966 * This code will need to take into account the possibility of binding 1967 * to a link local address on a multi-homed host, in which case the 1968 * outgoing interface (from the conn) will need to be used when getting 1969 * an ire for the dst. Going through proper outgoing interface and 1970 * choosing the source address corresponding to the outgoing interface 1971 * is necessary when the destination address is a link-local address and 1972 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 1973 * This can happen when active connection is setup; thus ipp pointer 1974 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 1975 * pointer is passed as ipp pointer. 
1976 */ 1977 mblk_t * 1978 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 1979 { 1980 ssize_t len; 1981 int protocol; 1982 struct T_bind_req *tbr; 1983 sin6_t *sin6; 1984 ipa6_conn_t *ac6; 1985 in6_addr_t *v6srcp; 1986 in6_addr_t *v6dstp; 1987 uint16_t lport; 1988 uint16_t fport; 1989 uchar_t *ucp; 1990 int error = 0; 1991 boolean_t local_bind; 1992 ipa6_conn_x_t *acx6; 1993 boolean_t verify_dst; 1994 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1995 1996 ASSERT(connp->conn_af_isv6); 1997 len = mp->b_wptr - mp->b_rptr; 1998 if (len < (sizeof (*tbr) + 1)) { 1999 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2000 "ip_bind_v6: bogus msg, len %ld", len); 2001 goto bad_addr; 2002 } 2003 /* Back up and extract the protocol identifier. */ 2004 mp->b_wptr--; 2005 tbr = (struct T_bind_req *)mp->b_rptr; 2006 /* Reset the message type in preparation for shipping it back. */ 2007 mp->b_datap->db_type = M_PCPROTO; 2008 2009 protocol = *mp->b_wptr & 0xFF; 2010 connp->conn_ulp = (uint8_t)protocol; 2011 2012 /* 2013 * Check for a zero length address. This is from a protocol that 2014 * wants to register to receive all packets of its type. 2015 */ 2016 if (tbr->ADDR_length == 0) { 2017 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2018 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2019 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2020 NULL) { 2021 /* 2022 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2023 * Do not allow others to bind to these. 2024 */ 2025 goto bad_addr; 2026 } 2027 2028 /* 2029 * 2030 * The udp module never sends down a zero-length address, 2031 * and allowing this on a labeled system will break MLP 2032 * functionality. 
2033 */ 2034 if (is_system_labeled() && protocol == IPPROTO_UDP) 2035 goto bad_addr; 2036 2037 /* Allow ipsec plumbing */ 2038 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2039 protocol != IPPROTO_ESP) 2040 goto bad_addr; 2041 2042 connp->conn_srcv6 = ipv6_all_zeros; 2043 ipcl_proto_insert_v6(connp, protocol); 2044 2045 tbr->PRIM_type = T_BIND_ACK; 2046 return (mp); 2047 } 2048 2049 /* Extract the address pointer from the message. */ 2050 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2051 tbr->ADDR_length); 2052 if (ucp == NULL) { 2053 ip1dbg(("ip_bind_v6: no address\n")); 2054 goto bad_addr; 2055 } 2056 if (!OK_32PTR(ucp)) { 2057 ip1dbg(("ip_bind_v6: unaligned address\n")); 2058 goto bad_addr; 2059 } 2060 2061 switch (tbr->ADDR_length) { 2062 default: 2063 ip1dbg(("ip_bind_v6: bad address length %d\n", 2064 (int)tbr->ADDR_length)); 2065 goto bad_addr; 2066 2067 case IPV6_ADDR_LEN: 2068 /* Verification of local address only */ 2069 v6srcp = (in6_addr_t *)ucp; 2070 lport = 0; 2071 local_bind = B_TRUE; 2072 break; 2073 2074 case sizeof (sin6_t): 2075 sin6 = (sin6_t *)ucp; 2076 v6srcp = &sin6->sin6_addr; 2077 lport = sin6->sin6_port; 2078 local_bind = B_TRUE; 2079 break; 2080 2081 case sizeof (ipa6_conn_t): 2082 /* 2083 * Verify that both the source and destination addresses 2084 * are valid. 2085 */ 2086 ac6 = (ipa6_conn_t *)ucp; 2087 v6srcp = &ac6->ac6_laddr; 2088 v6dstp = &ac6->ac6_faddr; 2089 fport = ac6->ac6_fport; 2090 /* For raw socket, the local port is not set. */ 2091 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2092 connp->conn_lport; 2093 local_bind = B_FALSE; 2094 /* Always verify destination reachability. */ 2095 verify_dst = B_TRUE; 2096 break; 2097 2098 case sizeof (ipa6_conn_x_t): 2099 /* 2100 * Verify that the source address is valid. 
2101 */ 2102 acx6 = (ipa6_conn_x_t *)ucp; 2103 ac6 = &acx6->ac6x_conn; 2104 v6srcp = &ac6->ac6_laddr; 2105 v6dstp = &ac6->ac6_faddr; 2106 fport = ac6->ac6_fport; 2107 lport = ac6->ac6_lport; 2108 local_bind = B_FALSE; 2109 /* 2110 * Client that passed ipa6_conn_x_t to us specifies whether to 2111 * verify destination reachability. 2112 */ 2113 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2114 break; 2115 } 2116 if (local_bind) { 2117 error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol, 2118 v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN); 2119 } else { 2120 error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol, 2121 v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst); 2122 } 2123 2124 if (error == 0) { 2125 /* Send it home. */ 2126 mp->b_datap->db_type = M_PCPROTO; 2127 tbr->PRIM_type = T_BIND_ACK; 2128 return (mp); 2129 } 2130 2131 bad_addr: 2132 ASSERT(error != EINPROGRESS); 2133 if (error > 0) 2134 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2135 else 2136 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2137 return (mp); 2138 } 2139 2140 static void 2141 ip_bind_post_handling_v6(conn_t *connp, mblk_t *mp, 2142 boolean_t version_changed, boolean_t ire_requested, ip_stack_t *ipst) 2143 { 2144 /* Update conn_send and pktversion if v4/v6 changed */ 2145 if (version_changed) { 2146 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2147 } 2148 2149 /* 2150 * Pass the IPSEC headers size in ire_ipsec_overhead. 2151 * We can't do this in ip_bind_insert_ire because the policy 2152 * may not have been inherited at that point in time and hence 2153 * conn_out_enforce_policy may not be set. 2154 */ 2155 if (ire_requested && connp->conn_out_enforce_policy && 2156 mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE) { 2157 ire_t *ire = (ire_t *)mp->b_rptr; 2158 ASSERT(MBLKL(mp) >= sizeof (ire_t)); 2159 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2160 } 2161 } 2162 2163 /* 2164 * Here address is verified to be a valid local address. 
2165 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2166 * address is also considered a valid local address. 2167 * In the case of a multicast address, however, the 2168 * upper protocol is expected to reset the src address 2169 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2170 * no packets are emitted with multicast address as 2171 * source address. 2172 * The addresses valid for bind are: 2173 * (1) - in6addr_any 2174 * (2) - IP address of an UP interface 2175 * (3) - IP address of a DOWN interface 2176 * (4) - a multicast address. In this case 2177 * the conn will only receive packets destined to 2178 * the specified multicast address. Note: the 2179 * application still has to issue an 2180 * IPV6_JOIN_GROUP socket option. 2181 * 2182 * In all the above cases, the bound address must be valid in the current zone. 2183 * When the address is loopback or multicast, there might be many matching IREs 2184 * so bind has to look up based on the zone. 2185 */ 2186 /* 2187 * Verify the local IP address. Does not change the conn_t except 2188 * conn_fully_bound and conn_policy_cached. 2189 */ 2190 static int 2191 ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2192 const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert) 2193 { 2194 int error = 0; 2195 ire_t *src_ire = NULL; 2196 zoneid_t zoneid; 2197 mblk_t *mp = NULL; 2198 boolean_t ire_requested; 2199 boolean_t ipsec_policy_set; 2200 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2201 2202 if (mpp) 2203 mp = *mpp; 2204 2205 ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2206 ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET); 2207 2208 /* 2209 * If it was previously connected, conn_fully_bound would have 2210 * been set. 
2211 */ 2212 connp->conn_fully_bound = B_FALSE; 2213 2214 zoneid = connp->conn_zoneid; 2215 2216 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2217 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2218 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2219 /* 2220 * If an address other than in6addr_any is requested, 2221 * we verify that it is a valid address for bind 2222 * Note: Following code is in if-else-if form for 2223 * readability compared to a condition check. 2224 */ 2225 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2226 /* LINTED - statement has no consequent */ 2227 if (IRE_IS_LOCAL(src_ire)) { 2228 /* 2229 * (2) Bind to address of local UP interface 2230 */ 2231 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2232 ipif_t *multi_ipif = NULL; 2233 ire_t *save_ire; 2234 /* 2235 * (4) bind to multicast address. 2236 * Fake out the IRE returned to upper 2237 * layer to be a broadcast IRE in 2238 * ip_bind_insert_ire_v6(). 2239 * Pass other information that matches 2240 * the ipif (e.g. the source address). 
2241 * conn_multicast_ill is only used for 2242 * IPv6 packets 2243 */ 2244 mutex_enter(&connp->conn_lock); 2245 if (connp->conn_multicast_ill != NULL) { 2246 (void) ipif_lookup_zoneid( 2247 connp->conn_multicast_ill, zoneid, 0, 2248 &multi_ipif); 2249 } else { 2250 /* 2251 * Look for default like 2252 * ip_wput_v6 2253 */ 2254 multi_ipif = ipif_lookup_group_v6( 2255 &ipv6_unspecified_group, zoneid, ipst); 2256 } 2257 mutex_exit(&connp->conn_lock); 2258 save_ire = src_ire; 2259 src_ire = NULL; 2260 if (multi_ipif == NULL || !ire_requested || 2261 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2262 src_ire = save_ire; 2263 error = EADDRNOTAVAIL; 2264 } else { 2265 ASSERT(src_ire != NULL); 2266 if (save_ire != NULL) 2267 ire_refrele(save_ire); 2268 } 2269 if (multi_ipif != NULL) 2270 ipif_refrele(multi_ipif); 2271 } else { 2272 if (!ip_addr_exists_v6(v6src, zoneid, ipst)) { 2273 /* 2274 * Not a valid address for bind 2275 */ 2276 error = EADDRNOTAVAIL; 2277 } 2278 } 2279 2280 if (error != 0) { 2281 /* Red Alert! Attempting to be a bogon! */ 2282 if (ip_debug > 2) { 2283 /* ip1dbg */ 2284 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2285 " address %s\n", AF_INET6, v6src); 2286 } 2287 goto bad_addr; 2288 } 2289 } 2290 2291 /* 2292 * Allow setting new policies. For example, disconnects come 2293 * down as ipa_t bind. As we would have set conn_policy_cached 2294 * to B_TRUE before, we should set it to B_FALSE, so that policy 2295 * can change after the disconnect. 2296 */ 2297 connp->conn_policy_cached = B_FALSE; 2298 2299 /* If not fanout_insert this was just an address verification */ 2300 if (fanout_insert) { 2301 /* 2302 * The addresses have been verified. Time to insert in 2303 * the correct fanout list. 
2304 */ 2305 connp->conn_srcv6 = *v6src; 2306 connp->conn_remv6 = ipv6_all_zeros; 2307 connp->conn_lport = lport; 2308 connp->conn_fport = 0; 2309 error = ipcl_bind_insert_v6(connp, protocol, v6src, lport); 2310 } 2311 if (error == 0) { 2312 if (ire_requested) { 2313 if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL, 2314 ipst)) { 2315 error = -1; 2316 goto bad_addr; 2317 } 2318 mp = *mpp; 2319 } else if (ipsec_policy_set) { 2320 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2321 error = -1; 2322 goto bad_addr; 2323 } 2324 } 2325 } 2326 bad_addr: 2327 if (error != 0) { 2328 if (connp->conn_anon_port) { 2329 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2330 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2331 B_FALSE); 2332 } 2333 connp->conn_mlp_type = mlptSingle; 2334 } 2335 2336 if (src_ire != NULL) 2337 ire_refrele(src_ire); 2338 2339 if (ipsec_policy_set) { 2340 ASSERT(mp != NULL); 2341 freeb(mp); 2342 /* 2343 * As of now assume that nothing else accompanies 2344 * IPSEC_POLICY_SET. 2345 */ 2346 *mpp = NULL; 2347 } 2348 2349 return (error); 2350 } 2351 int 2352 ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2353 const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert) 2354 { 2355 int error; 2356 boolean_t ire_requested; 2357 mblk_t *mp = NULL; 2358 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2359 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2360 2361 /* 2362 * Note that we allow connect to broadcast and multicast 2363 * address when ire_requested is set. Thus the ULP 2364 * has to check for IRE_BROADCAST and multicast. 
2365 */ 2366 if (mpp) 2367 mp = *mpp; 2368 ire_requested = (mp && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2369 2370 ASSERT(connp->conn_af_isv6); 2371 connp->conn_ulp = protocol; 2372 2373 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2374 /* Bind to IPv4 address */ 2375 ipaddr_t v4src; 2376 2377 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2378 2379 error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport, 2380 fanout_insert); 2381 if (error != 0) 2382 goto bad_addr; 2383 connp->conn_pkt_isv6 = B_FALSE; 2384 } else { 2385 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2386 error = 0; 2387 goto bad_addr; 2388 } 2389 error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp, 2390 lport, fanout_insert); 2391 if (error != 0) 2392 goto bad_addr; 2393 connp->conn_pkt_isv6 = B_TRUE; 2394 } 2395 2396 ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, 2397 orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); 2398 return (0); 2399 2400 bad_addr: 2401 if (error < 0) 2402 error = -TBADADDR; 2403 return (error); 2404 } 2405 2406 /* 2407 * Verify that both the source and destination addresses 2408 * are valid. If verify_dst, then destination address must also be reachable, 2409 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2410 * It takes ip6_pkt_t * as one of the arguments to determine correct 2411 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2412 * destination address. Note that parameter ipp is only useful for TCP connect 2413 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2414 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2415 * 2416 */ 2417 int 2418 ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2419 in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst, 2420 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2421 boolean_t verify_dst) 2422 { 2423 ire_t *src_ire; 2424 ire_t *dst_ire; 2425 int error = 0; 2426 ire_t *sire = NULL; 2427 ire_t *md_dst_ire = NULL; 2428 ill_t *md_ill = NULL; 2429 ill_t *dst_ill = NULL; 2430 ipif_t *src_ipif = NULL; 2431 zoneid_t zoneid; 2432 boolean_t ill_held = B_FALSE; 2433 mblk_t *mp = NULL; 2434 boolean_t ire_requested = B_FALSE; 2435 boolean_t ipsec_policy_set = B_FALSE; 2436 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2437 ts_label_t *tsl = NULL; 2438 2439 if (mpp) 2440 mp = *mpp; 2441 2442 if (mp != NULL) { 2443 ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2444 ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET); 2445 tsl = MBLK_GETLABEL(mp); 2446 } 2447 2448 src_ire = dst_ire = NULL; 2449 /* 2450 * If we never got a disconnect before, clear it now. 2451 */ 2452 connp->conn_fully_bound = B_FALSE; 2453 2454 zoneid = connp->conn_zoneid; 2455 2456 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2457 ipif_t *ipif; 2458 2459 /* 2460 * Use an "emulated" IRE_BROADCAST to tell the transport it 2461 * is a multicast. 2462 * Pass other information that matches 2463 * the ipif (e.g. the source address). 
2464 * 2465 * conn_multicast_ill is only used for IPv6 packets 2466 */ 2467 mutex_enter(&connp->conn_lock); 2468 if (connp->conn_multicast_ill != NULL) { 2469 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2470 zoneid, 0, &ipif); 2471 } else { 2472 /* Look for default like ip_wput_v6 */ 2473 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2474 } 2475 mutex_exit(&connp->conn_lock); 2476 if (ipif == NULL || ire_requested || 2477 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2478 if (ipif != NULL) 2479 ipif_refrele(ipif); 2480 if (ip_debug > 2) { 2481 /* ip1dbg */ 2482 pr_addr_dbg("ip_bind_connected_v6: bad " 2483 "connected multicast %s\n", AF_INET6, 2484 v6dst); 2485 } 2486 error = ENETUNREACH; 2487 goto bad_addr; 2488 } 2489 if (ipif != NULL) 2490 ipif_refrele(ipif); 2491 } else { 2492 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2493 NULL, &sire, zoneid, tsl, 2494 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2495 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2496 ipst); 2497 /* 2498 * We also prevent ire's with src address INADDR_ANY to 2499 * be used, which are created temporarily for 2500 * sending out packets from endpoints that have 2501 * conn_unspec_src set. 2502 */ 2503 if (dst_ire == NULL || 2504 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2505 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2506 /* 2507 * When verifying destination reachability, we always 2508 * complain. 2509 * 2510 * When not verifying destination reachability but we 2511 * found an IRE, i.e. the destination is reachable, 2512 * then the other tests still apply and we complain. 
2513 */ 2514 if (verify_dst || (dst_ire != NULL)) { 2515 if (ip_debug > 2) { 2516 /* ip1dbg */ 2517 pr_addr_dbg("ip_bind_connected_v6: bad" 2518 " connected dst %s\n", AF_INET6, 2519 v6dst); 2520 } 2521 if (dst_ire == NULL || 2522 !(dst_ire->ire_type & IRE_HOST)) { 2523 error = ENETUNREACH; 2524 } else { 2525 error = EHOSTUNREACH; 2526 } 2527 goto bad_addr; 2528 } 2529 } 2530 } 2531 2532 /* 2533 * We now know that routing will allow us to reach the destination. 2534 * Check whether Trusted Solaris policy allows communication with this 2535 * host, and pretend that the destination is unreachable if not. 2536 * 2537 * This is never a problem for TCP, since that transport is known to 2538 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2539 * handling. If the remote is unreachable, it will be detected at that 2540 * point, so there's no reason to check it here. 2541 * 2542 * Note that for sendto (and other datagram-oriented friends), this 2543 * check is done as part of the data path label computation instead. 2544 * The check here is just to make non-TCP connect() report the right 2545 * error. 2546 */ 2547 if (dst_ire != NULL && is_system_labeled() && 2548 !IPCL_IS_TCP(connp) && 2549 tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), 2550 v6dst, NULL, connp->conn_mac_exempt, ipst) != 0) { 2551 error = EHOSTUNREACH; 2552 if (ip_debug > 2) { 2553 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2554 AF_INET6, v6dst); 2555 } 2556 goto bad_addr; 2557 } 2558 2559 /* 2560 * If the app does a connect(), it means that it will most likely 2561 * send more than 1 packet to the destination. It makes sense 2562 * to clear the temporary flag. 
2563 */ 2564 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2565 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2566 irb_t *irb = dst_ire->ire_bucket; 2567 2568 rw_enter(&irb->irb_lock, RW_WRITER); 2569 /* 2570 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2571 * the lock in order to guarantee irb_tmp_ire_cnt. 2572 */ 2573 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2574 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2575 irb->irb_tmp_ire_cnt--; 2576 } 2577 rw_exit(&irb->irb_lock); 2578 } 2579 2580 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2581 2582 /* 2583 * See if we should notify ULP about MDT; we do this whether or not 2584 * ire_requested is TRUE, in order to handle active connects; MDT 2585 * eligibility tests for passive connects are handled separately 2586 * through tcp_adapt_ire(). We do this before the source address 2587 * selection, because dst_ire may change after a call to 2588 * ipif_select_source_v6(). This is a best-effort check, as the 2589 * packet for this connection may not actually go through 2590 * dst_ire->ire_stq, and the exact IRE can only be known after 2591 * calling ip_newroute_v6(). This is why we further check on the 2592 * IRE during Multidata packet transmission in tcp_multisend(). 
2593 */ 2594 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2595 dst_ire != NULL && 2596 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2597 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2598 ILL_MDT_CAPABLE(md_ill)) { 2599 md_dst_ire = dst_ire; 2600 IRE_REFHOLD(md_dst_ire); 2601 } 2602 2603 if (dst_ire != NULL && 2604 dst_ire->ire_type == IRE_LOCAL && 2605 dst_ire->ire_zoneid != zoneid && 2606 dst_ire->ire_zoneid != ALL_ZONES) { 2607 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2608 zoneid, 0, NULL, 2609 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2610 MATCH_IRE_RJ_BHOLE, ipst); 2611 if (src_ire == NULL) { 2612 error = EHOSTUNREACH; 2613 goto bad_addr; 2614 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2615 if (!(src_ire->ire_type & IRE_HOST)) 2616 error = ENETUNREACH; 2617 else 2618 error = EHOSTUNREACH; 2619 goto bad_addr; 2620 } 2621 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2622 src_ipif = src_ire->ire_ipif; 2623 ipif_refhold(src_ipif); 2624 *v6src = src_ipif->ipif_v6lcl_addr; 2625 } 2626 ire_refrele(src_ire); 2627 src_ire = NULL; 2628 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2629 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2630 *v6src = sire->ire_src_addr_v6; 2631 ire_refrele(dst_ire); 2632 dst_ire = sire; 2633 sire = NULL; 2634 } else if (dst_ire->ire_type == IRE_CACHE && 2635 (dst_ire->ire_flags & RTF_SETSRC)) { 2636 ASSERT(dst_ire->ire_zoneid == zoneid || 2637 dst_ire->ire_zoneid == ALL_ZONES); 2638 *v6src = dst_ire->ire_src_addr_v6; 2639 } else { 2640 /* 2641 * Pick a source address so that a proper inbound load 2642 * spreading would happen. Use dst_ill specified by the 2643 * app. when socket option or scopeid is set. 
2644 */ 2645 int err; 2646 2647 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2648 uint_t if_index; 2649 2650 /* 2651 * Scope id or IPV6_PKTINFO 2652 */ 2653 2654 if_index = ipp->ipp_ifindex; 2655 dst_ill = ill_lookup_on_ifindex( 2656 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2657 ipst); 2658 if (dst_ill == NULL) { 2659 ip1dbg(("ip_bind_connected_v6:" 2660 " bad ifindex %d\n", if_index)); 2661 error = EADDRNOTAVAIL; 2662 goto bad_addr; 2663 } 2664 ill_held = B_TRUE; 2665 } else if (connp->conn_outgoing_ill != NULL) { 2666 /* 2667 * For IPV6_BOUND_IF socket option, 2668 * conn_outgoing_ill should be set 2669 * already in TCP or UDP/ICMP. 2670 */ 2671 dst_ill = conn_get_held_ill(connp, 2672 &connp->conn_outgoing_ill, &err); 2673 if (err == ILL_LOOKUP_FAILED) { 2674 ip1dbg(("ip_bind_connected_v6:" 2675 "no ill for bound_if\n")); 2676 error = EADDRNOTAVAIL; 2677 goto bad_addr; 2678 } 2679 ill_held = B_TRUE; 2680 } else if (dst_ire->ire_stq != NULL) { 2681 /* No need to hold ill here */ 2682 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2683 } else { 2684 /* No need to hold ill here */ 2685 dst_ill = dst_ire->ire_ipif->ipif_ill; 2686 } 2687 if (ip6_asp_can_lookup(ipst)) { 2688 src_ipif = ipif_select_source_v6(dst_ill, 2689 v6dst, B_FALSE, connp->conn_src_preferences, 2690 zoneid); 2691 ip6_asp_table_refrele(ipst); 2692 if (src_ipif == NULL) { 2693 pr_addr_dbg("ip_bind_connected_v6: " 2694 "no usable source address for " 2695 "connection to %s\n", 2696 AF_INET6, v6dst); 2697 error = EADDRNOTAVAIL; 2698 goto bad_addr; 2699 } 2700 *v6src = src_ipif->ipif_v6lcl_addr; 2701 } else { 2702 error = EADDRNOTAVAIL; 2703 goto bad_addr; 2704 } 2705 } 2706 } 2707 2708 /* 2709 * We do ire_route_lookup_v6() here (and not an interface lookup) 2710 * as we assert that v6src should only come from an 2711 * UP interface for hard binding. 
2712 */ 2713 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2714 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2715 2716 /* src_ire must be a local|loopback */ 2717 if (!IRE_IS_LOCAL(src_ire)) { 2718 if (ip_debug > 2) { 2719 /* ip1dbg */ 2720 pr_addr_dbg("ip_bind_connected_v6: bad " 2721 "connected src %s\n", AF_INET6, v6src); 2722 } 2723 error = EADDRNOTAVAIL; 2724 goto bad_addr; 2725 } 2726 2727 /* 2728 * If the source address is a loopback address, the 2729 * destination had best be local or multicast. 2730 * The transports that can't handle multicast will reject 2731 * those addresses. 2732 */ 2733 if (src_ire->ire_type == IRE_LOOPBACK && 2734 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2735 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2736 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2737 error = -1; 2738 goto bad_addr; 2739 } 2740 /* 2741 * Allow setting new policies. For example, disconnects come 2742 * down as ipa_t bind. As we would have set conn_policy_cached 2743 * to B_TRUE before, we should set it to B_FALSE, so that policy 2744 * can change after the disconnect. 2745 */ 2746 connp->conn_policy_cached = B_FALSE; 2747 2748 /* 2749 * The addresses have been verified. Initialize the conn 2750 * before calling the policy as they expect the conns 2751 * initialized. 2752 */ 2753 connp->conn_srcv6 = *v6src; 2754 connp->conn_remv6 = *v6dst; 2755 connp->conn_lport = lport; 2756 connp->conn_fport = fport; 2757 2758 ASSERT(!(ipsec_policy_set && ire_requested)); 2759 if (ire_requested) { 2760 iulp_t *ulp_info = NULL; 2761 2762 /* 2763 * Note that sire will not be NULL if this is an off-link 2764 * connection and there is not cache for that dest yet. 2765 * 2766 * XXX Because of an existing bug, if there are multiple 2767 * default routes, the IRE returned now may not be the actual 2768 * default route used (default routes are chosen in a 2769 * round robin fashion). 
So if the metrics for different 2770 * default routes are different, we may return the wrong 2771 * metrics. This will not be a problem if the existing 2772 * bug is fixed. 2773 */ 2774 if (sire != NULL) 2775 ulp_info = &(sire->ire_uinfo); 2776 2777 if (!ip_bind_get_ire_v6(mpp, dst_ire, v6dst, ulp_info, 2778 ipst)) { 2779 error = -1; 2780 goto bad_addr; 2781 } 2782 } else if (ipsec_policy_set) { 2783 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2784 error = -1; 2785 goto bad_addr; 2786 } 2787 } 2788 2789 /* 2790 * Cache IPsec policy in this conn. If we have per-socket policy, 2791 * we'll cache that. If we don't, we'll inherit global policy. 2792 * 2793 * We can't insert until the conn reflects the policy. Note that 2794 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2795 * connections where we don't have a policy. This is to prevent 2796 * global policy lookups in the inbound path. 2797 * 2798 * If we insert before we set conn_policy_cached, 2799 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2800 * because global policy cound be non-empty. We normally call 2801 * ipsec_check_policy() for conn_policy_cached connections only if 2802 * conn_in_enforce_policy is set. But in this case, 2803 * conn_policy_cached can get set anytime since we made the 2804 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2805 * is called, which will make the above assumption false. Thus, we 2806 * need to insert after we set conn_policy_cached. 2807 */ 2808 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2809 goto bad_addr; 2810 2811 /* If not fanout_insert this was just an address verification */ 2812 if (fanout_insert) { 2813 /* 2814 * The addresses have been verified. Time to insert in 2815 * the correct fanout list. 2816 */ 2817 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2818 connp->conn_ports, 2819 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2820 } 2821 if (error == 0) { 2822 connp->conn_fully_bound = B_TRUE; 2823 /* 2824 * Our initial checks for MDT have passed; the IRE is not 2825 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2826 * be supporting MDT. Pass the IRE, IPC and ILL into 2827 * ip_mdinfo_return(), which performs further checks 2828 * against them and upon success, returns the MDT info 2829 * mblk which we will attach to the bind acknowledgment. 2830 */ 2831 if (md_dst_ire != NULL) { 2832 mblk_t *mdinfo_mp; 2833 2834 ASSERT(md_ill != NULL); 2835 ASSERT(md_ill->ill_mdt_capab != NULL); 2836 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2837 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) { 2838 if (mp == NULL) { 2839 *mpp = mdinfo_mp; 2840 } else { 2841 linkb(mp, mdinfo_mp); 2842 } 2843 } 2844 } 2845 } 2846 bad_addr: 2847 if (ipsec_policy_set) { 2848 ASSERT(mp != NULL); 2849 freeb(mp); 2850 /* 2851 * As of now assume that nothing else accompanies 2852 * IPSEC_POLICY_SET. 2853 */ 2854 *mpp = NULL; 2855 } 2856 refrele_and_quit: 2857 if (src_ire != NULL) 2858 IRE_REFRELE(src_ire); 2859 if (dst_ire != NULL) 2860 IRE_REFRELE(dst_ire); 2861 if (sire != NULL) 2862 IRE_REFRELE(sire); 2863 if (src_ipif != NULL) 2864 ipif_refrele(src_ipif); 2865 if (md_dst_ire != NULL) 2866 IRE_REFRELE(md_dst_ire); 2867 if (ill_held && dst_ill != NULL) 2868 ill_refrele(dst_ill); 2869 return (error); 2870 } 2871 2872 /* ARGSUSED */ 2873 int 2874 ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2875 in6_addr_t *v6srcp, uint16_t lport, const in6_addr_t *v6dstp, 2876 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2877 boolean_t verify_dst) 2878 { 2879 int error = 0; 2880 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2881 boolean_t ire_requested; 2882 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2883 2884 /* 2885 * Note that we allow connect to broadcast and multicast 2886 * address when ire_requested is set. 
Thus the ULP 2887 * has to check for IRE_BROADCAST and multicast. 2888 */ 2889 ASSERT(mpp != NULL); 2890 ire_requested = (*mpp != NULL && DB_TYPE(*mpp) == IRE_DB_REQ_TYPE); 2891 2892 ASSERT(connp->conn_af_isv6); 2893 connp->conn_ulp = protocol; 2894 2895 /* For raw socket, the local port is not set. */ 2896 lport = lport != 0 ? lport : connp->conn_lport; 2897 2898 /* 2899 * Bind to local and remote address. Local might be 2900 * unspecified in which case it will be extracted from 2901 * ire_src_addr_v6 2902 */ 2903 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2904 /* Connect to IPv4 address */ 2905 ipaddr_t v4src; 2906 ipaddr_t v4dst; 2907 2908 /* Is the source unspecified or mapped? */ 2909 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2910 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2911 ip1dbg(("ip_proto_bind_connected_v6: " 2912 "dst is mapped, but not the src\n")); 2913 goto bad_addr; 2914 } 2915 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2916 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2917 2918 /* Always verify destination reachability. */ 2919 error = ip_bind_connected_v4(connp, mpp, protocol, &v4src, 2920 lport, v4dst, fport, B_TRUE, B_TRUE); 2921 if (error != 0) 2922 goto bad_addr; 2923 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2924 connp->conn_pkt_isv6 = B_FALSE; 2925 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2926 ip1dbg(("ip_proto_bind_connected_v6: " 2927 "src is mapped, but not the dst\n")); 2928 goto bad_addr; 2929 } else { 2930 error = ip_bind_connected_v6(connp, mpp, protocol, v6srcp, 2931 lport, v6dstp, ipp, fport, B_TRUE, verify_dst); 2932 if (error != 0) 2933 goto bad_addr; 2934 connp->conn_pkt_isv6 = B_TRUE; 2935 } 2936 2937 ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, 2938 orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); 2939 2940 /* Send it home. */ 2941 return (0); 2942 2943 bad_addr: 2944 if (error == 0) 2945 error = -TBADADDR; 2946 return (error); 2947 } 2948 2949 /* 2950 * Get the ire in *mpp. 
Returns false if it fails (due to lack of space). 2951 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 2952 */ 2953 /* ARGSUSED4 */ 2954 static boolean_t 2955 ip_bind_get_ire_v6(mblk_t **mpp, ire_t *ire, const in6_addr_t *dst, 2956 iulp_t *ulp_info, ip_stack_t *ipst) 2957 { 2958 mblk_t *mp = *mpp; 2959 ire_t *ret_ire; 2960 2961 ASSERT(mp != NULL); 2962 2963 if (ire != NULL) { 2964 /* 2965 * mp initialized above to IRE_DB_REQ_TYPE 2966 * appended mblk. Its <upper protocol>'s 2967 * job to make sure there is room. 2968 */ 2969 if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t)) 2970 return (B_FALSE); 2971 2972 mp->b_datap->db_type = IRE_DB_TYPE; 2973 mp->b_wptr = mp->b_rptr + sizeof (ire_t); 2974 bcopy(ire, mp->b_rptr, sizeof (ire_t)); 2975 ret_ire = (ire_t *)mp->b_rptr; 2976 if (IN6_IS_ADDR_MULTICAST(dst) || 2977 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 2978 ret_ire->ire_type = IRE_BROADCAST; 2979 ret_ire->ire_addr_v6 = *dst; 2980 } 2981 if (ulp_info != NULL) { 2982 bcopy(ulp_info, &(ret_ire->ire_uinfo), 2983 sizeof (iulp_t)); 2984 } 2985 ret_ire->ire_mp = mp; 2986 } else { 2987 /* 2988 * No IRE was found. Remove IRE mblk. 2989 */ 2990 *mpp = mp->b_cont; 2991 freeb(mp); 2992 } 2993 return (B_TRUE); 2994 } 2995 2996 /* 2997 * Add an ip6i_t header to the front of the mblk. 2998 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 2999 * Returns NULL if allocation fails (and frees original message). 3000 * Used in outgoing path when going through ip_newroute_*v6(). 3001 * Used in incoming path to pass ifindex to transports. 
3002 */ 3003 mblk_t * 3004 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3005 { 3006 mblk_t *mp1; 3007 ip6i_t *ip6i; 3008 ip6_t *ip6h; 3009 3010 ip6h = (ip6_t *)mp->b_rptr; 3011 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3012 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3013 mp->b_datap->db_ref > 1) { 3014 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3015 if (mp1 == NULL) { 3016 freemsg(mp); 3017 return (NULL); 3018 } 3019 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3020 mp1->b_cont = mp; 3021 mp = mp1; 3022 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3023 } 3024 mp->b_rptr = (uchar_t *)ip6i; 3025 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3026 ip6i->ip6i_nxt = IPPROTO_RAW; 3027 if (ill != NULL) { 3028 ip6i->ip6i_flags = IP6I_IFINDEX; 3029 /* 3030 * If `ill' is in an IPMP group, make sure we use the IPMP 3031 * interface index so that e.g. IPV6_RECVPKTINFO will get the 3032 * IPMP interface index and not an underlying interface index. 3033 */ 3034 if (IS_UNDER_IPMP(ill)) 3035 ip6i->ip6i_ifindex = ipmp_ill_get_ipmp_ifindex(ill); 3036 else 3037 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3038 } else { 3039 ip6i->ip6i_flags = 0; 3040 } 3041 ip6i->ip6i_nexthop = *dst; 3042 return (mp); 3043 } 3044 3045 /* 3046 * Handle protocols with which IP is less intimate. There 3047 * can be more than one stream bound to a particular 3048 * protocol. When this is the case, normally each one gets a copy 3049 * of any incoming packets. 3050 * However, if the packet was tunneled and not multicast we only send to it 3051 * the first match. 3052 * 3053 * Zones notes: 3054 * Packets will be distributed to streams in all zones. This is really only 3055 * useful for ICMPv6 as only applications in the global zone can create raw 3056 * sockets for other protocols. 
3057 */ 3058 static void 3059 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3060 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3061 boolean_t mctl_present, zoneid_t zoneid) 3062 { 3063 queue_t *rq; 3064 mblk_t *mp1, *first_mp1; 3065 in6_addr_t dst = ip6h->ip6_dst; 3066 in6_addr_t src = ip6h->ip6_src; 3067 boolean_t one_only; 3068 mblk_t *first_mp = mp; 3069 boolean_t secure, shared_addr; 3070 conn_t *connp, *first_connp, *next_connp; 3071 connf_t *connfp; 3072 ip_stack_t *ipst = inill->ill_ipst; 3073 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3074 3075 if (mctl_present) { 3076 mp = first_mp->b_cont; 3077 secure = ipsec_in_is_secure(first_mp); 3078 ASSERT(mp != NULL); 3079 } else { 3080 secure = B_FALSE; 3081 } 3082 3083 /* 3084 * If the packet was tunneled and not multicast we only send to it 3085 * the first match. 3086 */ 3087 one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) && 3088 !IN6_IS_ADDR_MULTICAST(&dst)); 3089 3090 shared_addr = (zoneid == ALL_ZONES); 3091 if (shared_addr) { 3092 /* 3093 * We don't allow multilevel ports for raw IP, so no need to 3094 * check for that here. 3095 */ 3096 zoneid = tsol_packet_to_zoneid(mp); 3097 } 3098 3099 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3100 mutex_enter(&connfp->connf_lock); 3101 connp = connfp->connf_head; 3102 for (connp = connfp->connf_head; connp != NULL; 3103 connp = connp->conn_next) { 3104 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3105 zoneid) && 3106 (!is_system_labeled() || 3107 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3108 connp))) 3109 break; 3110 } 3111 3112 if (connp == NULL) { 3113 /* 3114 * No one bound to this port. Is 3115 * there a client that wants all 3116 * unclaimed datagrams? 
3117 */ 3118 mutex_exit(&connfp->connf_lock); 3119 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3120 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3121 nexthdr_offset, mctl_present, zoneid, ipst)) { 3122 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3123 } 3124 3125 return; 3126 } 3127 3128 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL); 3129 3130 CONN_INC_REF(connp); 3131 first_connp = connp; 3132 3133 /* 3134 * XXX: Fix the multiple protocol listeners case. We should not 3135 * be walking the conn->next list here. 3136 */ 3137 if (one_only) { 3138 /* 3139 * Only send message to one tunnel driver by immediately 3140 * terminating the loop. 3141 */ 3142 connp = NULL; 3143 } else { 3144 connp = connp->conn_next; 3145 3146 } 3147 for (;;) { 3148 while (connp != NULL) { 3149 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3150 flags, zoneid) && 3151 (!is_system_labeled() || 3152 tsol_receive_local(mp, &dst, IPV6_VERSION, 3153 shared_addr, connp))) 3154 break; 3155 connp = connp->conn_next; 3156 } 3157 3158 /* 3159 * Just copy the data part alone. The mctl part is 3160 * needed just for verifying policy and it is never 3161 * sent up. 3162 */ 3163 if (connp == NULL || 3164 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3165 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3166 /* 3167 * No more intested clients or memory 3168 * allocation failed 3169 */ 3170 connp = first_connp; 3171 break; 3172 } 3173 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 3174 mp1 = mctl_present ? first_mp1->b_cont : first_mp1; 3175 CONN_INC_REF(connp); 3176 mutex_exit(&connfp->connf_lock); 3177 rq = connp->conn_rq; 3178 /* 3179 * For link-local always add ifindex so that transport can set 3180 * sin6_scope_id. Avoid it for ICMP error fanout. 
3181 */ 3182 if ((connp->conn_ip_recvpktinfo || 3183 IN6_IS_ADDR_LINKLOCAL(&src)) && 3184 (flags & IP_FF_IPINFO)) { 3185 /* Add header */ 3186 mp1 = ip_add_info_v6(mp1, inill, &dst); 3187 } 3188 if (mp1 == NULL) { 3189 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3190 } else if ( 3191 (IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3192 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3193 if (flags & IP_FF_RAWIP) { 3194 BUMP_MIB(ill->ill_ip_mib, 3195 rawipIfStatsInOverflows); 3196 } else { 3197 BUMP_MIB(ill->ill_icmp6_mib, 3198 ipv6IfIcmpInOverflows); 3199 } 3200 3201 freemsg(mp1); 3202 } else { 3203 /* 3204 * Don't enforce here if we're a tunnel - let "tun" do 3205 * it instead. 3206 */ 3207 if (!IPCL_IS_IPTUN(connp) && 3208 (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3209 secure)) { 3210 first_mp1 = ipsec_check_inbound_policy( 3211 first_mp1, connp, NULL, ip6h, mctl_present); 3212 } 3213 if (first_mp1 != NULL) { 3214 if (mctl_present) 3215 freeb(first_mp1); 3216 BUMP_MIB(ill->ill_ip_mib, 3217 ipIfStatsHCInDelivers); 3218 (connp->conn_recv)(connp, mp1, NULL); 3219 } 3220 } 3221 mutex_enter(&connfp->connf_lock); 3222 /* Follow the next pointer before releasing the conn. */ 3223 next_connp = connp->conn_next; 3224 CONN_DEC_REF(connp); 3225 connp = next_connp; 3226 } 3227 3228 /* Last one. Send it upstream. */ 3229 mutex_exit(&connfp->connf_lock); 3230 3231 /* Initiate IPPF processing */ 3232 if (IP6_IN_IPP(flags, ipst)) { 3233 uint_t ifindex; 3234 3235 mutex_enter(&ill->ill_lock); 3236 ifindex = ill->ill_phyint->phyint_ifindex; 3237 mutex_exit(&ill->ill_lock); 3238 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3239 if (mp == NULL) { 3240 CONN_DEC_REF(connp); 3241 if (mctl_present) 3242 freeb(first_mp); 3243 return; 3244 } 3245 } 3246 3247 /* 3248 * For link-local always add ifindex so that transport can set 3249 * sin6_scope_id. Avoid it for ICMP error fanout. 
3250 */ 3251 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3252 (flags & IP_FF_IPINFO)) { 3253 /* Add header */ 3254 mp = ip_add_info_v6(mp, inill, &dst); 3255 if (mp == NULL) { 3256 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3257 CONN_DEC_REF(connp); 3258 if (mctl_present) 3259 freeb(first_mp); 3260 return; 3261 } else if (mctl_present) { 3262 first_mp->b_cont = mp; 3263 } else { 3264 first_mp = mp; 3265 } 3266 } 3267 3268 rq = connp->conn_rq; 3269 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3270 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3271 3272 if (flags & IP_FF_RAWIP) { 3273 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3274 } else { 3275 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3276 } 3277 3278 freemsg(first_mp); 3279 } else { 3280 if (IPCL_IS_IPTUN(connp)) { 3281 /* 3282 * Tunneled packet. We enforce policy in the tunnel 3283 * module itself. 3284 * 3285 * Send the WHOLE packet up (incl. IPSEC_IN) without 3286 * a policy check. 3287 */ 3288 putnext(rq, first_mp); 3289 CONN_DEC_REF(connp); 3290 return; 3291 } 3292 /* 3293 * Don't enforce here if we're a tunnel - let "tun" do 3294 * it instead. 3295 */ 3296 if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 && 3297 (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) { 3298 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3299 NULL, ip6h, mctl_present); 3300 if (first_mp == NULL) { 3301 CONN_DEC_REF(connp); 3302 return; 3303 } 3304 } 3305 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3306 (connp->conn_recv)(connp, mp, NULL); 3307 if (mctl_present) 3308 freeb(first_mp); 3309 } 3310 CONN_DEC_REF(connp); 3311 } 3312 3313 /* 3314 * Send an ICMP error after patching up the packet appropriately. Returns 3315 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3316 */ 3317 int 3318 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3319 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3320 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3321 { 3322 ip6_t *ip6h; 3323 mblk_t *first_mp; 3324 boolean_t secure; 3325 unsigned char db_type; 3326 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3327 3328 first_mp = mp; 3329 if (mctl_present) { 3330 mp = mp->b_cont; 3331 secure = ipsec_in_is_secure(first_mp); 3332 ASSERT(mp != NULL); 3333 } else { 3334 /* 3335 * If this is an ICMP error being reported - which goes 3336 * up as M_CTLs, we need to convert them to M_DATA till 3337 * we finish checking with global policy because 3338 * ipsec_check_global_policy() assumes M_DATA as clear 3339 * and M_CTL as secure. 3340 */ 3341 db_type = mp->b_datap->db_type; 3342 mp->b_datap->db_type = M_DATA; 3343 secure = B_FALSE; 3344 } 3345 /* 3346 * We are generating an icmp error for some inbound packet. 3347 * Called from all ip_fanout_(udp, tcp, proto) functions. 3348 * Before we generate an error, check with global policy 3349 * to see whether this is allowed to enter the system. As 3350 * there is no "conn", we are checking with global policy. 
3351 */ 3352 ip6h = (ip6_t *)mp->b_rptr; 3353 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3354 first_mp = ipsec_check_global_policy(first_mp, NULL, 3355 NULL, ip6h, mctl_present, ipst->ips_netstack); 3356 if (first_mp == NULL) 3357 return (0); 3358 } 3359 3360 if (!mctl_present) 3361 mp->b_datap->db_type = db_type; 3362 3363 if (flags & IP_FF_SEND_ICMP) { 3364 if (flags & IP_FF_HDR_COMPLETE) { 3365 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3366 freemsg(first_mp); 3367 return (1); 3368 } 3369 } 3370 switch (icmp_type) { 3371 case ICMP6_DST_UNREACH: 3372 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3373 B_FALSE, B_FALSE, zoneid, ipst); 3374 break; 3375 case ICMP6_PARAM_PROB: 3376 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3377 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3378 break; 3379 default: 3380 #ifdef DEBUG 3381 panic("ip_fanout_send_icmp_v6: wrong type"); 3382 /*NOTREACHED*/ 3383 #else 3384 freemsg(first_mp); 3385 break; 3386 #endif 3387 } 3388 } else { 3389 freemsg(first_mp); 3390 return (0); 3391 } 3392 3393 return (1); 3394 } 3395 3396 3397 /* 3398 * Fanout for TCP packets 3399 * The caller puts <fport, lport> in the ports parameter. 
3400 */ 3401 static void 3402 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3403 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3404 { 3405 mblk_t *first_mp; 3406 boolean_t secure; 3407 conn_t *connp; 3408 tcph_t *tcph; 3409 boolean_t syn_present = B_FALSE; 3410 ip_stack_t *ipst = inill->ill_ipst; 3411 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3412 3413 first_mp = mp; 3414 if (mctl_present) { 3415 mp = first_mp->b_cont; 3416 secure = ipsec_in_is_secure(first_mp); 3417 ASSERT(mp != NULL); 3418 } else { 3419 secure = B_FALSE; 3420 } 3421 3422 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3423 3424 if (connp == NULL || 3425 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3426 /* 3427 * No hard-bound match. Send Reset. 3428 */ 3429 dblk_t *dp = mp->b_datap; 3430 uint32_t ill_index; 3431 3432 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3433 3434 /* Initiate IPPf processing, if needed. */ 3435 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3436 (flags & IP6_NO_IPPOLICY)) { 3437 ill_index = ill->ill_phyint->phyint_ifindex; 3438 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3439 if (first_mp == NULL) { 3440 if (connp != NULL) 3441 CONN_DEC_REF(connp); 3442 return; 3443 } 3444 } 3445 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3446 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3447 ipst->ips_netstack->netstack_tcp, connp); 3448 if (connp != NULL) 3449 CONN_DEC_REF(connp); 3450 return; 3451 } 3452 3453 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3454 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3455 if (connp->conn_flags & IPCL_TCP) { 3456 squeue_t *sqp; 3457 3458 /* 3459 * For fused tcp loopback, assign the eager's 3460 * squeue to be that of the active connect's. 
3461 */ 3462 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3463 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3464 !secure && 3465 !IP6_IN_IPP(flags, ipst)) { 3466 ASSERT(Q_TO_CONN(q) != NULL); 3467 sqp = Q_TO_CONN(q)->conn_sqp; 3468 } else { 3469 sqp = IP_SQUEUE_GET(lbolt); 3470 } 3471 3472 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3473 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3474 3475 /* 3476 * db_cksumstuff is unused in the incoming 3477 * path; Thus store the ifindex here. It will 3478 * be cleared in tcp_conn_create_v6(). 3479 */ 3480 DB_CKSUMSTUFF(mp) = 3481 (intptr_t)ill->ill_phyint->phyint_ifindex; 3482 syn_present = B_TRUE; 3483 } 3484 } 3485 3486 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3487 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3488 if ((flags & TH_RST) || (flags & TH_URG)) { 3489 CONN_DEC_REF(connp); 3490 freemsg(first_mp); 3491 return; 3492 } 3493 if (flags & TH_ACK) { 3494 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3495 ipst->ips_netstack->netstack_tcp, connp); 3496 CONN_DEC_REF(connp); 3497 return; 3498 } 3499 3500 CONN_DEC_REF(connp); 3501 freemsg(first_mp); 3502 return; 3503 } 3504 3505 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3506 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3507 NULL, ip6h, mctl_present); 3508 if (first_mp == NULL) { 3509 CONN_DEC_REF(connp); 3510 return; 3511 } 3512 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3513 ASSERT(syn_present); 3514 if (mctl_present) { 3515 ASSERT(first_mp != mp); 3516 first_mp->b_datap->db_struioflag |= 3517 STRUIO_POLICY; 3518 } else { 3519 ASSERT(first_mp == mp); 3520 mp->b_datap->db_struioflag &= 3521 ~STRUIO_EAGER; 3522 mp->b_datap->db_struioflag |= 3523 STRUIO_POLICY; 3524 } 3525 } else { 3526 /* 3527 * Discard first_mp early since we're dealing with a 3528 * fully-connected conn_t and tcp doesn't do policy in 3529 * this case. 
Also, if someone is bound to IPPROTO_TCP 3530 * over raw IP, they don't expect to see a M_CTL. 3531 */ 3532 if (mctl_present) { 3533 freeb(first_mp); 3534 mctl_present = B_FALSE; 3535 } 3536 first_mp = mp; 3537 } 3538 } 3539 3540 /* Initiate IPPF processing */ 3541 if (IP6_IN_IPP(flags, ipst)) { 3542 uint_t ifindex; 3543 3544 mutex_enter(&ill->ill_lock); 3545 ifindex = ill->ill_phyint->phyint_ifindex; 3546 mutex_exit(&ill->ill_lock); 3547 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3548 if (mp == NULL) { 3549 CONN_DEC_REF(connp); 3550 if (mctl_present) { 3551 freeb(first_mp); 3552 } 3553 return; 3554 } else if (mctl_present) { 3555 /* 3556 * ip_add_info_v6 might return a new mp. 3557 */ 3558 ASSERT(first_mp != mp); 3559 first_mp->b_cont = mp; 3560 } else { 3561 first_mp = mp; 3562 } 3563 } 3564 3565 /* 3566 * For link-local always add ifindex so that TCP can bind to that 3567 * interface. Avoid it for ICMP error fanout. 3568 */ 3569 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3570 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3571 (flags & IP_FF_IPINFO))) { 3572 /* Add header */ 3573 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3574 if (mp == NULL) { 3575 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3576 CONN_DEC_REF(connp); 3577 if (mctl_present) 3578 freeb(first_mp); 3579 return; 3580 } else if (mctl_present) { 3581 ASSERT(first_mp != mp); 3582 first_mp->b_cont = mp; 3583 } else { 3584 first_mp = mp; 3585 } 3586 } 3587 3588 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3589 if (IPCL_IS_TCP(connp)) { 3590 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, connp->conn_recv, 3591 connp, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 3592 } else { 3593 /* SOCK_RAW, IPPROTO_TCP case */ 3594 (connp->conn_recv)(connp, first_mp, NULL); 3595 CONN_DEC_REF(connp); 3596 } 3597 } 3598 3599 /* 3600 * Fanout for UDP packets. 3601 * The caller puts <fport, lport> in the ports parameter. 3602 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3603 * 3604 * If SO_REUSEADDR is set all multicast and broadcast packets 3605 * will be delivered to all streams bound to the same port. 3606 * 3607 * Zones notes: 3608 * Multicast packets will be distributed to streams in all zones. 3609 */ 3610 static void 3611 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3612 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3613 zoneid_t zoneid) 3614 { 3615 uint32_t dstport, srcport; 3616 in6_addr_t dst; 3617 mblk_t *first_mp; 3618 boolean_t secure; 3619 conn_t *connp; 3620 connf_t *connfp; 3621 conn_t *first_conn; 3622 conn_t *next_conn; 3623 mblk_t *mp1, *first_mp1; 3624 in6_addr_t src; 3625 boolean_t shared_addr; 3626 ip_stack_t *ipst = inill->ill_ipst; 3627 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3628 3629 first_mp = mp; 3630 if (mctl_present) { 3631 mp = first_mp->b_cont; 3632 secure = ipsec_in_is_secure(first_mp); 3633 ASSERT(mp != NULL); 3634 } else { 3635 secure = B_FALSE; 3636 } 3637 3638 /* Extract ports in net byte order */ 3639 dstport = htons(ntohl(ports) & 0xFFFF); 3640 srcport = htons(ntohl(ports) >> 16); 3641 dst = ip6h->ip6_dst; 3642 src = ip6h->ip6_src; 3643 3644 shared_addr = (zoneid == ALL_ZONES); 3645 if (shared_addr) { 3646 /* 3647 * No need to handle exclusive-stack zones since ALL_ZONES 3648 * only applies to the shared stack. 3649 */ 3650 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3651 /* 3652 * If no shared MLP is found, tsol_mlp_findzone returns 3653 * ALL_ZONES. In that case, we assume it's SLP, and 3654 * search for the zone based on the packet label. 3655 * That will also return ALL_ZONES on failure, but 3656 * we never allow conn_zoneid to be set to ALL_ZONES. 3657 */ 3658 if (zoneid == ALL_ZONES) 3659 zoneid = tsol_packet_to_zoneid(mp); 3660 } 3661 3662 /* Attempt to find a client stream based on destination port. 
*/ 3663 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3664 mutex_enter(&connfp->connf_lock); 3665 connp = connfp->connf_head; 3666 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3667 /* 3668 * Not multicast. Send to the one (first) client we find. 3669 */ 3670 while (connp != NULL) { 3671 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3672 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3673 conn_wantpacket_v6(connp, ill, ip6h, 3674 flags, zoneid)) { 3675 break; 3676 } 3677 connp = connp->conn_next; 3678 } 3679 if (connp == NULL || connp->conn_upq == NULL) 3680 goto notfound; 3681 3682 if (is_system_labeled() && 3683 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3684 connp)) 3685 goto notfound; 3686 3687 /* Found a client */ 3688 CONN_INC_REF(connp); 3689 mutex_exit(&connfp->connf_lock); 3690 3691 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3692 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3693 freemsg(first_mp); 3694 CONN_DEC_REF(connp); 3695 return; 3696 } 3697 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3698 first_mp = ipsec_check_inbound_policy(first_mp, 3699 connp, NULL, ip6h, mctl_present); 3700 if (first_mp == NULL) { 3701 CONN_DEC_REF(connp); 3702 return; 3703 } 3704 } 3705 /* Initiate IPPF processing */ 3706 if (IP6_IN_IPP(flags, ipst)) { 3707 uint_t ifindex; 3708 3709 mutex_enter(&ill->ill_lock); 3710 ifindex = ill->ill_phyint->phyint_ifindex; 3711 mutex_exit(&ill->ill_lock); 3712 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3713 if (mp == NULL) { 3714 CONN_DEC_REF(connp); 3715 if (mctl_present) 3716 freeb(first_mp); 3717 return; 3718 } 3719 } 3720 /* 3721 * For link-local always add ifindex so that 3722 * transport can set sin6_scope_id. Avoid it for 3723 * ICMP error fanout. 
3724 */ 3725 if ((connp->conn_ip_recvpktinfo || 3726 IN6_IS_ADDR_LINKLOCAL(&src)) && 3727 (flags & IP_FF_IPINFO)) { 3728 /* Add header */ 3729 mp = ip_add_info_v6(mp, inill, &dst); 3730 if (mp == NULL) { 3731 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3732 CONN_DEC_REF(connp); 3733 if (mctl_present) 3734 freeb(first_mp); 3735 return; 3736 } else if (mctl_present) { 3737 first_mp->b_cont = mp; 3738 } else { 3739 first_mp = mp; 3740 } 3741 } 3742 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3743 3744 /* Send it upstream */ 3745 (connp->conn_recv)(connp, mp, NULL); 3746 3747 IP6_STAT(ipst, ip6_udp_fannorm); 3748 CONN_DEC_REF(connp); 3749 if (mctl_present) 3750 freeb(first_mp); 3751 return; 3752 } 3753 3754 while (connp != NULL) { 3755 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3756 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3757 (!is_system_labeled() || 3758 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3759 connp))) 3760 break; 3761 connp = connp->conn_next; 3762 } 3763 3764 if (connp == NULL || connp->conn_upq == NULL) 3765 goto notfound; 3766 3767 first_conn = connp; 3768 3769 CONN_INC_REF(connp); 3770 connp = connp->conn_next; 3771 for (;;) { 3772 while (connp != NULL) { 3773 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3774 src) && conn_wantpacket_v6(connp, ill, ip6h, 3775 flags, zoneid) && 3776 (!is_system_labeled() || 3777 tsol_receive_local(mp, &dst, IPV6_VERSION, 3778 shared_addr, connp))) 3779 break; 3780 connp = connp->conn_next; 3781 } 3782 /* 3783 * Just copy the data part alone. The mctl part is 3784 * needed just for verifying policy and it is never 3785 * sent up. 3786 */ 3787 if (connp == NULL || 3788 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3789 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3790 /* 3791 * No more interested clients or memory 3792 * allocation failed 3793 */ 3794 connp = first_conn; 3795 break; 3796 } 3797 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3798 CONN_INC_REF(connp); 3799 mutex_exit(&connfp->connf_lock); 3800 /* 3801 * For link-local always add ifindex so that transport 3802 * can set sin6_scope_id. Avoid it for ICMP error 3803 * fanout. 3804 */ 3805 if ((connp->conn_ip_recvpktinfo || 3806 IN6_IS_ADDR_LINKLOCAL(&src)) && 3807 (flags & IP_FF_IPINFO)) { 3808 /* Add header */ 3809 mp1 = ip_add_info_v6(mp1, inill, &dst); 3810 } 3811 /* mp1 could have changed */ 3812 if (mctl_present) 3813 first_mp1->b_cont = mp1; 3814 else 3815 first_mp1 = mp1; 3816 if (mp1 == NULL) { 3817 if (mctl_present) 3818 freeb(first_mp1); 3819 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3820 goto next_one; 3821 } 3822 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3823 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3824 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3825 freemsg(first_mp1); 3826 goto next_one; 3827 } 3828 3829 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3830 first_mp1 = ipsec_check_inbound_policy 3831 (first_mp1, connp, NULL, ip6h, 3832 mctl_present); 3833 } 3834 if (first_mp1 != NULL) { 3835 if (mctl_present) 3836 freeb(first_mp1); 3837 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3838 3839 /* Send it upstream */ 3840 (connp->conn_recv)(connp, mp1, NULL); 3841 } 3842 next_one: 3843 mutex_enter(&connfp->connf_lock); 3844 /* Follow the next pointer before releasing the conn. */ 3845 next_conn = connp->conn_next; 3846 IP6_STAT(ipst, ip6_udp_fanmb); 3847 CONN_DEC_REF(connp); 3848 connp = next_conn; 3849 } 3850 3851 /* Last one. Send it upstream. 
*/ 3852 mutex_exit(&connfp->connf_lock); 3853 3854 /* Initiate IPPF processing */ 3855 if (IP6_IN_IPP(flags, ipst)) { 3856 uint_t ifindex; 3857 3858 mutex_enter(&ill->ill_lock); 3859 ifindex = ill->ill_phyint->phyint_ifindex; 3860 mutex_exit(&ill->ill_lock); 3861 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3862 if (mp == NULL) { 3863 CONN_DEC_REF(connp); 3864 if (mctl_present) { 3865 freeb(first_mp); 3866 } 3867 return; 3868 } 3869 } 3870 3871 /* 3872 * For link-local always add ifindex so that transport can set 3873 * sin6_scope_id. Avoid it for ICMP error fanout. 3874 */ 3875 if ((connp->conn_ip_recvpktinfo || 3876 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3877 /* Add header */ 3878 mp = ip_add_info_v6(mp, inill, &dst); 3879 if (mp == NULL) { 3880 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3881 CONN_DEC_REF(connp); 3882 if (mctl_present) 3883 freeb(first_mp); 3884 return; 3885 } else if (mctl_present) { 3886 first_mp->b_cont = mp; 3887 } else { 3888 first_mp = mp; 3889 } 3890 } 3891 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3892 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3893 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3894 freemsg(mp); 3895 } else { 3896 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3897 first_mp = ipsec_check_inbound_policy(first_mp, 3898 connp, NULL, ip6h, mctl_present); 3899 if (first_mp == NULL) { 3900 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3901 CONN_DEC_REF(connp); 3902 return; 3903 } 3904 } 3905 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3906 3907 /* Send it upstream */ 3908 (connp->conn_recv)(connp, mp, NULL); 3909 } 3910 IP6_STAT(ipst, ip6_udp_fanmb); 3911 CONN_DEC_REF(connp); 3912 if (mctl_present) 3913 freeb(first_mp); 3914 return; 3915 3916 notfound: 3917 mutex_exit(&connfp->connf_lock); 3918 /* 3919 * No one bound to this port. Is 3920 * there a client that wants all 3921 * unclaimed datagrams? 
3922 */ 3923 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3924 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3925 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3926 zoneid); 3927 } else { 3928 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3929 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3930 mctl_present, zoneid, ipst)) { 3931 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3932 } 3933 } 3934 } 3935 3936 /* 3937 * int ip_find_hdr_v6() 3938 * 3939 * This routine is used by the upper layer protocols and the IP tunnel 3940 * module to: 3941 * - Set extension header pointers to appropriate locations 3942 * - Determine IPv6 header length and return it 3943 * - Return a pointer to the last nexthdr value 3944 * 3945 * The caller must initialize ipp_fields. 3946 * 3947 * NOTE: If multiple extension headers of the same type are present, 3948 * ip_find_hdr_v6() will set the respective extension header pointers 3949 * to the first one that it encounters in the IPv6 header. It also 3950 * skips fragment headers. This routine deals with malformed packets 3951 * of various sorts in which case the returned length is up to the 3952 * malformed part. 3953 */ 3954 int 3955 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3956 { 3957 uint_t length, ehdrlen; 3958 uint8_t nexthdr; 3959 uint8_t *whereptr, *endptr; 3960 ip6_dest_t *tmpdstopts; 3961 ip6_rthdr_t *tmprthdr; 3962 ip6_hbh_t *tmphopopts; 3963 ip6_frag_t *tmpfraghdr; 3964 3965 length = IPV6_HDR_LEN; 3966 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3967 endptr = mp->b_wptr; 3968 3969 nexthdr = ip6h->ip6_nxt; 3970 while (whereptr < endptr) { 3971 /* Is there enough left for len + nexthdr? 
*/ 3972 if (whereptr + MIN_EHDR_LEN > endptr) 3973 goto done; 3974 3975 switch (nexthdr) { 3976 case IPPROTO_HOPOPTS: 3977 tmphopopts = (ip6_hbh_t *)whereptr; 3978 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 3979 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 3980 goto done; 3981 nexthdr = tmphopopts->ip6h_nxt; 3982 /* return only 1st hbh */ 3983 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 3984 ipp->ipp_fields |= IPPF_HOPOPTS; 3985 ipp->ipp_hopopts = tmphopopts; 3986 ipp->ipp_hopoptslen = ehdrlen; 3987 } 3988 break; 3989 case IPPROTO_DSTOPTS: 3990 tmpdstopts = (ip6_dest_t *)whereptr; 3991 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 3992 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 3993 goto done; 3994 nexthdr = tmpdstopts->ip6d_nxt; 3995 /* 3996 * ipp_dstopts is set to the destination header after a 3997 * routing header. 3998 * Assume it is a post-rthdr destination header 3999 * and adjust when we find an rthdr. 4000 */ 4001 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4002 ipp->ipp_fields |= IPPF_DSTOPTS; 4003 ipp->ipp_dstopts = tmpdstopts; 4004 ipp->ipp_dstoptslen = ehdrlen; 4005 } 4006 break; 4007 case IPPROTO_ROUTING: 4008 tmprthdr = (ip6_rthdr_t *)whereptr; 4009 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4010 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4011 goto done; 4012 nexthdr = tmprthdr->ip6r_nxt; 4013 /* return only 1st rthdr */ 4014 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4015 ipp->ipp_fields |= IPPF_RTHDR; 4016 ipp->ipp_rthdr = tmprthdr; 4017 ipp->ipp_rthdrlen = ehdrlen; 4018 } 4019 /* 4020 * Make any destination header we've seen be a 4021 * pre-rthdr destination header. 
4022 */ 4023 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4024 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4025 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4026 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4027 ipp->ipp_dstopts = NULL; 4028 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4029 ipp->ipp_dstoptslen = 0; 4030 } 4031 break; 4032 case IPPROTO_FRAGMENT: 4033 tmpfraghdr = (ip6_frag_t *)whereptr; 4034 ehdrlen = sizeof (ip6_frag_t); 4035 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4036 goto done; 4037 nexthdr = tmpfraghdr->ip6f_nxt; 4038 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4039 ipp->ipp_fields |= IPPF_FRAGHDR; 4040 ipp->ipp_fraghdr = tmpfraghdr; 4041 ipp->ipp_fraghdrlen = ehdrlen; 4042 } 4043 break; 4044 case IPPROTO_NONE: 4045 default: 4046 goto done; 4047 } 4048 length += ehdrlen; 4049 whereptr += ehdrlen; 4050 } 4051 done: 4052 if (nexthdrp != NULL) 4053 *nexthdrp = nexthdr; 4054 return (length); 4055 } 4056 4057 int 4058 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4059 { 4060 ire_t *ire; 4061 4062 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4063 ire = ire_lookup_local_v6(zoneid, ipst); 4064 if (ire == NULL) { 4065 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4066 return (1); 4067 } 4068 ip6h->ip6_src = ire->ire_addr_v6; 4069 ire_refrele(ire); 4070 } 4071 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4072 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4073 return (0); 4074 } 4075 4076 /* 4077 * Try to determine where and what are the IPv6 header length and 4078 * pointer to nexthdr value for the upper layer protocol (or an 4079 * unknown next hdr). 4080 * 4081 * Parameters returns a pointer to the nexthdr value; 4082 * Must handle malformed packets of various sorts. 4083 * Function returns failure for malformed cases. 
4084 */ 4085 boolean_t 4086 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4087 uint8_t **nexthdrpp) 4088 { 4089 uint16_t length; 4090 uint_t ehdrlen; 4091 uint8_t *nexthdrp; 4092 uint8_t *whereptr; 4093 uint8_t *endptr; 4094 ip6_dest_t *desthdr; 4095 ip6_rthdr_t *rthdr; 4096 ip6_frag_t *fraghdr; 4097 4098 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4099 length = IPV6_HDR_LEN; 4100 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4101 endptr = mp->b_wptr; 4102 4103 nexthdrp = &ip6h->ip6_nxt; 4104 while (whereptr < endptr) { 4105 /* Is there enough left for len + nexthdr? */ 4106 if (whereptr + MIN_EHDR_LEN > endptr) 4107 break; 4108 4109 switch (*nexthdrp) { 4110 case IPPROTO_HOPOPTS: 4111 case IPPROTO_DSTOPTS: 4112 /* Assumes the headers are identical for hbh and dst */ 4113 desthdr = (ip6_dest_t *)whereptr; 4114 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4115 if ((uchar_t *)desthdr + ehdrlen > endptr) 4116 return (B_FALSE); 4117 nexthdrp = &desthdr->ip6d_nxt; 4118 break; 4119 case IPPROTO_ROUTING: 4120 rthdr = (ip6_rthdr_t *)whereptr; 4121 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4122 if ((uchar_t *)rthdr + ehdrlen > endptr) 4123 return (B_FALSE); 4124 nexthdrp = &rthdr->ip6r_nxt; 4125 break; 4126 case IPPROTO_FRAGMENT: 4127 fraghdr = (ip6_frag_t *)whereptr; 4128 ehdrlen = sizeof (ip6_frag_t); 4129 if ((uchar_t *)&fraghdr[1] > endptr) 4130 return (B_FALSE); 4131 nexthdrp = &fraghdr->ip6f_nxt; 4132 break; 4133 case IPPROTO_NONE: 4134 /* No next header means we're finished */ 4135 default: 4136 *hdr_length_ptr = length; 4137 *nexthdrpp = nexthdrp; 4138 return (B_TRUE); 4139 } 4140 length += ehdrlen; 4141 whereptr += ehdrlen; 4142 *hdr_length_ptr = length; 4143 *nexthdrpp = nexthdrp; 4144 } 4145 switch (*nexthdrp) { 4146 case IPPROTO_HOPOPTS: 4147 case IPPROTO_DSTOPTS: 4148 case IPPROTO_ROUTING: 4149 case IPPROTO_FRAGMENT: 4150 /* 4151 * If any know extension headers are still to be processed, 4152 * the 
packet's malformed (or at least all the IP header(s) are 4153 * not in the same mblk - and that should never happen. 4154 */ 4155 return (B_FALSE); 4156 4157 default: 4158 /* 4159 * If we get here, we know that all of the IP headers were in 4160 * the same mblk, even if the ULP header is in the next mblk. 4161 */ 4162 *hdr_length_ptr = length; 4163 *nexthdrpp = nexthdrp; 4164 return (B_TRUE); 4165 } 4166 } 4167 4168 /* 4169 * Return the length of the IPv6 related headers (including extension headers) 4170 * Returns a length even if the packet is malformed. 4171 */ 4172 int 4173 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4174 { 4175 uint16_t hdr_len; 4176 uint8_t *nexthdrp; 4177 4178 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4179 return (hdr_len); 4180 } 4181 4182 /* 4183 * IPv6 - 4184 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4185 * to send out a packet to a destination address for which we do not have 4186 * specific routing information. 4187 * 4188 * Handle non-multicast packets. If ill is non-NULL the match is done 4189 * for that ill. 4190 * 4191 * When a specific ill is specified (using IPV6_PKTINFO, 4192 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4193 * on routing entries (ftable and ctable) that have a matching 4194 * ire->ire_ipif->ipif_ill. Thus this can only be used 4195 * for destinations that are on-link for the specific ill 4196 * and that can appear on multiple links. Thus it is useful 4197 * for multicast destinations, link-local destinations, and 4198 * at some point perhaps for site-local destinations (if the 4199 * node sits at a site boundary). 4200 * We create the cache entries in the regular ctable since 4201 * it can not "confuse" things for other destinations. 4202 * 4203 * NOTE : These are the scopes of some of the variables that point at IRE, 4204 * which needs to be followed while making any future modifications 4205 * to avoid memory leaks. 
4206 * 4207 * - ire and sire are the entries looked up initially by 4208 * ire_ftable_lookup_v6. 4209 * - ipif_ire is used to hold the interface ire associated with 4210 * the new cache ire. But it's scope is limited, so we always REFRELE 4211 * it before branching out to error paths. 4212 * - save_ire is initialized before ire_create, so that ire returned 4213 * by ire_create will not over-write the ire. We REFRELE save_ire 4214 * before breaking out of the switch. 4215 * 4216 * Thus on failures, we have to REFRELE only ire and sire, if they 4217 * are not NULL. 4218 */ 4219 /* ARGSUSED */ 4220 void 4221 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4222 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4223 { 4224 in6_addr_t v6gw; 4225 in6_addr_t dst; 4226 ire_t *ire = NULL; 4227 ipif_t *src_ipif = NULL; 4228 ill_t *dst_ill = NULL; 4229 ire_t *sire = NULL; 4230 ire_t *save_ire; 4231 ip6_t *ip6h; 4232 int err = 0; 4233 mblk_t *first_mp; 4234 ipsec_out_t *io; 4235 ushort_t ire_marks = 0; 4236 int match_flags; 4237 ire_t *first_sire = NULL; 4238 mblk_t *copy_mp = NULL; 4239 mblk_t *xmit_mp = NULL; 4240 in6_addr_t save_dst; 4241 uint32_t multirt_flags = 4242 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4243 boolean_t multirt_is_resolvable; 4244 boolean_t multirt_resolve_next; 4245 boolean_t need_rele = B_FALSE; 4246 boolean_t ip6_asp_table_held = B_FALSE; 4247 tsol_ire_gw_secattr_t *attrp = NULL; 4248 tsol_gcgrp_t *gcgrp = NULL; 4249 tsol_gcgrp_addr_t ga; 4250 4251 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4252 4253 first_mp = mp; 4254 if (mp->b_datap->db_type == M_CTL) { 4255 mp = mp->b_cont; 4256 io = (ipsec_out_t *)first_mp->b_rptr; 4257 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4258 } else { 4259 io = NULL; 4260 } 4261 4262 ip6h = (ip6_t *)mp->b_rptr; 4263 4264 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4265 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4266 goto icmp_err_ret; 4267 } else if 
(IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4268 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4269 goto icmp_err_ret; 4270 } 4271 4272 /* 4273 * If this IRE is created for forwarding or it is not for 4274 * TCP traffic, mark it as temporary. 4275 * 4276 * Is it sufficient just to check the next header?? 4277 */ 4278 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4279 ire_marks |= IRE_MARK_TEMPORARY; 4280 4281 /* 4282 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4283 * chain until it gets the most specific information available. 4284 * For example, we know that there is no IRE_CACHE for this dest, 4285 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4286 * ire_ftable_lookup_v6 will look up the gateway, etc. 4287 */ 4288 4289 if (ill == NULL) { 4290 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4291 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4292 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4293 NULL, &sire, zoneid, 0, MBLK_GETLABEL(mp), 4294 match_flags, ipst); 4295 } else { 4296 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4297 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4298 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4299 4300 /* 4301 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4302 * tied to an underlying interface, IS_UNDER_IPMP() may be 4303 * true even when building IREs that will be used for data 4304 * traffic. As such, use the packet's source address to 4305 * determine whether the traffic is test traffic, and set 4306 * MATCH_IRE_MARK_TESTHIDDEN if so. 
4307 */ 4308 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 4309 if (ipif_lookup_testaddr_v6(ill, v6srcp, NULL)) 4310 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 4311 } 4312 4313 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4314 &sire, zoneid, 0, MBLK_GETLABEL(mp), match_flags, ipst); 4315 } 4316 4317 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4318 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4319 4320 /* 4321 * We enter a loop that will be run only once in most cases. 4322 * The loop is re-entered in the case where the destination 4323 * can be reached through multiple RTF_MULTIRT-flagged routes. 4324 * The intention is to compute multiple routes to a single 4325 * destination in a single ip_newroute_v6 call. 4326 * The information is contained in sire->ire_flags. 4327 */ 4328 do { 4329 multirt_resolve_next = B_FALSE; 4330 4331 if (dst_ill != NULL) { 4332 ill_refrele(dst_ill); 4333 dst_ill = NULL; 4334 } 4335 if (src_ipif != NULL) { 4336 ipif_refrele(src_ipif); 4337 src_ipif = NULL; 4338 } 4339 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4340 ip3dbg(("ip_newroute_v6: starting new resolution " 4341 "with first_mp %p, tag %d\n", 4342 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4343 4344 /* 4345 * We check if there are trailing unresolved routes for 4346 * the destination contained in sire. 4347 */ 4348 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4349 &sire, multirt_flags, MBLK_GETLABEL(mp), ipst); 4350 4351 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4352 "ire %p, sire %p\n", 4353 multirt_is_resolvable, (void *)ire, (void *)sire)); 4354 4355 if (!multirt_is_resolvable) { 4356 /* 4357 * No more multirt routes to resolve; give up 4358 * (all routes resolved or no more resolvable 4359 * routes). 
4360 */ 4361 if (ire != NULL) { 4362 ire_refrele(ire); 4363 ire = NULL; 4364 } 4365 } else { 4366 ASSERT(sire != NULL); 4367 ASSERT(ire != NULL); 4368 /* 4369 * We simply use first_sire as a flag that 4370 * indicates if a resolvable multirt route has 4371 * already been found during the preceding 4372 * loops. If it is not the case, we may have 4373 * to send an ICMP error to report that the 4374 * destination is unreachable. We do not 4375 * IRE_REFHOLD first_sire. 4376 */ 4377 if (first_sire == NULL) { 4378 first_sire = sire; 4379 } 4380 } 4381 } 4382 if ((ire == NULL) || (ire == sire)) { 4383 /* 4384 * either ire == NULL (the destination cannot be 4385 * resolved) or ire == sire (the gateway cannot be 4386 * resolved). At this point, there are no more routes 4387 * to resolve for the destination, thus we exit. 4388 */ 4389 if (ip_debug > 3) { 4390 /* ip2dbg */ 4391 pr_addr_dbg("ip_newroute_v6: " 4392 "can't resolve %s\n", AF_INET6, v6dstp); 4393 } 4394 ip3dbg(("ip_newroute_v6: " 4395 "ire %p, sire %p, first_sire %p\n", 4396 (void *)ire, (void *)sire, (void *)first_sire)); 4397 4398 if (sire != NULL) { 4399 ire_refrele(sire); 4400 sire = NULL; 4401 } 4402 4403 if (first_sire != NULL) { 4404 /* 4405 * At least one multirt route has been found 4406 * in the same ip_newroute() call; there is no 4407 * need to report an ICMP error. 4408 * first_sire was not IRE_REFHOLDed. 4409 */ 4410 MULTIRT_DEBUG_UNTAG(first_mp); 4411 freemsg(first_mp); 4412 return; 4413 } 4414 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4415 RTA_DST, ipst); 4416 goto icmp_err_ret; 4417 } 4418 4419 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4420 4421 /* 4422 * Verify that the returned IRE does not have either the 4423 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4424 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 
4425 */ 4426 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4427 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4428 goto icmp_err_ret; 4429 4430 /* 4431 * Increment the ire_ob_pkt_count field for ire if it is an 4432 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4433 * increment the same for the parent IRE, sire, if it is some 4434 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4435 */ 4436 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4437 UPDATE_OB_PKT_COUNT(ire); 4438 ire->ire_last_used_time = lbolt; 4439 } 4440 4441 if (sire != NULL) { 4442 mutex_enter(&sire->ire_lock); 4443 v6gw = sire->ire_gateway_addr_v6; 4444 mutex_exit(&sire->ire_lock); 4445 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4446 IRE_INTERFACE)) == 0); 4447 UPDATE_OB_PKT_COUNT(sire); 4448 sire->ire_last_used_time = lbolt; 4449 } else { 4450 v6gw = ipv6_all_zeros; 4451 } 4452 4453 /* 4454 * We have a route to reach the destination. Find the 4455 * appropriate ill, then get a source address that matches the 4456 * right scope via ipif_select_source_v6(). 4457 * 4458 * If we are here trying to create an IRE_CACHE for an offlink 4459 * destination and have an IRE_CACHE entry for VNI, then use 4460 * ire_stq instead since VNI's queue is a black hole. 4461 * 4462 * Note: While we pick a dst_ill we are really only interested 4463 * in the ill for load spreading. The source ipif is 4464 * determined by source address selection below. 
4465 */ 4466 if ((ire->ire_type == IRE_CACHE) && 4467 IS_VNI(ire->ire_ipif->ipif_ill)) { 4468 dst_ill = ire->ire_stq->q_ptr; 4469 ill_refhold(dst_ill); 4470 } else { 4471 ill_t *ill = ire->ire_ipif->ipif_ill; 4472 4473 if (IS_IPMP(ill)) { 4474 dst_ill = 4475 ipmp_illgrp_hold_next_ill(ill->ill_grp); 4476 } else { 4477 dst_ill = ill; 4478 ill_refhold(dst_ill); 4479 } 4480 } 4481 4482 if (dst_ill == NULL) { 4483 if (ip_debug > 2) { 4484 pr_addr_dbg("ip_newroute_v6 : no dst " 4485 "ill for dst %s\n", AF_INET6, v6dstp); 4486 } 4487 goto icmp_err_ret; 4488 } 4489 4490 if (ill != NULL && dst_ill != ill && 4491 !IS_IN_SAME_ILLGRP(dst_ill, ill)) { 4492 /* 4493 * We should have found a route matching "ill" 4494 * as we called ire_ftable_lookup_v6 with 4495 * MATCH_IRE_ILL. Rather than asserting when 4496 * there is a mismatch, we just drop the packet. 4497 */ 4498 ip0dbg(("ip_newroute_v6: BOUND_IF failed: " 4499 "dst_ill %s ill %s\n", dst_ill->ill_name, 4500 ill->ill_name)); 4501 goto icmp_err_ret; 4502 } 4503 4504 /* 4505 * Pick a source address which matches the scope of the 4506 * destination address. 4507 * For RTF_SETSRC routes, the source address is imposed by the 4508 * parent ire (sire). 4509 */ 4510 ASSERT(src_ipif == NULL); 4511 4512 /* 4513 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4514 * tied to the underlying interface, IS_UNDER_IPMP() may be 4515 * true even when building IREs that will be used for data 4516 * traffic. As such, see if the packet's source address is a 4517 * test address, and if so use that test address's ipif for 4518 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 4519 * ire_add_v6() can work properly. 
4520 */ 4521 if (ill != NULL && IS_UNDER_IPMP(ill)) 4522 (void) ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 4523 4524 if (src_ipif == NULL && ire->ire_type == IRE_IF_RESOLVER && 4525 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4526 ip6_asp_can_lookup(ipst)) { 4527 /* 4528 * The ire cache entry we're adding is for the 4529 * gateway itself. The source address in this case 4530 * is relative to the gateway's address. 4531 */ 4532 ip6_asp_table_held = B_TRUE; 4533 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4534 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4535 if (src_ipif != NULL) 4536 ire_marks |= IRE_MARK_USESRC_CHECK; 4537 } else if (src_ipif == NULL) { 4538 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4539 /* 4540 * Check that the ipif matching the requested 4541 * source address still exists. 4542 */ 4543 src_ipif = ipif_lookup_addr_v6( 4544 &sire->ire_src_addr_v6, NULL, zoneid, 4545 NULL, NULL, NULL, NULL, ipst); 4546 } 4547 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4548 ip6_asp_table_held = B_TRUE; 4549 src_ipif = ipif_select_source_v6(dst_ill, 4550 v6dstp, B_FALSE, 4551 IPV6_PREFER_SRC_DEFAULT, zoneid); 4552 if (src_ipif != NULL) 4553 ire_marks |= IRE_MARK_USESRC_CHECK; 4554 } 4555 } 4556 4557 if (src_ipif == NULL) { 4558 if (ip_debug > 2) { 4559 /* ip1dbg */ 4560 pr_addr_dbg("ip_newroute_v6: no src for " 4561 "dst %s\n", AF_INET6, v6dstp); 4562 printf("ip_newroute_v6: interface name %s\n", 4563 dst_ill->ill_name); 4564 } 4565 goto icmp_err_ret; 4566 } 4567 4568 if (ip_debug > 3) { 4569 /* ip2dbg */ 4570 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4571 AF_INET6, &v6gw); 4572 } 4573 ip2dbg(("\tire type %s (%d)\n", 4574 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4575 4576 /* 4577 * At this point in ip_newroute_v6(), ire is either the 4578 * IRE_CACHE of the next-hop gateway for an off-subnet 4579 * destination or an IRE_INTERFACE type that should be used 4580 * to resolve an on-subnet destination or an on-subnet 4581 * 
next-hop gateway. 4582 * 4583 * In the IRE_CACHE case, we have the following : 4584 * 4585 * 1) src_ipif - used for getting a source address. 4586 * 4587 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4588 * means packets using this IRE_CACHE will go out on dst_ill. 4589 * 4590 * 3) The IRE sire will point to the prefix that is the longest 4591 * matching route for the destination. These prefix types 4592 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4593 * 4594 * The newly created IRE_CACHE entry for the off-subnet 4595 * destination is tied to both the prefix route and the 4596 * interface route used to resolve the next-hop gateway 4597 * via the ire_phandle and ire_ihandle fields, respectively. 4598 * 4599 * In the IRE_INTERFACE case, we have the following : 4600 * 4601 * 1) src_ipif - used for getting a source address. 4602 * 4603 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4604 * means packets using the IRE_CACHE that we will build 4605 * here will go out on dst_ill. 4606 * 4607 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4608 * to be created will only be tied to the IRE_INTERFACE that 4609 * was derived from the ire_ihandle field. 4610 * 4611 * If sire is non-NULL, it means the destination is off-link 4612 * and we will first create the IRE_CACHE for the gateway. 4613 * Next time through ip_newroute_v6, we will create the 4614 * IRE_CACHE for the final destination as described above. 4615 */ 4616 save_ire = ire; 4617 switch (ire->ire_type) { 4618 case IRE_CACHE: { 4619 ire_t *ipif_ire; 4620 4621 ASSERT(sire != NULL); 4622 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4623 mutex_enter(&ire->ire_lock); 4624 v6gw = ire->ire_gateway_addr_v6; 4625 mutex_exit(&ire->ire_lock); 4626 } 4627 /* 4628 * We need 3 ire's to create a new cache ire for an 4629 * off-link destination from the cache ire of the 4630 * gateway. 4631 * 4632 * 1. The prefix ire 'sire' 4633 * 2. The cache ire of the gateway 'ire' 4634 * 3. 
The interface ire 'ipif_ire' 4635 * 4636 * We have (1) and (2). We lookup (3) below. 4637 * 4638 * If there is no interface route to the gateway, 4639 * it is a race condition, where we found the cache 4640 * but the inteface route has been deleted. 4641 */ 4642 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4643 if (ipif_ire == NULL) { 4644 ip1dbg(("ip_newroute_v6:" 4645 "ire_ihandle_lookup_offlink_v6 failed\n")); 4646 goto icmp_err_ret; 4647 } 4648 4649 /* 4650 * Note: the new ire inherits RTF_SETSRC 4651 * and RTF_MULTIRT to propagate these flags from prefix 4652 * to cache. 4653 */ 4654 4655 /* 4656 * Check cached gateway IRE for any security 4657 * attributes; if found, associate the gateway 4658 * credentials group to the destination IRE. 4659 */ 4660 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4661 mutex_enter(&attrp->igsa_lock); 4662 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4663 GCGRP_REFHOLD(gcgrp); 4664 mutex_exit(&attrp->igsa_lock); 4665 } 4666 4667 ire = ire_create_v6( 4668 v6dstp, /* dest address */ 4669 &ipv6_all_ones, /* mask */ 4670 &src_ipif->ipif_v6src_addr, /* source address */ 4671 &v6gw, /* gateway address */ 4672 &save_ire->ire_max_frag, 4673 NULL, /* src nce */ 4674 dst_ill->ill_rq, /* recv-from queue */ 4675 dst_ill->ill_wq, /* send-to queue */ 4676 IRE_CACHE, 4677 src_ipif, 4678 &sire->ire_mask_v6, /* Parent mask */ 4679 sire->ire_phandle, /* Parent handle */ 4680 ipif_ire->ire_ihandle, /* Interface handle */ 4681 sire->ire_flags & /* flags if any */ 4682 (RTF_SETSRC | RTF_MULTIRT), 4683 &(sire->ire_uinfo), 4684 NULL, 4685 gcgrp, 4686 ipst); 4687 4688 if (ire == NULL) { 4689 if (gcgrp != NULL) { 4690 GCGRP_REFRELE(gcgrp); 4691 gcgrp = NULL; 4692 } 4693 ire_refrele(save_ire); 4694 ire_refrele(ipif_ire); 4695 break; 4696 } 4697 4698 /* reference now held by IRE */ 4699 gcgrp = NULL; 4700 4701 ire->ire_marks |= ire_marks; 4702 4703 /* 4704 * Prevent sire and ipif_ire from getting deleted. 
The 4705 * newly created ire is tied to both of them via the 4706 * phandle and ihandle respectively. 4707 */ 4708 IRB_REFHOLD(sire->ire_bucket); 4709 /* Has it been removed already ? */ 4710 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4711 IRB_REFRELE(sire->ire_bucket); 4712 ire_refrele(ipif_ire); 4713 ire_refrele(save_ire); 4714 break; 4715 } 4716 4717 IRB_REFHOLD(ipif_ire->ire_bucket); 4718 /* Has it been removed already ? */ 4719 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4720 IRB_REFRELE(ipif_ire->ire_bucket); 4721 IRB_REFRELE(sire->ire_bucket); 4722 ire_refrele(ipif_ire); 4723 ire_refrele(save_ire); 4724 break; 4725 } 4726 4727 xmit_mp = first_mp; 4728 if (ire->ire_flags & RTF_MULTIRT) { 4729 copy_mp = copymsg(first_mp); 4730 if (copy_mp != NULL) { 4731 xmit_mp = copy_mp; 4732 MULTIRT_DEBUG_TAG(first_mp); 4733 } 4734 } 4735 ire_add_then_send(q, ire, xmit_mp); 4736 if (ip6_asp_table_held) { 4737 ip6_asp_table_refrele(ipst); 4738 ip6_asp_table_held = B_FALSE; 4739 } 4740 ire_refrele(save_ire); 4741 4742 /* Assert that sire is not deleted yet. */ 4743 ASSERT(sire->ire_ptpn != NULL); 4744 IRB_REFRELE(sire->ire_bucket); 4745 4746 /* Assert that ipif_ire is not deleted yet. */ 4747 ASSERT(ipif_ire->ire_ptpn != NULL); 4748 IRB_REFRELE(ipif_ire->ire_bucket); 4749 ire_refrele(ipif_ire); 4750 4751 if (copy_mp != NULL) { 4752 /* 4753 * Search for the next unresolved 4754 * multirt route. 4755 */ 4756 copy_mp = NULL; 4757 ipif_ire = NULL; 4758 ire = NULL; 4759 /* re-enter the loop */ 4760 multirt_resolve_next = B_TRUE; 4761 continue; 4762 } 4763 ire_refrele(sire); 4764 ill_refrele(dst_ill); 4765 ipif_refrele(src_ipif); 4766 return; 4767 } 4768 case IRE_IF_NORESOLVER: 4769 /* 4770 * We have what we need to build an IRE_CACHE. 4771 * 4772 * handle the Gated case, where we create 4773 * a NORESOLVER route for loopback. 
4774 */ 4775 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4776 break; 4777 /* 4778 * TSol note: We are creating the ire cache for the 4779 * destination 'dst'. If 'dst' is offlink, going 4780 * through the first hop 'gw', the security attributes 4781 * of 'dst' must be set to point to the gateway 4782 * credentials of gateway 'gw'. If 'dst' is onlink, it 4783 * is possible that 'dst' is a potential gateway that is 4784 * referenced by some route that has some security 4785 * attributes. Thus in the former case, we need to do a 4786 * gcgrp_lookup of 'gw' while in the latter case we 4787 * need to do gcgrp_lookup of 'dst' itself. 4788 */ 4789 ga.ga_af = AF_INET6; 4790 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4791 ga.ga_addr = v6gw; 4792 else 4793 ga.ga_addr = *v6dstp; 4794 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4795 4796 /* 4797 * Note: the new ire inherits sire flags RTF_SETSRC 4798 * and RTF_MULTIRT to propagate those rules from prefix 4799 * to cache. 4800 */ 4801 ire = ire_create_v6( 4802 v6dstp, /* dest address */ 4803 &ipv6_all_ones, /* mask */ 4804 &src_ipif->ipif_v6src_addr, /* source address */ 4805 &v6gw, /* gateway address */ 4806 &save_ire->ire_max_frag, 4807 NULL, /* no src nce */ 4808 dst_ill->ill_rq, /* recv-from queue */ 4809 dst_ill->ill_wq, /* send-to queue */ 4810 IRE_CACHE, 4811 src_ipif, 4812 &save_ire->ire_mask_v6, /* Parent mask */ 4813 (sire != NULL) ? /* Parent handle */ 4814 sire->ire_phandle : 0, 4815 save_ire->ire_ihandle, /* Interface handle */ 4816 (sire != NULL) ? 
/* flags if any */ 4817 sire->ire_flags & 4818 (RTF_SETSRC | RTF_MULTIRT) : 0, 4819 &(save_ire->ire_uinfo), 4820 NULL, 4821 gcgrp, 4822 ipst); 4823 4824 if (ire == NULL) { 4825 if (gcgrp != NULL) { 4826 GCGRP_REFRELE(gcgrp); 4827 gcgrp = NULL; 4828 } 4829 ire_refrele(save_ire); 4830 break; 4831 } 4832 4833 /* reference now held by IRE */ 4834 gcgrp = NULL; 4835 4836 ire->ire_marks |= ire_marks; 4837 4838 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4839 dst = v6gw; 4840 else 4841 dst = *v6dstp; 4842 err = ndp_noresolver(dst_ill, &dst); 4843 if (err != 0) { 4844 ire_refrele(save_ire); 4845 break; 4846 } 4847 4848 /* Prevent save_ire from getting deleted */ 4849 IRB_REFHOLD(save_ire->ire_bucket); 4850 /* Has it been removed already ? */ 4851 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4852 IRB_REFRELE(save_ire->ire_bucket); 4853 ire_refrele(save_ire); 4854 break; 4855 } 4856 4857 xmit_mp = first_mp; 4858 /* 4859 * In case of MULTIRT, a copy of the current packet 4860 * to send is made to further re-enter the 4861 * loop and attempt another route resolution 4862 */ 4863 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4864 copy_mp = copymsg(first_mp); 4865 if (copy_mp != NULL) { 4866 xmit_mp = copy_mp; 4867 MULTIRT_DEBUG_TAG(first_mp); 4868 } 4869 } 4870 ire_add_then_send(q, ire, xmit_mp); 4871 if (ip6_asp_table_held) { 4872 ip6_asp_table_refrele(ipst); 4873 ip6_asp_table_held = B_FALSE; 4874 } 4875 4876 /* Assert that it is not deleted yet. */ 4877 ASSERT(save_ire->ire_ptpn != NULL); 4878 IRB_REFRELE(save_ire->ire_bucket); 4879 ire_refrele(save_ire); 4880 4881 if (copy_mp != NULL) { 4882 /* 4883 * If we found a (no)resolver, we ignore any 4884 * trailing top priority IRE_CACHE in 4885 * further loops. This ensures that we do not 4886 * omit any (no)resolver despite the priority 4887 * in this call. 4888 * IRE_CACHE, if any, will be processed 4889 * by another thread entering ip_newroute(), 4890 * (on resolver response, for example). 
4891 * We use this to force multiple parallel 4892 * resolution as soon as a packet needs to be 4893 * sent. The result is, after one packet 4894 * emission all reachable routes are generally 4895 * resolved. 4896 * Otherwise, complete resolution of MULTIRT 4897 * routes would require several emissions as 4898 * side effect. 4899 */ 4900 multirt_flags &= ~MULTIRT_CACHEGW; 4901 4902 /* 4903 * Search for the next unresolved multirt 4904 * route. 4905 */ 4906 copy_mp = NULL; 4907 save_ire = NULL; 4908 ire = NULL; 4909 /* re-enter the loop */ 4910 multirt_resolve_next = B_TRUE; 4911 continue; 4912 } 4913 4914 /* Don't need sire anymore */ 4915 if (sire != NULL) 4916 ire_refrele(sire); 4917 ill_refrele(dst_ill); 4918 ipif_refrele(src_ipif); 4919 return; 4920 4921 case IRE_IF_RESOLVER: 4922 /* 4923 * We can't build an IRE_CACHE yet, but at least we 4924 * found a resolver that can help. 4925 */ 4926 dst = *v6dstp; 4927 4928 /* 4929 * To be at this point in the code with a non-zero gw 4930 * means that dst is reachable through a gateway that 4931 * we have never resolved. By changing dst to the gw 4932 * addr we resolve the gateway first. When 4933 * ire_add_then_send() tries to put the IP dg to dst, 4934 * it will reenter ip_newroute() at which time we will 4935 * find the IRE_CACHE for the gw and create another 4936 * IRE_CACHE above (for dst itself). 4937 */ 4938 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4939 save_dst = dst; 4940 dst = v6gw; 4941 v6gw = ipv6_all_zeros; 4942 } 4943 if (dst_ill->ill_flags & ILLF_XRESOLV) { 4944 /* 4945 * Ask the external resolver to do its thing. 
4946 * Make an mblk chain in the following form: 4947 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 4948 */ 4949 mblk_t *ire_mp; 4950 mblk_t *areq_mp; 4951 areq_t *areq; 4952 in6_addr_t *addrp; 4953 4954 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 4955 if (ip6_asp_table_held) { 4956 ip6_asp_table_refrele(ipst); 4957 ip6_asp_table_held = B_FALSE; 4958 } 4959 ire = ire_create_mp_v6( 4960 &dst, /* dest address */ 4961 &ipv6_all_ones, /* mask */ 4962 &src_ipif->ipif_v6src_addr, 4963 /* source address */ 4964 &v6gw, /* gateway address */ 4965 NULL, /* no src nce */ 4966 dst_ill->ill_rq, /* recv-from queue */ 4967 dst_ill->ill_wq, /* send-to queue */ 4968 IRE_CACHE, 4969 src_ipif, 4970 &save_ire->ire_mask_v6, /* Parent mask */ 4971 0, 4972 save_ire->ire_ihandle, 4973 /* Interface handle */ 4974 0, /* flags if any */ 4975 &(save_ire->ire_uinfo), 4976 NULL, 4977 NULL, 4978 ipst); 4979 4980 ire_refrele(save_ire); 4981 if (ire == NULL) { 4982 ip1dbg(("ip_newroute_v6:" 4983 "ire is NULL\n")); 4984 break; 4985 } 4986 4987 if ((sire != NULL) && 4988 (sire->ire_flags & RTF_MULTIRT)) { 4989 /* 4990 * processing a copy of the packet to 4991 * send for further resolution loops 4992 */ 4993 copy_mp = copymsg(first_mp); 4994 if (copy_mp != NULL) 4995 MULTIRT_DEBUG_TAG(copy_mp); 4996 } 4997 ire->ire_marks |= ire_marks; 4998 ire_mp = ire->ire_mp; 4999 /* 5000 * Now create or find an nce for this interface. 5001 * The hw addr will need to to be set from 5002 * the reply to the AR_ENTRY_QUERY that 5003 * we're about to send. This will be done in 5004 * ire_add_v6(). 5005 */ 5006 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5007 switch (err) { 5008 case 0: 5009 /* 5010 * New cache entry created. 5011 * Break, then ask the external 5012 * resolver. 5013 */ 5014 break; 5015 case EINPROGRESS: 5016 /* 5017 * Resolution in progress; 5018 * packet has been queued by 5019 * ndp_resolver(). 5020 */ 5021 ire_delete(ire); 5022 ire = NULL; 5023 /* 5024 * Check if another multirt 5025 * route must be resolved. 
5026 */ 5027 if (copy_mp != NULL) { 5028 /* 5029 * If we found a resolver, we 5030 * ignore any trailing top 5031 * priority IRE_CACHE in 5032 * further loops. The reason is 5033 * the same as for noresolver. 5034 */ 5035 multirt_flags &= 5036 ~MULTIRT_CACHEGW; 5037 /* 5038 * Search for the next 5039 * unresolved multirt route. 5040 */ 5041 first_mp = copy_mp; 5042 copy_mp = NULL; 5043 mp = first_mp; 5044 if (mp->b_datap->db_type == 5045 M_CTL) { 5046 mp = mp->b_cont; 5047 } 5048 ASSERT(sire != NULL); 5049 dst = save_dst; 5050 /* 5051 * re-enter the loop 5052 */ 5053 multirt_resolve_next = 5054 B_TRUE; 5055 continue; 5056 } 5057 5058 if (sire != NULL) 5059 ire_refrele(sire); 5060 ill_refrele(dst_ill); 5061 ipif_refrele(src_ipif); 5062 return; 5063 default: 5064 /* 5065 * Transient error; packet will be 5066 * freed. 5067 */ 5068 ire_delete(ire); 5069 ire = NULL; 5070 break; 5071 } 5072 if (err != 0) 5073 break; 5074 /* 5075 * Now set up the AR_ENTRY_QUERY and send it. 5076 */ 5077 areq_mp = ill_arp_alloc(dst_ill, 5078 (uchar_t *)&ipv6_areq_template, 5079 (caddr_t)&dst); 5080 if (areq_mp == NULL) { 5081 ip1dbg(("ip_newroute_v6:" 5082 "areq_mp is NULL\n")); 5083 freemsg(ire_mp); 5084 break; 5085 } 5086 areq = (areq_t *)areq_mp->b_rptr; 5087 addrp = (in6_addr_t *)((char *)areq + 5088 areq->areq_target_addr_offset); 5089 *addrp = dst; 5090 addrp = (in6_addr_t *)((char *)areq + 5091 areq->areq_sender_addr_offset); 5092 *addrp = src_ipif->ipif_v6src_addr; 5093 /* 5094 * link the chain, then send up to the resolver. 5095 */ 5096 linkb(areq_mp, ire_mp); 5097 linkb(areq_mp, mp); 5098 ip1dbg(("ip_newroute_v6:" 5099 "putnext to resolver\n")); 5100 putnext(dst_ill->ill_rq, areq_mp); 5101 /* 5102 * Check if another multirt route 5103 * must be resolved. 5104 */ 5105 ire = NULL; 5106 if (copy_mp != NULL) { 5107 /* 5108 * If we find a resolver, we ignore any 5109 * trailing top priority IRE_CACHE in 5110 * further loops. The reason is the 5111 * same as for noresolver. 
5112 */ 5113 multirt_flags &= ~MULTIRT_CACHEGW; 5114 /* 5115 * Search for the next unresolved 5116 * multirt route. 5117 */ 5118 first_mp = copy_mp; 5119 copy_mp = NULL; 5120 mp = first_mp; 5121 if (mp->b_datap->db_type == M_CTL) { 5122 mp = mp->b_cont; 5123 } 5124 ASSERT(sire != NULL); 5125 dst = save_dst; 5126 /* 5127 * re-enter the loop 5128 */ 5129 multirt_resolve_next = B_TRUE; 5130 continue; 5131 } 5132 5133 if (sire != NULL) 5134 ire_refrele(sire); 5135 ill_refrele(dst_ill); 5136 ipif_refrele(src_ipif); 5137 return; 5138 } 5139 /* 5140 * Non-external resolver case. 5141 * 5142 * TSol note: Please see the note above the 5143 * IRE_IF_NORESOLVER case. 5144 */ 5145 ga.ga_af = AF_INET6; 5146 ga.ga_addr = dst; 5147 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5148 5149 ire = ire_create_v6( 5150 &dst, /* dest address */ 5151 &ipv6_all_ones, /* mask */ 5152 &src_ipif->ipif_v6src_addr, /* source address */ 5153 &v6gw, /* gateway address */ 5154 &save_ire->ire_max_frag, 5155 NULL, /* no src nce */ 5156 dst_ill->ill_rq, /* recv-from queue */ 5157 dst_ill->ill_wq, /* send-to queue */ 5158 IRE_CACHE, 5159 src_ipif, 5160 &save_ire->ire_mask_v6, /* Parent mask */ 5161 0, 5162 save_ire->ire_ihandle, /* Interface handle */ 5163 0, /* flags if any */ 5164 &(save_ire->ire_uinfo), 5165 NULL, 5166 gcgrp, 5167 ipst); 5168 5169 if (ire == NULL) { 5170 if (gcgrp != NULL) { 5171 GCGRP_REFRELE(gcgrp); 5172 gcgrp = NULL; 5173 } 5174 ire_refrele(save_ire); 5175 break; 5176 } 5177 5178 /* reference now held by IRE */ 5179 gcgrp = NULL; 5180 5181 if ((sire != NULL) && 5182 (sire->ire_flags & RTF_MULTIRT)) { 5183 copy_mp = copymsg(first_mp); 5184 if (copy_mp != NULL) 5185 MULTIRT_DEBUG_TAG(copy_mp); 5186 } 5187 5188 ire->ire_marks |= ire_marks; 5189 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5190 switch (err) { 5191 case 0: 5192 /* Prevent save_ire from getting deleted */ 5193 IRB_REFHOLD(save_ire->ire_bucket); 5194 /* Has it been removed already ? 
*/ 5195 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5196 IRB_REFRELE(save_ire->ire_bucket); 5197 ire_refrele(save_ire); 5198 break; 5199 } 5200 5201 /* 5202 * We have a resolved cache entry, 5203 * add in the IRE. 5204 */ 5205 ire_add_then_send(q, ire, first_mp); 5206 if (ip6_asp_table_held) { 5207 ip6_asp_table_refrele(ipst); 5208 ip6_asp_table_held = B_FALSE; 5209 } 5210 5211 /* Assert that it is not deleted yet. */ 5212 ASSERT(save_ire->ire_ptpn != NULL); 5213 IRB_REFRELE(save_ire->ire_bucket); 5214 ire_refrele(save_ire); 5215 /* 5216 * Check if another multirt route 5217 * must be resolved. 5218 */ 5219 ire = NULL; 5220 if (copy_mp != NULL) { 5221 /* 5222 * If we find a resolver, we ignore any 5223 * trailing top priority IRE_CACHE in 5224 * further loops. The reason is the 5225 * same as for noresolver. 5226 */ 5227 multirt_flags &= ~MULTIRT_CACHEGW; 5228 /* 5229 * Search for the next unresolved 5230 * multirt route. 5231 */ 5232 first_mp = copy_mp; 5233 copy_mp = NULL; 5234 mp = first_mp; 5235 if (mp->b_datap->db_type == M_CTL) { 5236 mp = mp->b_cont; 5237 } 5238 ASSERT(sire != NULL); 5239 dst = save_dst; 5240 /* 5241 * re-enter the loop 5242 */ 5243 multirt_resolve_next = B_TRUE; 5244 continue; 5245 } 5246 5247 if (sire != NULL) 5248 ire_refrele(sire); 5249 ill_refrele(dst_ill); 5250 ipif_refrele(src_ipif); 5251 return; 5252 5253 case EINPROGRESS: 5254 /* 5255 * mp was consumed - presumably queued. 5256 * No need for ire, presumably resolution is 5257 * in progress, and ire will be added when the 5258 * address is resolved. 5259 */ 5260 if (ip6_asp_table_held) { 5261 ip6_asp_table_refrele(ipst); 5262 ip6_asp_table_held = B_FALSE; 5263 } 5264 ASSERT(ire->ire_nce == NULL); 5265 ire_delete(ire); 5266 ire_refrele(save_ire); 5267 /* 5268 * Check if another multirt route 5269 * must be resolved. 
5270 */ 5271 ire = NULL; 5272 if (copy_mp != NULL) { 5273 /* 5274 * If we find a resolver, we ignore any 5275 * trailing top priority IRE_CACHE in 5276 * further loops. The reason is the 5277 * same as for noresolver. 5278 */ 5279 multirt_flags &= ~MULTIRT_CACHEGW; 5280 /* 5281 * Search for the next unresolved 5282 * multirt route. 5283 */ 5284 first_mp = copy_mp; 5285 copy_mp = NULL; 5286 mp = first_mp; 5287 if (mp->b_datap->db_type == M_CTL) { 5288 mp = mp->b_cont; 5289 } 5290 ASSERT(sire != NULL); 5291 dst = save_dst; 5292 /* 5293 * re-enter the loop 5294 */ 5295 multirt_resolve_next = B_TRUE; 5296 continue; 5297 } 5298 if (sire != NULL) 5299 ire_refrele(sire); 5300 ill_refrele(dst_ill); 5301 ipif_refrele(src_ipif); 5302 return; 5303 default: 5304 /* Some transient error */ 5305 ASSERT(ire->ire_nce == NULL); 5306 ire_refrele(save_ire); 5307 break; 5308 } 5309 break; 5310 default: 5311 break; 5312 } 5313 if (ip6_asp_table_held) { 5314 ip6_asp_table_refrele(ipst); 5315 ip6_asp_table_held = B_FALSE; 5316 } 5317 } while (multirt_resolve_next); 5318 5319 err_ret: 5320 ip1dbg(("ip_newroute_v6: dropped\n")); 5321 if (src_ipif != NULL) 5322 ipif_refrele(src_ipif); 5323 if (dst_ill != NULL) { 5324 need_rele = B_TRUE; 5325 ill = dst_ill; 5326 } 5327 if (ill != NULL) { 5328 if (mp->b_prev != NULL) { 5329 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5330 } else { 5331 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5332 } 5333 5334 if (need_rele) 5335 ill_refrele(ill); 5336 } else { 5337 if (mp->b_prev != NULL) { 5338 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5339 } else { 5340 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5341 } 5342 } 5343 /* Did this packet originate externally? 
*/ 5344 if (mp->b_prev) { 5345 mp->b_next = NULL; 5346 mp->b_prev = NULL; 5347 } 5348 if (copy_mp != NULL) { 5349 MULTIRT_DEBUG_UNTAG(copy_mp); 5350 freemsg(copy_mp); 5351 } 5352 MULTIRT_DEBUG_UNTAG(first_mp); 5353 freemsg(first_mp); 5354 if (ire != NULL) 5355 ire_refrele(ire); 5356 if (sire != NULL) 5357 ire_refrele(sire); 5358 return; 5359 5360 icmp_err_ret: 5361 if (ip6_asp_table_held) 5362 ip6_asp_table_refrele(ipst); 5363 if (src_ipif != NULL) 5364 ipif_refrele(src_ipif); 5365 if (dst_ill != NULL) { 5366 need_rele = B_TRUE; 5367 ill = dst_ill; 5368 } 5369 ip1dbg(("ip_newroute_v6: no route\n")); 5370 if (sire != NULL) 5371 ire_refrele(sire); 5372 /* 5373 * We need to set sire to NULL to avoid double freeing if we 5374 * ever goto err_ret from below. 5375 */ 5376 sire = NULL; 5377 ip6h = (ip6_t *)mp->b_rptr; 5378 /* Skip ip6i_t header if present */ 5379 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5380 /* Make sure the IPv6 header is present */ 5381 if ((mp->b_wptr - (uchar_t *)ip6h) < 5382 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5383 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5384 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5385 goto err_ret; 5386 } 5387 } 5388 mp->b_rptr += sizeof (ip6i_t); 5389 ip6h = (ip6_t *)mp->b_rptr; 5390 } 5391 /* Did this packet originate externally? 
*/ 5392 if (mp->b_prev) { 5393 if (ill != NULL) { 5394 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5395 } else { 5396 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5397 } 5398 mp->b_next = NULL; 5399 mp->b_prev = NULL; 5400 q = WR(q); 5401 } else { 5402 if (ill != NULL) { 5403 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5404 } else { 5405 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5406 } 5407 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5408 /* Failed */ 5409 if (copy_mp != NULL) { 5410 MULTIRT_DEBUG_UNTAG(copy_mp); 5411 freemsg(copy_mp); 5412 } 5413 MULTIRT_DEBUG_UNTAG(first_mp); 5414 freemsg(first_mp); 5415 if (ire != NULL) 5416 ire_refrele(ire); 5417 if (need_rele) 5418 ill_refrele(ill); 5419 return; 5420 } 5421 } 5422 5423 if (need_rele) 5424 ill_refrele(ill); 5425 5426 /* 5427 * At this point we will have ire only if RTF_BLACKHOLE 5428 * or RTF_REJECT flags are set on the IRE. It will not 5429 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5430 */ 5431 if (ire != NULL) { 5432 if (ire->ire_flags & RTF_BLACKHOLE) { 5433 ire_refrele(ire); 5434 if (copy_mp != NULL) { 5435 MULTIRT_DEBUG_UNTAG(copy_mp); 5436 freemsg(copy_mp); 5437 } 5438 MULTIRT_DEBUG_UNTAG(first_mp); 5439 freemsg(first_mp); 5440 return; 5441 } 5442 ire_refrele(ire); 5443 } 5444 if (ip_debug > 3) { 5445 /* ip2dbg */ 5446 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5447 AF_INET6, v6dstp); 5448 } 5449 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5450 B_FALSE, B_FALSE, zoneid, ipst); 5451 } 5452 5453 /* 5454 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5455 * we need to send out a packet to a destination address for which we do not 5456 * have specific routing information. It is only used for multicast packets. 5457 * 5458 * If unspec_src we allow creating an IRE with source address zero. 5459 * ire_send_v6() will delete it after the packet is sent. 
5460 */ 5461 void 5462 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5463 const in6_addr_t *v6dstp, const in6_addr_t *v6srcp, int unspec_src, 5464 zoneid_t zoneid) 5465 { 5466 ire_t *ire = NULL; 5467 ipif_t *src_ipif = NULL; 5468 int err = 0; 5469 ill_t *dst_ill = NULL; 5470 ire_t *save_ire; 5471 ipsec_out_t *io; 5472 ill_t *ill; 5473 mblk_t *first_mp; 5474 ire_t *fire = NULL; 5475 mblk_t *copy_mp = NULL; 5476 const in6_addr_t *ire_v6srcp; 5477 boolean_t probe = B_FALSE; 5478 boolean_t multirt_resolve_next; 5479 boolean_t ipif_held = B_FALSE; 5480 boolean_t ill_held = B_FALSE; 5481 boolean_t ip6_asp_table_held = B_FALSE; 5482 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5483 5484 /* 5485 * This loop is run only once in most cases. 5486 * We loop to resolve further routes only when the destination 5487 * can be reached through multiple RTF_MULTIRT-flagged ires. 5488 */ 5489 do { 5490 multirt_resolve_next = B_FALSE; 5491 if (dst_ill != NULL) { 5492 ill_refrele(dst_ill); 5493 dst_ill = NULL; 5494 } 5495 5496 if (src_ipif != NULL) { 5497 ipif_refrele(src_ipif); 5498 src_ipif = NULL; 5499 } 5500 ASSERT(ipif != NULL); 5501 ill = ipif->ipif_ill; 5502 5503 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5504 if (ip_debug > 2) { 5505 /* ip1dbg */ 5506 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5507 AF_INET6, v6dstp); 5508 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5509 ill->ill_name, ipif->ipif_isv6); 5510 } 5511 5512 first_mp = mp; 5513 if (mp->b_datap->db_type == M_CTL) { 5514 mp = mp->b_cont; 5515 io = (ipsec_out_t *)first_mp->b_rptr; 5516 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5517 } else { 5518 io = NULL; 5519 } 5520 5521 /* 5522 * If the interface is a pt-pt interface we look for an 5523 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5524 * local_address and the pt-pt destination address. 5525 * Otherwise we just match the local address. 
5526 */ 5527 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5528 goto err_ret; 5529 } 5530 5531 /* 5532 * We check if an IRE_OFFSUBNET for the addr that goes through 5533 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5534 * RTF_MULTIRT flags must be honored. 5535 */ 5536 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5537 ip2dbg(("ip_newroute_ipif_v6: " 5538 "ipif_lookup_multi_ire_v6(" 5539 "ipif %p, dst %08x) = fire %p\n", 5540 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5541 (void *)fire)); 5542 5543 ASSERT(src_ipif == NULL); 5544 5545 /* 5546 * Because nce_xmit() calls ip_output_v6() and NCEs are always 5547 * tied to the underlying interface, IS_UNDER_IPMP() may be 5548 * true even when building IREs that will be used for data 5549 * traffic. As such, see if the packet's source address is a 5550 * test address, and if so use that test address's ipif for 5551 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 5552 * ire_add_v6() can work properly. 5553 */ 5554 if (IS_UNDER_IPMP(ill)) 5555 probe = ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 5556 5557 /* 5558 * Determine the outbound (destination) ill for this route. 5559 * If IPMP is not in use, that's the same as our ill. If IPMP 5560 * is in-use and we're on the IPMP interface, or we're on an 5561 * underlying ill but sending data traffic, use a suitable 5562 * destination ill from the group. The latter case covers a 5563 * subtle edge condition with multicast: when we bring up an 5564 * IPv6 data address, we will create an NCE on an underlying 5565 * interface, and send solitications to ff02::1, which would 5566 * take us through here, and cause us to create an IRE for 5567 * ff02::1. To meet our defined semantics for multicast (and 5568 * ensure there aren't unexpected echoes), that IRE needs to 5569 * use the IPMP group's nominated multicast interface. 5570 * 5571 * Note: the source ipif is determined by source address 5572 * selection later. 
5573 */ 5574 if (IS_IPMP(ill) || (IS_UNDER_IPMP(ill) && !probe)) { 5575 ill_t *ipmp_ill; 5576 ipmp_illgrp_t *illg; 5577 5578 if (IS_UNDER_IPMP(ill)) { 5579 ipmp_ill = ipmp_ill_hold_ipmp_ill(ill); 5580 } else { 5581 ipmp_ill = ill; 5582 ill_refhold(ipmp_ill); /* for symmetry */ 5583 } 5584 5585 if (ipmp_ill == NULL) 5586 goto err_ret; 5587 5588 illg = ipmp_ill->ill_grp; 5589 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 5590 dst_ill = ipmp_illgrp_hold_cast_ill(illg); 5591 else 5592 dst_ill = ipmp_illgrp_hold_next_ill(illg); 5593 5594 ill_refrele(ipmp_ill); 5595 } else { 5596 dst_ill = ill; 5597 ill_refhold(dst_ill); /* for symmetry */ 5598 } 5599 5600 if (dst_ill == NULL) { 5601 if (ip_debug > 2) { 5602 pr_addr_dbg("ip_newroute_ipif_v6: " 5603 "no dst ill for dst %s\n", 5604 AF_INET6, v6dstp); 5605 } 5606 goto err_ret; 5607 } 5608 5609 /* 5610 * Pick a source address which matches the scope of the 5611 * destination address. 5612 * For RTF_SETSRC routes, the source address is imposed by the 5613 * parent ire (fire). 5614 */ 5615 5616 if (src_ipif == NULL && fire != NULL && 5617 (fire->ire_flags & RTF_SETSRC)) { 5618 /* 5619 * Check that the ipif matching the requested source 5620 * address still exists. 
5621 */ 5622 src_ipif = ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5623 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5624 } 5625 5626 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5627 ip6_asp_table_held = B_TRUE; 5628 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5629 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5630 } 5631 5632 if (src_ipif == NULL) { 5633 if (!unspec_src) { 5634 if (ip_debug > 2) { 5635 /* ip1dbg */ 5636 pr_addr_dbg("ip_newroute_ipif_v6: " 5637 "no src for dst %s\n", 5638 AF_INET6, v6dstp); 5639 printf(" through interface %s\n", 5640 dst_ill->ill_name); 5641 } 5642 goto err_ret; 5643 } 5644 ire_v6srcp = &ipv6_all_zeros; 5645 src_ipif = ipif; 5646 ipif_refhold(src_ipif); 5647 } else { 5648 ire_v6srcp = &src_ipif->ipif_v6src_addr; 5649 } 5650 5651 ire = ipif_to_ire_v6(ipif); 5652 if (ire == NULL) { 5653 if (ip_debug > 2) { 5654 /* ip1dbg */ 5655 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5656 AF_INET6, &ipif->ipif_v6lcl_addr); 5657 printf("ip_newroute_ipif_v6: " 5658 "if %s\n", dst_ill->ill_name); 5659 } 5660 goto err_ret; 5661 } 5662 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5663 goto err_ret; 5664 5665 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5666 5667 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5668 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5669 if (ip_debug > 2) { 5670 /* ip1dbg */ 5671 pr_addr_dbg(" address %s\n", 5672 AF_INET6, &ire->ire_src_addr_v6); 5673 } 5674 save_ire = ire; 5675 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5676 (void *)ire, (void *)ipif)); 5677 5678 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5679 /* 5680 * an IRE_OFFSUBET was looked up 5681 * on that interface. 5682 * this ire has RTF_MULTIRT flag, 5683 * so the resolution loop 5684 * will be re-entered to resolve 5685 * additional routes on other 5686 * interfaces. For that purpose, 5687 * a copy of the packet is 5688 * made at this point. 
5689 */ 5690 fire->ire_last_used_time = lbolt; 5691 copy_mp = copymsg(first_mp); 5692 if (copy_mp) { 5693 MULTIRT_DEBUG_TAG(copy_mp); 5694 } 5695 } 5696 5697 switch (ire->ire_type) { 5698 case IRE_IF_NORESOLVER: { 5699 /* 5700 * We have what we need to build an IRE_CACHE. 5701 * 5702 * handle the Gated case, where we create 5703 * a NORESOLVER route for loopback. 5704 */ 5705 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5706 break; 5707 /* 5708 * The newly created ire will inherit the flags of the 5709 * parent ire, if any. 5710 */ 5711 ire = ire_create_v6( 5712 v6dstp, /* dest address */ 5713 &ipv6_all_ones, /* mask */ 5714 ire_v6srcp, /* source address */ 5715 NULL, /* gateway address */ 5716 &save_ire->ire_max_frag, 5717 NULL, /* no src nce */ 5718 dst_ill->ill_rq, /* recv-from queue */ 5719 dst_ill->ill_wq, /* send-to queue */ 5720 IRE_CACHE, 5721 src_ipif, 5722 NULL, 5723 (fire != NULL) ? /* Parent handle */ 5724 fire->ire_phandle : 0, 5725 save_ire->ire_ihandle, /* Interface handle */ 5726 (fire != NULL) ? 5727 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5728 0, 5729 &ire_uinfo_null, 5730 NULL, 5731 NULL, 5732 ipst); 5733 5734 if (ire == NULL) { 5735 ire_refrele(save_ire); 5736 break; 5737 } 5738 5739 err = ndp_noresolver(dst_ill, v6dstp); 5740 if (err != 0) { 5741 ire_refrele(save_ire); 5742 break; 5743 } 5744 5745 /* Prevent save_ire from getting deleted */ 5746 IRB_REFHOLD(save_ire->ire_bucket); 5747 /* Has it been removed already ? */ 5748 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5749 IRB_REFRELE(save_ire->ire_bucket); 5750 ire_refrele(save_ire); 5751 break; 5752 } 5753 5754 ire_add_then_send(q, ire, first_mp); 5755 if (ip6_asp_table_held) { 5756 ip6_asp_table_refrele(ipst); 5757 ip6_asp_table_held = B_FALSE; 5758 } 5759 5760 /* Assert that it is not deleted yet. 
*/ 5761 ASSERT(save_ire->ire_ptpn != NULL); 5762 IRB_REFRELE(save_ire->ire_bucket); 5763 ire_refrele(save_ire); 5764 if (fire != NULL) { 5765 ire_refrele(fire); 5766 fire = NULL; 5767 } 5768 5769 /* 5770 * The resolution loop is re-entered if we 5771 * actually are in a multirouting case. 5772 */ 5773 if (copy_mp != NULL) { 5774 boolean_t need_resolve = 5775 ire_multirt_need_resolve_v6(v6dstp, 5776 MBLK_GETLABEL(copy_mp), ipst); 5777 if (!need_resolve) { 5778 MULTIRT_DEBUG_UNTAG(copy_mp); 5779 freemsg(copy_mp); 5780 copy_mp = NULL; 5781 } else { 5782 /* 5783 * ipif_lookup_group_v6() calls 5784 * ire_lookup_multi_v6() that uses 5785 * ire_ftable_lookup_v6() to find 5786 * an IRE_INTERFACE for the group. 5787 * In the multirt case, 5788 * ire_lookup_multi_v6() then invokes 5789 * ire_multirt_lookup_v6() to find 5790 * the next resolvable ire. 5791 * As a result, we obtain a new 5792 * interface, derived from the 5793 * next ire. 5794 */ 5795 if (ipif_held) { 5796 ipif_refrele(ipif); 5797 ipif_held = B_FALSE; 5798 } 5799 ipif = ipif_lookup_group_v6(v6dstp, 5800 zoneid, ipst); 5801 ip2dbg(("ip_newroute_ipif: " 5802 "multirt dst %08x, ipif %p\n", 5803 ntohl(V4_PART_OF_V6((*v6dstp))), 5804 (void *)ipif)); 5805 if (ipif != NULL) { 5806 ipif_held = B_TRUE; 5807 mp = copy_mp; 5808 copy_mp = NULL; 5809 multirt_resolve_next = 5810 B_TRUE; 5811 continue; 5812 } else { 5813 freemsg(copy_mp); 5814 } 5815 } 5816 } 5817 ill_refrele(dst_ill); 5818 if (ipif_held) { 5819 ipif_refrele(ipif); 5820 ipif_held = B_FALSE; 5821 } 5822 if (src_ipif != NULL) 5823 ipif_refrele(src_ipif); 5824 return; 5825 } 5826 case IRE_IF_RESOLVER: { 5827 5828 ASSERT(dst_ill->ill_isv6); 5829 5830 /* 5831 * We obtain a partial IRE_CACHE which we will pass 5832 * along with the resolver query. When the response 5833 * comes back it will be there ready for us to add. 5834 */ 5835 /* 5836 * the newly created ire will inherit the flags of the 5837 * parent ire, if any. 
5838 */ 5839 ire = ire_create_v6( 5840 v6dstp, /* dest address */ 5841 &ipv6_all_ones, /* mask */ 5842 ire_v6srcp, /* source address */ 5843 NULL, /* gateway address */ 5844 &save_ire->ire_max_frag, 5845 NULL, /* src nce */ 5846 dst_ill->ill_rq, /* recv-from queue */ 5847 dst_ill->ill_wq, /* send-to queue */ 5848 IRE_CACHE, 5849 src_ipif, 5850 NULL, 5851 (fire != NULL) ? /* Parent handle */ 5852 fire->ire_phandle : 0, 5853 save_ire->ire_ihandle, /* Interface handle */ 5854 (fire != NULL) ? 5855 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5856 0, 5857 &ire_uinfo_null, 5858 NULL, 5859 NULL, 5860 ipst); 5861 5862 if (ire == NULL) { 5863 ire_refrele(save_ire); 5864 break; 5865 } 5866 5867 /* Resolve and add ire to the ctable */ 5868 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5869 switch (err) { 5870 case 0: 5871 /* Prevent save_ire from getting deleted */ 5872 IRB_REFHOLD(save_ire->ire_bucket); 5873 /* Has it been removed already ? */ 5874 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5875 IRB_REFRELE(save_ire->ire_bucket); 5876 ire_refrele(save_ire); 5877 break; 5878 } 5879 /* 5880 * We have a resolved cache entry, 5881 * add in the IRE. 5882 */ 5883 ire_add_then_send(q, ire, first_mp); 5884 if (ip6_asp_table_held) { 5885 ip6_asp_table_refrele(ipst); 5886 ip6_asp_table_held = B_FALSE; 5887 } 5888 5889 /* Assert that it is not deleted yet. */ 5890 ASSERT(save_ire->ire_ptpn != NULL); 5891 IRB_REFRELE(save_ire->ire_bucket); 5892 ire_refrele(save_ire); 5893 if (fire != NULL) { 5894 ire_refrele(fire); 5895 fire = NULL; 5896 } 5897 5898 /* 5899 * The resolution loop is re-entered if we 5900 * actually are in a multirouting case. 
5901 */ 5902 if (copy_mp != NULL) { 5903 boolean_t need_resolve = 5904 ire_multirt_need_resolve_v6(v6dstp, 5905 MBLK_GETLABEL(copy_mp), ipst); 5906 if (!need_resolve) { 5907 MULTIRT_DEBUG_UNTAG(copy_mp); 5908 freemsg(copy_mp); 5909 copy_mp = NULL; 5910 } else { 5911 /* 5912 * ipif_lookup_group_v6() calls 5913 * ire_lookup_multi_v6() that 5914 * uses ire_ftable_lookup_v6() 5915 * to find an IRE_INTERFACE for 5916 * the group. In the multirt 5917 * case, ire_lookup_multi_v6() 5918 * then invokes 5919 * ire_multirt_lookup_v6() to 5920 * find the next resolvable ire. 5921 * As a result, we obtain a new 5922 * interface, derived from the 5923 * next ire. 5924 */ 5925 if (ipif_held) { 5926 ipif_refrele(ipif); 5927 ipif_held = B_FALSE; 5928 } 5929 ipif = ipif_lookup_group_v6( 5930 v6dstp, zoneid, ipst); 5931 ip2dbg(("ip_newroute_ipif: " 5932 "multirt dst %08x, " 5933 "ipif %p\n", 5934 ntohl(V4_PART_OF_V6( 5935 (*v6dstp))), 5936 (void *)ipif)); 5937 if (ipif != NULL) { 5938 ipif_held = B_TRUE; 5939 mp = copy_mp; 5940 copy_mp = NULL; 5941 multirt_resolve_next = 5942 B_TRUE; 5943 continue; 5944 } else { 5945 freemsg(copy_mp); 5946 } 5947 } 5948 } 5949 ill_refrele(dst_ill); 5950 if (ipif_held) { 5951 ipif_refrele(ipif); 5952 ipif_held = B_FALSE; 5953 } 5954 if (src_ipif != NULL) 5955 ipif_refrele(src_ipif); 5956 return; 5957 5958 case EINPROGRESS: 5959 /* 5960 * mp was consumed - presumably queued. 5961 * No need for ire, presumably resolution is 5962 * in progress, and ire will be added when the 5963 * address is resolved. 5964 */ 5965 if (ip6_asp_table_held) { 5966 ip6_asp_table_refrele(ipst); 5967 ip6_asp_table_held = B_FALSE; 5968 } 5969 ire_delete(ire); 5970 ire_refrele(save_ire); 5971 if (fire != NULL) { 5972 ire_refrele(fire); 5973 fire = NULL; 5974 } 5975 5976 /* 5977 * The resolution loop is re-entered if we 5978 * actually are in a multirouting case. 
5979 */ 5980 if (copy_mp != NULL) { 5981 boolean_t need_resolve = 5982 ire_multirt_need_resolve_v6(v6dstp, 5983 MBLK_GETLABEL(copy_mp), ipst); 5984 if (!need_resolve) { 5985 MULTIRT_DEBUG_UNTAG(copy_mp); 5986 freemsg(copy_mp); 5987 copy_mp = NULL; 5988 } else { 5989 /* 5990 * ipif_lookup_group_v6() calls 5991 * ire_lookup_multi_v6() that 5992 * uses ire_ftable_lookup_v6() 5993 * to find an IRE_INTERFACE for 5994 * the group. In the multirt 5995 * case, ire_lookup_multi_v6() 5996 * then invokes 5997 * ire_multirt_lookup_v6() to 5998 * find the next resolvable ire. 5999 * As a result, we obtain a new 6000 * interface, derived from the 6001 * next ire. 6002 */ 6003 if (ipif_held) { 6004 ipif_refrele(ipif); 6005 ipif_held = B_FALSE; 6006 } 6007 ipif = ipif_lookup_group_v6( 6008 v6dstp, zoneid, ipst); 6009 ip2dbg(("ip_newroute_ipif: " 6010 "multirt dst %08x, " 6011 "ipif %p\n", 6012 ntohl(V4_PART_OF_V6( 6013 (*v6dstp))), 6014 (void *)ipif)); 6015 if (ipif != NULL) { 6016 ipif_held = B_TRUE; 6017 mp = copy_mp; 6018 copy_mp = NULL; 6019 multirt_resolve_next = 6020 B_TRUE; 6021 continue; 6022 } else { 6023 freemsg(copy_mp); 6024 } 6025 } 6026 } 6027 ill_refrele(dst_ill); 6028 if (ipif_held) { 6029 ipif_refrele(ipif); 6030 ipif_held = B_FALSE; 6031 } 6032 if (src_ipif != NULL) 6033 ipif_refrele(src_ipif); 6034 return; 6035 default: 6036 /* Some transient error */ 6037 ire_refrele(save_ire); 6038 break; 6039 } 6040 break; 6041 } 6042 default: 6043 break; 6044 } 6045 if (ip6_asp_table_held) { 6046 ip6_asp_table_refrele(ipst); 6047 ip6_asp_table_held = B_FALSE; 6048 } 6049 } while (multirt_resolve_next); 6050 6051 err_ret: 6052 if (ip6_asp_table_held) 6053 ip6_asp_table_refrele(ipst); 6054 if (ire != NULL) 6055 ire_refrele(ire); 6056 if (fire != NULL) 6057 ire_refrele(fire); 6058 if (ipif != NULL && ipif_held) 6059 ipif_refrele(ipif); 6060 if (src_ipif != NULL) 6061 ipif_refrele(src_ipif); 6062 6063 /* Multicast - no point in trying to generate ICMP error */ 6064 if (dst_ill 
!= NULL) { 6065 ill = dst_ill; 6066 ill_held = B_TRUE; 6067 } 6068 if (mp->b_prev || mp->b_next) { 6069 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6070 } else { 6071 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6072 } 6073 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6074 mp->b_next = NULL; 6075 mp->b_prev = NULL; 6076 freemsg(first_mp); 6077 if (ill_held) 6078 ill_refrele(ill); 6079 } 6080 6081 /* 6082 * Parse and process any hop-by-hop or destination options. 6083 * 6084 * Assumes that q is an ill read queue so that ICMP errors for link-local 6085 * destinations are sent out the correct interface. 6086 * 6087 * Returns -1 if there was an error and mp has been consumed. 6088 * Returns 0 if no special action is needed. 6089 * Returns 1 if the packet contained a router alert option for this node 6090 * which is verified to be "interesting/known" for our implementation. 6091 * 6092 * XXX Note: In future as more hbh or dest options are defined, 6093 * it may be better to have different routines for hbh and dest 6094 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6095 * may have same value in different namespaces. Or is it same namespace ?? 
6096 * Current code checks for each opt_type (other than pads) if it is in 6097 * the expected nexthdr (hbh or dest) 6098 */ 6099 static int 6100 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6101 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6102 { 6103 uint8_t opt_type; 6104 uint_t optused; 6105 int ret = 0; 6106 mblk_t *first_mp; 6107 const char *errtype; 6108 zoneid_t zoneid; 6109 ill_t *ill = q->q_ptr; 6110 ipif_t *ipif; 6111 6112 first_mp = mp; 6113 if (mp->b_datap->db_type == M_CTL) { 6114 mp = mp->b_cont; 6115 } 6116 6117 while (optlen != 0) { 6118 opt_type = *optptr; 6119 if (opt_type == IP6OPT_PAD1) { 6120 optused = 1; 6121 } else { 6122 if (optlen < 2) 6123 goto bad_opt; 6124 errtype = "malformed"; 6125 if (opt_type == ip6opt_ls) { 6126 optused = 2 + optptr[1]; 6127 if (optused > optlen) 6128 goto bad_opt; 6129 } else switch (opt_type) { 6130 case IP6OPT_PADN: 6131 /* 6132 * Note:We don't verify that (N-2) pad octets 6133 * are zero as required by spec. Adhere to 6134 * "be liberal in what you accept..." part of 6135 * implementation philosophy (RFC791,RFC1122) 6136 */ 6137 optused = 2 + optptr[1]; 6138 if (optused > optlen) 6139 goto bad_opt; 6140 break; 6141 6142 case IP6OPT_JUMBO: 6143 if (hdr_type != IPPROTO_HOPOPTS) 6144 goto opt_error; 6145 goto opt_error; /* XXX Not implemented! 
*/ 6146 6147 case IP6OPT_ROUTER_ALERT: { 6148 struct ip6_opt_router *or; 6149 6150 if (hdr_type != IPPROTO_HOPOPTS) 6151 goto opt_error; 6152 optused = 2 + optptr[1]; 6153 if (optused > optlen) 6154 goto bad_opt; 6155 or = (struct ip6_opt_router *)optptr; 6156 /* Check total length and alignment */ 6157 if (optused != sizeof (*or) || 6158 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6159 goto opt_error; 6160 /* Check value */ 6161 switch (*((uint16_t *)or->ip6or_value)) { 6162 case IP6_ALERT_MLD: 6163 case IP6_ALERT_RSVP: 6164 ret = 1; 6165 } 6166 break; 6167 } 6168 case IP6OPT_HOME_ADDRESS: { 6169 /* 6170 * Minimal support for the home address option 6171 * (which is required by all IPv6 nodes). 6172 * Implement by just swapping the home address 6173 * and source address. 6174 * XXX Note: this has IPsec implications since 6175 * AH needs to take this into account. 6176 * Also, when IPsec is used we need to ensure 6177 * that this is only processed once 6178 * in the received packet (to avoid swapping 6179 * back and forth). 6180 * NOTE:This option processing is considered 6181 * to be unsafe and prone to a denial of 6182 * service attack. 6183 * The current processing is not safe even with 6184 * IPsec secured IP packets. Since the home 6185 * address option processing requirement still 6186 * is in the IETF draft and in the process of 6187 * being redefined for its usage, it has been 6188 * decided to turn off the option by default. 6189 * If this section of code needs to be executed, 6190 * ndd variable ip6_ignore_home_address_opt 6191 * should be set to 0 at the user's own risk. 6192 */ 6193 struct ip6_opt_home_address *oh; 6194 in6_addr_t tmp; 6195 6196 if (ipst->ips_ipv6_ignore_home_address_opt) 6197 goto opt_error; 6198 6199 if (hdr_type != IPPROTO_DSTOPTS) 6200 goto opt_error; 6201 optused = 2 + optptr[1]; 6202 if (optused > optlen) 6203 goto bad_opt; 6204 6205 /* 6206 * We did this dest. opt the first time 6207 * around (i.e. before AH processing). 
6208 * If we've done AH... stop now. 6209 */ 6210 if (first_mp != mp) { 6211 ipsec_in_t *ii; 6212 6213 ii = (ipsec_in_t *)first_mp->b_rptr; 6214 if (ii->ipsec_in_ah_sa != NULL) 6215 break; 6216 } 6217 6218 oh = (struct ip6_opt_home_address *)optptr; 6219 /* Check total length and alignment */ 6220 if (optused < sizeof (*oh) || 6221 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6222 goto opt_error; 6223 /* Swap ip6_src and the home address */ 6224 tmp = ip6h->ip6_src; 6225 /* XXX Note: only 8 byte alignment option */ 6226 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6227 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6228 break; 6229 } 6230 6231 case IP6OPT_TUNNEL_LIMIT: 6232 if (hdr_type != IPPROTO_DSTOPTS) { 6233 goto opt_error; 6234 } 6235 optused = 2 + optptr[1]; 6236 if (optused > optlen) { 6237 goto bad_opt; 6238 } 6239 if (optused != 3) { 6240 goto opt_error; 6241 } 6242 break; 6243 6244 default: 6245 errtype = "unknown"; 6246 /* FALLTHROUGH */ 6247 opt_error: 6248 /* Determine which zone should send error */ 6249 zoneid = ipif_lookup_addr_zoneid_v6( 6250 &ip6h->ip6_dst, ill, ipst); 6251 switch (IP6OPT_TYPE(opt_type)) { 6252 case IP6OPT_TYPE_SKIP: 6253 optused = 2 + optptr[1]; 6254 if (optused > optlen) 6255 goto bad_opt; 6256 ip1dbg(("ip_process_options_v6: %s " 6257 "opt 0x%x skipped\n", 6258 errtype, opt_type)); 6259 break; 6260 case IP6OPT_TYPE_DISCARD: 6261 ip1dbg(("ip_process_options_v6: %s " 6262 "opt 0x%x; packet dropped\n", 6263 errtype, opt_type)); 6264 freemsg(first_mp); 6265 return (-1); 6266 case IP6OPT_TYPE_ICMP: 6267 if (zoneid == ALL_ZONES) { 6268 freemsg(first_mp); 6269 return (-1); 6270 } 6271 icmp_param_problem_v6(WR(q), first_mp, 6272 ICMP6_PARAMPROB_OPTION, 6273 (uint32_t)(optptr - 6274 (uint8_t *)ip6h), 6275 B_FALSE, B_FALSE, zoneid, ipst); 6276 return (-1); 6277 case IP6OPT_TYPE_FORCEICMP: 6278 /* 6279 * If we don't have a zone and the dst 6280 * addr is multicast, then pick a zone 6281 * based on the inbound interface. 
6282 */ 6283 if (zoneid == ALL_ZONES && 6284 IN6_IS_ADDR_MULTICAST( 6285 &ip6h->ip6_dst)) { 6286 ipif = ipif_select_source_v6( 6287 ill, &ip6h->ip6_src, 6288 B_TRUE, 6289 IPV6_PREFER_SRC_DEFAULT, 6290 ALL_ZONES); 6291 if (ipif != NULL) { 6292 zoneid = 6293 ipif->ipif_zoneid; 6294 ipif_refrele(ipif); 6295 } 6296 } 6297 if (zoneid == ALL_ZONES) { 6298 freemsg(first_mp); 6299 return (-1); 6300 } 6301 icmp_param_problem_v6(WR(q), first_mp, 6302 ICMP6_PARAMPROB_OPTION, 6303 (uint32_t)(optptr - 6304 (uint8_t *)ip6h), 6305 B_FALSE, B_TRUE, zoneid, ipst); 6306 return (-1); 6307 default: 6308 ASSERT(0); 6309 } 6310 } 6311 } 6312 optlen -= optused; 6313 optptr += optused; 6314 } 6315 return (ret); 6316 6317 bad_opt: 6318 /* Determine which zone should send error */ 6319 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6320 if (zoneid == ALL_ZONES) { 6321 freemsg(first_mp); 6322 } else { 6323 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6324 (uint32_t)(optptr - (uint8_t *)ip6h), 6325 B_FALSE, B_FALSE, zoneid, ipst); 6326 } 6327 return (-1); 6328 } 6329 6330 /* 6331 * Process a routing header that is not yet empty. 6332 * Only handles type 0 routing headers. 6333 */ 6334 static void 6335 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6336 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6337 { 6338 ip6_rthdr0_t *rthdr; 6339 uint_t ehdrlen; 6340 uint_t numaddr; 6341 in6_addr_t *addrptr; 6342 in6_addr_t tmp; 6343 ip_stack_t *ipst = ill->ill_ipst; 6344 6345 ASSERT(rth->ip6r_segleft != 0); 6346 6347 if (!ipst->ips_ipv6_forward_src_routed) { 6348 /* XXX Check for source routed out same interface? 
*/ 6349 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6350 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6351 freemsg(hada_mp); 6352 freemsg(mp); 6353 return; 6354 } 6355 6356 if (rth->ip6r_type != 0) { 6357 if (hada_mp != NULL) 6358 goto hada_drop; 6359 /* Sent by forwarding path, and router is global zone */ 6360 icmp_param_problem_v6(WR(q), mp, 6361 ICMP6_PARAMPROB_HEADER, 6362 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6363 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6364 return; 6365 } 6366 rthdr = (ip6_rthdr0_t *)rth; 6367 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6368 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6369 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6370 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6371 if (rthdr->ip6r0_len & 0x1) { 6372 /* An odd length is impossible */ 6373 if (hada_mp != NULL) 6374 goto hada_drop; 6375 /* Sent by forwarding path, and router is global zone */ 6376 icmp_param_problem_v6(WR(q), mp, 6377 ICMP6_PARAMPROB_HEADER, 6378 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6379 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6380 return; 6381 } 6382 numaddr = rthdr->ip6r0_len / 2; 6383 if (rthdr->ip6r0_segleft > numaddr) { 6384 /* segleft exceeds number of addresses in routing header */ 6385 if (hada_mp != NULL) 6386 goto hada_drop; 6387 /* Sent by forwarding path, and router is global zone */ 6388 icmp_param_problem_v6(WR(q), mp, 6389 ICMP6_PARAMPROB_HEADER, 6390 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6391 (uchar_t *)ip6h), 6392 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6393 return; 6394 } 6395 addrptr += (numaddr - rthdr->ip6r0_segleft); 6396 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6397 IN6_IS_ADDR_MULTICAST(addrptr)) { 6398 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6399 freemsg(hada_mp); 6400 freemsg(mp); 6401 return; 6402 } 6403 /* Swap */ 6404 tmp = *addrptr; 6405 *addrptr = ip6h->ip6_dst; 6406 ip6h->ip6_dst = tmp; 6407 rthdr->ip6r0_segleft--; 
6408 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6409 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6410 if (hada_mp != NULL) 6411 goto hada_drop; 6412 /* Sent by forwarding path, and router is global zone */ 6413 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6414 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6415 return; 6416 } 6417 if (ip_check_v6_mblk(mp, ill) == IP6_MBLK_OK) { 6418 ip6h = (ip6_t *)mp->b_rptr; 6419 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6420 } else { 6421 freemsg(mp); 6422 } 6423 return; 6424 hada_drop: 6425 /* IPsec kstats: bean counter? */ 6426 freemsg(hada_mp); 6427 freemsg(mp); 6428 } 6429 6430 /* 6431 * Read side put procedure for IPv6 module. 6432 */ 6433 void 6434 ip_rput_v6(queue_t *q, mblk_t *mp) 6435 { 6436 mblk_t *first_mp; 6437 mblk_t *hada_mp = NULL; 6438 ip6_t *ip6h; 6439 boolean_t ll_multicast = B_FALSE; 6440 boolean_t mctl_present = B_FALSE; 6441 ill_t *ill; 6442 struct iocblk *iocp; 6443 uint_t flags = 0; 6444 mblk_t *dl_mp; 6445 ip_stack_t *ipst; 6446 int check; 6447 6448 ill = (ill_t *)q->q_ptr; 6449 ipst = ill->ill_ipst; 6450 if (ill->ill_state_flags & ILL_CONDEMNED) { 6451 union DL_primitives *dl; 6452 6453 dl = (union DL_primitives *)mp->b_rptr; 6454 /* 6455 * Things are opening or closing - only accept DLPI 6456 * ack messages. If the stream is closing and ip_wsrv 6457 * has completed, ip_close is out of the qwait, but has 6458 * not yet completed qprocsoff. Don't proceed any further 6459 * because the ill has been cleaned up and things hanging 6460 * off the ill have been freed. 6461 */ 6462 if ((mp->b_datap->db_type != M_PCPROTO) || 6463 (dl->dl_primitive == DL_UNITDATA_IND)) { 6464 inet_freemsg(mp); 6465 return; 6466 } 6467 } 6468 6469 dl_mp = NULL; 6470 switch (mp->b_datap->db_type) { 6471 case M_DATA: { 6472 int hlen; 6473 uchar_t *ucp; 6474 struct ether_header *eh; 6475 dl_unitdata_ind_t *dui; 6476 6477 /* 6478 * This is a work-around for CR 6451644, a bug in Nemo. 
It 6479 * should be removed when that problem is fixed. 6480 */ 6481 if (ill->ill_mactype == DL_ETHER && 6482 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6483 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6484 ucp[-2] == (IP6_DL_SAP >> 8)) { 6485 if (hlen >= sizeof (struct ether_vlan_header) && 6486 ucp[-5] == 0 && ucp[-6] == 0x81) 6487 ucp -= sizeof (struct ether_vlan_header); 6488 else 6489 ucp -= sizeof (struct ether_header); 6490 /* 6491 * If it's a group address, then fabricate a 6492 * DL_UNITDATA_IND message. 6493 */ 6494 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6495 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6496 BPRI_HI)) != NULL) { 6497 eh = (struct ether_header *)ucp; 6498 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6499 DB_TYPE(dl_mp) = M_PROTO; 6500 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6501 dui->dl_primitive = DL_UNITDATA_IND; 6502 dui->dl_dest_addr_length = 8; 6503 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6504 dui->dl_src_addr_length = 8; 6505 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6506 8; 6507 dui->dl_group_address = 1; 6508 ucp = (uchar_t *)(dui + 1); 6509 if (ill->ill_sap_length > 0) 6510 ucp += ill->ill_sap_length; 6511 bcopy(&eh->ether_dhost, ucp, 6); 6512 bcopy(&eh->ether_shost, ucp + 8, 6); 6513 ucp = (uchar_t *)(dui + 1); 6514 if (ill->ill_sap_length < 0) 6515 ucp += 8 + ill->ill_sap_length; 6516 bcopy(&eh->ether_type, ucp, 2); 6517 bcopy(&eh->ether_type, ucp + 8, 2); 6518 } 6519 } 6520 break; 6521 } 6522 6523 case M_PROTO: 6524 case M_PCPROTO: 6525 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6526 DL_UNITDATA_IND) { 6527 /* Go handle anything other than data elsewhere. */ 6528 ip_rput_dlpi(q, mp); 6529 return; 6530 } 6531 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6532 6533 /* Save the DLPI header. 
*/ 6534 dl_mp = mp; 6535 mp = mp->b_cont; 6536 dl_mp->b_cont = NULL; 6537 break; 6538 case M_BREAK: 6539 panic("ip_rput_v6: got an M_BREAK"); 6540 /*NOTREACHED*/ 6541 case M_IOCACK: 6542 iocp = (struct iocblk *)mp->b_rptr; 6543 switch (iocp->ioc_cmd) { 6544 case DL_IOC_HDR_INFO: 6545 ill = (ill_t *)q->q_ptr; 6546 ill_fastpath_ack(ill, mp); 6547 return; 6548 6549 case SIOCGTUNPARAM: 6550 case OSIOCGTUNPARAM: 6551 ip_rput_other(NULL, q, mp, NULL); 6552 return; 6553 6554 case SIOCSTUNPARAM: 6555 case OSIOCSTUNPARAM: 6556 /* Go through qwriter */ 6557 break; 6558 default: 6559 putnext(q, mp); 6560 return; 6561 } 6562 /* FALLTHRU */ 6563 case M_ERROR: 6564 case M_HANGUP: 6565 mutex_enter(&ill->ill_lock); 6566 if (ill->ill_state_flags & ILL_CONDEMNED) { 6567 mutex_exit(&ill->ill_lock); 6568 freemsg(mp); 6569 return; 6570 } 6571 ill_refhold_locked(ill); 6572 mutex_exit(&ill->ill_lock); 6573 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6574 return; 6575 case M_CTL: 6576 if ((MBLKL(mp) > sizeof (int)) && 6577 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6578 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6579 mctl_present = B_TRUE; 6580 break; 6581 } 6582 putnext(q, mp); 6583 return; 6584 case M_IOCNAK: 6585 iocp = (struct iocblk *)mp->b_rptr; 6586 switch (iocp->ioc_cmd) { 6587 case DL_IOC_HDR_INFO: 6588 case SIOCGTUNPARAM: 6589 case OSIOCGTUNPARAM: 6590 ip_rput_other(NULL, q, mp, NULL); 6591 return; 6592 6593 case SIOCSTUNPARAM: 6594 case OSIOCSTUNPARAM: 6595 mutex_enter(&ill->ill_lock); 6596 if (ill->ill_state_flags & ILL_CONDEMNED) { 6597 mutex_exit(&ill->ill_lock); 6598 freemsg(mp); 6599 return; 6600 } 6601 ill_refhold_locked(ill); 6602 mutex_exit(&ill->ill_lock); 6603 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6604 return; 6605 default: 6606 break; 6607 } 6608 /* FALLTHRU */ 6609 default: 6610 putnext(q, mp); 6611 return; 6612 } 6613 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6614 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6615 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6616 /* 6617 * if db_ref > 1 then copymsg and free original. Packet may be 6618 * changed and do not want other entity who has a reference to this 6619 * message to trip over the changes. This is a blind change because 6620 * trying to catch all places that might change packet is too 6621 * difficult (since it may be a module above this one). 6622 */ 6623 if (mp->b_datap->db_ref > 1) { 6624 mblk_t *mp1; 6625 6626 mp1 = copymsg(mp); 6627 freemsg(mp); 6628 if (mp1 == NULL) { 6629 first_mp = NULL; 6630 goto discard; 6631 } 6632 mp = mp1; 6633 } 6634 first_mp = mp; 6635 if (mctl_present) { 6636 hada_mp = first_mp; 6637 mp = first_mp->b_cont; 6638 } 6639 6640 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6641 freemsg(mp); 6642 return; 6643 } 6644 6645 ip6h = (ip6_t *)mp->b_rptr; 6646 6647 /* 6648 * ip:::receive must see ipv6 packets with a full header, 6649 * and so is placed after the IP6_MBLK_HDR_ERR check. 6650 */ 6651 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6652 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6653 int, 0); 6654 6655 if (check != IP6_MBLK_OK) { 6656 freemsg(mp); 6657 return; 6658 } 6659 6660 DTRACE_PROBE4(ip6__physical__in__start, 6661 ill_t *, ill, ill_t *, NULL, 6662 ip6_t *, ip6h, mblk_t *, first_mp); 6663 6664 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6665 ipst->ips_ipv6firewall_physical_in, 6666 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6667 6668 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6669 6670 if (first_mp == NULL) 6671 return; 6672 6673 /* 6674 * Attach any necessary label information to this packet. 6675 */ 6676 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 6677 if (ip6opt_ls != 0) 6678 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 6679 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 6680 goto discard; 6681 } 6682 6683 /* IP observability hook. 
*/ 6684 if (ipst->ips_ipobs_enabled) { 6685 zoneid_t dzone; 6686 6687 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 6688 ALL_ZONES); 6689 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, 6690 IPV6_VERSION, 0, ipst); 6691 } 6692 6693 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6694 IPV6_DEFAULT_VERS_AND_FLOW) { 6695 /* 6696 * It may be a bit too expensive to do this mapped address 6697 * check here, but in the interest of robustness, it seems 6698 * like the correct place. 6699 * TODO: Avoid this check for e.g. connected TCP sockets 6700 */ 6701 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6702 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6703 goto discard; 6704 } 6705 6706 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6707 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6708 goto discard; 6709 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6710 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6711 goto discard; 6712 } 6713 6714 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6715 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6716 } else { 6717 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6718 goto discard; 6719 } 6720 freemsg(dl_mp); 6721 return; 6722 6723 discard: 6724 if (dl_mp != NULL) 6725 freeb(dl_mp); 6726 freemsg(first_mp); 6727 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6728 } 6729 6730 /* 6731 * Walk through the IPv6 packet in mp and see if there's an AH header 6732 * in it. See if the AH header needs to get done before other headers in 6733 * the packet. (Worker function for ipsec_early_ah_v6().) 
6734 */ 6735 #define IPSEC_HDR_DONT_PROCESS 0 6736 #define IPSEC_HDR_PROCESS 1 6737 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 6738 static int 6739 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6740 { 6741 uint_t length; 6742 uint_t ehdrlen; 6743 uint8_t *whereptr; 6744 uint8_t *endptr; 6745 uint8_t *nexthdrp; 6746 ip6_dest_t *desthdr; 6747 ip6_rthdr_t *rthdr; 6748 ip6_t *ip6h; 6749 6750 /* 6751 * For now just pullup everything. In general, the less pullups, 6752 * the better, but there's so much squirrelling through anyway, 6753 * it's just easier this way. 6754 */ 6755 if (!pullupmsg(mp, -1)) { 6756 return (IPSEC_MEMORY_ERROR); 6757 } 6758 6759 ip6h = (ip6_t *)mp->b_rptr; 6760 length = IPV6_HDR_LEN; 6761 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6762 endptr = mp->b_wptr; 6763 6764 /* 6765 * We can't just use the argument nexthdr in the place 6766 * of nexthdrp becaue we don't dereference nexthdrp 6767 * till we confirm whether it is a valid address. 6768 */ 6769 nexthdrp = &ip6h->ip6_nxt; 6770 while (whereptr < endptr) { 6771 /* Is there enough left for len + nexthdr? */ 6772 if (whereptr + MIN_EHDR_LEN > endptr) 6773 return (IPSEC_MEMORY_ERROR); 6774 6775 switch (*nexthdrp) { 6776 case IPPROTO_HOPOPTS: 6777 case IPPROTO_DSTOPTS: 6778 /* Assumes the headers are identical for hbh and dst */ 6779 desthdr = (ip6_dest_t *)whereptr; 6780 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6781 if ((uchar_t *)desthdr + ehdrlen > endptr) 6782 return (IPSEC_MEMORY_ERROR); 6783 /* 6784 * Return DONT_PROCESS because the destination 6785 * options header may be for each hop in a 6786 * routing-header, and we only want AH if we're 6787 * finished with routing headers. 
6788 */ 6789 if (*nexthdrp == IPPROTO_DSTOPTS) 6790 return (IPSEC_HDR_DONT_PROCESS); 6791 nexthdrp = &desthdr->ip6d_nxt; 6792 break; 6793 case IPPROTO_ROUTING: 6794 rthdr = (ip6_rthdr_t *)whereptr; 6795 6796 /* 6797 * If there's more hops left on the routing header, 6798 * return now with DON'T PROCESS. 6799 */ 6800 if (rthdr->ip6r_segleft > 0) 6801 return (IPSEC_HDR_DONT_PROCESS); 6802 6803 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6804 if ((uchar_t *)rthdr + ehdrlen > endptr) 6805 return (IPSEC_MEMORY_ERROR); 6806 nexthdrp = &rthdr->ip6r_nxt; 6807 break; 6808 case IPPROTO_FRAGMENT: 6809 /* Wait for reassembly */ 6810 return (IPSEC_HDR_DONT_PROCESS); 6811 case IPPROTO_AH: 6812 *nexthdr = IPPROTO_AH; 6813 return (IPSEC_HDR_PROCESS); 6814 case IPPROTO_NONE: 6815 /* No next header means we're finished */ 6816 default: 6817 return (IPSEC_HDR_DONT_PROCESS); 6818 } 6819 length += ehdrlen; 6820 whereptr += ehdrlen; 6821 } 6822 /* 6823 * Malformed/truncated packet. 6824 */ 6825 return (IPSEC_MEMORY_ERROR); 6826 } 6827 6828 /* 6829 * Path for AH if options are present. If this is the first time we are 6830 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6831 * Otherwise, just fanout. Return value answers the boolean question: 6832 * "Did I consume the mblk you sent me?" 6833 * 6834 * Sometimes AH needs to be done before other IPv6 headers for security 6835 * reasons. This function (and its ipsec_needs_processing_v6() above) 6836 * indicates if that is so, and fans out to the appropriate IPsec protocol 6837 * for the datagram passed in. 
6838 */ 6839 static boolean_t 6840 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6841 ill_t *ill, ill_t *inill, mblk_t *hada_mp, zoneid_t zoneid) 6842 { 6843 mblk_t *mp; 6844 uint8_t nexthdr; 6845 ipsec_in_t *ii = NULL; 6846 ah_t *ah; 6847 ipsec_status_t ipsec_rc; 6848 ip_stack_t *ipst = ill->ill_ipst; 6849 netstack_t *ns = ipst->ips_netstack; 6850 ipsec_stack_t *ipss = ns->netstack_ipsec; 6851 6852 ASSERT((hada_mp == NULL) || (!mctl_present)); 6853 6854 switch (ipsec_needs_processing_v6( 6855 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6856 case IPSEC_MEMORY_ERROR: 6857 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6858 freemsg(hada_mp); 6859 freemsg(first_mp); 6860 return (B_TRUE); 6861 case IPSEC_HDR_DONT_PROCESS: 6862 return (B_FALSE); 6863 } 6864 6865 /* Default means send it to AH! */ 6866 ASSERT(nexthdr == IPPROTO_AH); 6867 if (!mctl_present) { 6868 mp = first_mp; 6869 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 6870 if (first_mp == NULL) { 6871 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6872 "allocation failure.\n")); 6873 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6874 freemsg(hada_mp); 6875 freemsg(mp); 6876 return (B_TRUE); 6877 } 6878 /* 6879 * Store the ill_index so that when we come back 6880 * from IPSEC we ride on the same queue. 6881 */ 6882 ii = (ipsec_in_t *)first_mp->b_rptr; 6883 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6884 ii->ipsec_in_rill_index = inill->ill_phyint->phyint_ifindex; 6885 first_mp->b_cont = mp; 6886 } 6887 /* 6888 * Cache hardware acceleration info. 
6889 */ 6890 if (hada_mp != NULL) { 6891 ASSERT(ii != NULL); 6892 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6893 "caching data attr.\n")); 6894 ii->ipsec_in_accelerated = B_TRUE; 6895 ii->ipsec_in_da = hada_mp; 6896 } 6897 6898 if (!ipsec_loaded(ipss)) { 6899 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 6900 return (B_TRUE); 6901 } 6902 6903 ah = ipsec_inbound_ah_sa(first_mp, ns); 6904 if (ah == NULL) 6905 return (B_TRUE); 6906 ASSERT(ii->ipsec_in_ah_sa != NULL); 6907 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6908 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6909 6910 switch (ipsec_rc) { 6911 case IPSEC_STATUS_SUCCESS: 6912 /* we're done with IPsec processing, send it up */ 6913 ip_fanout_proto_again(first_mp, ill, inill, NULL); 6914 break; 6915 case IPSEC_STATUS_FAILED: 6916 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 6917 break; 6918 case IPSEC_STATUS_PENDING: 6919 /* no action needed */ 6920 break; 6921 } 6922 return (B_TRUE); 6923 } 6924 6925 /* 6926 * Validate the IPv6 mblk for alignment. 6927 */ 6928 int 6929 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 6930 { 6931 int pkt_len, ip6_len; 6932 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 6933 6934 /* check for alignment and full IPv6 header */ 6935 if (!OK_32PTR((uchar_t *)ip6h) || 6936 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6937 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6938 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6939 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6940 return (IP6_MBLK_HDR_ERR); 6941 } 6942 ip6h = (ip6_t *)mp->b_rptr; 6943 } 6944 6945 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6946 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6947 6948 if (mp->b_cont == NULL) 6949 pkt_len = mp->b_wptr - mp->b_rptr; 6950 else 6951 pkt_len = msgdsize(mp); 6952 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6953 6954 /* 6955 * Check for bogus (too short packet) and packet which 6956 * was padded by the link layer. 
6957 */ 6958 if (ip6_len != pkt_len) { 6959 ssize_t diff; 6960 6961 if (ip6_len > pkt_len) { 6962 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 6963 ip6_len, pkt_len)); 6964 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 6965 return (IP6_MBLK_LEN_ERR); 6966 } 6967 diff = (ssize_t)(pkt_len - ip6_len); 6968 6969 if (!adjmsg(mp, -diff)) { 6970 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 6971 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6972 return (IP6_MBLK_LEN_ERR); 6973 } 6974 } 6975 return (IP6_MBLK_OK); 6976 } 6977 6978 /* 6979 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 6980 * ip_rput_v6 has already verified alignment, the min length, the version, 6981 * and db_ref = 1. 6982 * 6983 * The ill passed in (the arg named inill) is the ill that the packet 6984 * actually arrived on. We need to remember this when saving the 6985 * input interface index into potential IPV6_PKTINFO data in 6986 * ip_add_info_v6(). 6987 * 6988 * This routine doesn't free dl_mp; that's the caller's responsibility on 6989 * return. (Note that the callers are complex enough that there's no tail 6990 * recursion here anyway.) 
6991 */ 6992 void 6993 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 6994 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6995 { 6996 ire_t *ire = NULL; 6997 ill_t *ill = inill; 6998 ill_t *outill; 6999 ipif_t *ipif; 7000 uint8_t *whereptr; 7001 uint8_t nexthdr; 7002 uint16_t remlen; 7003 uint_t prev_nexthdr_offset; 7004 uint_t used; 7005 size_t old_pkt_len; 7006 size_t pkt_len; 7007 uint16_t ip6_len; 7008 uint_t hdr_len; 7009 boolean_t mctl_present; 7010 mblk_t *first_mp; 7011 mblk_t *first_mp1; 7012 boolean_t no_forward; 7013 ip6_hbh_t *hbhhdr; 7014 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7015 conn_t *connp; 7016 uint32_t ports; 7017 zoneid_t zoneid = GLOBAL_ZONEID; 7018 uint16_t hck_flags, reass_hck_flags; 7019 uint32_t reass_sum; 7020 boolean_t cksum_err; 7021 mblk_t *mp1; 7022 ip_stack_t *ipst = inill->ill_ipst; 7023 7024 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7025 7026 if (hada_mp != NULL) { 7027 /* 7028 * It's an IPsec accelerated packet. 7029 * Keep a pointer to the data attributes around until 7030 * we allocate the ipsecinfo structure. 7031 */ 7032 IPSECHW_DEBUG(IPSECHW_PKT, 7033 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7034 hada_mp->b_cont = NULL; 7035 /* 7036 * Since it is accelerated, it came directly from 7037 * the ill. 
7038 */ 7039 ASSERT(mctl_present == B_FALSE); 7040 ASSERT(mp->b_datap->db_type != M_CTL); 7041 } 7042 7043 ip6h = (ip6_t *)mp->b_rptr; 7044 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7045 old_pkt_len = pkt_len = ip6_len; 7046 7047 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7048 hck_flags = DB_CKSUMFLAGS(mp); 7049 else 7050 hck_flags = 0; 7051 7052 /* Clear checksum flags in case we need to forward */ 7053 DB_CKSUMFLAGS(mp) = 0; 7054 reass_sum = reass_hck_flags = 0; 7055 7056 nexthdr = ip6h->ip6_nxt; 7057 7058 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7059 (uchar_t *)ip6h); 7060 whereptr = (uint8_t *)&ip6h[1]; 7061 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7062 7063 /* Process hop by hop header options */ 7064 if (nexthdr == IPPROTO_HOPOPTS) { 7065 uint_t ehdrlen; 7066 uint8_t *optptr; 7067 7068 if (remlen < MIN_EHDR_LEN) 7069 goto pkt_too_short; 7070 if (mp->b_cont != NULL && 7071 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7072 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7073 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7074 freemsg(hada_mp); 7075 freemsg(first_mp); 7076 return; 7077 } 7078 ip6h = (ip6_t *)mp->b_rptr; 7079 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7080 } 7081 hbhhdr = (ip6_hbh_t *)whereptr; 7082 nexthdr = hbhhdr->ip6h_nxt; 7083 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7084 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7085 7086 if (remlen < ehdrlen) 7087 goto pkt_too_short; 7088 if (mp->b_cont != NULL && 7089 whereptr + ehdrlen > mp->b_wptr) { 7090 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7091 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7092 freemsg(hada_mp); 7093 freemsg(first_mp); 7094 return; 7095 } 7096 ip6h = (ip6_t *)mp->b_rptr; 7097 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7098 hbhhdr = (ip6_hbh_t *)whereptr; 7099 } 7100 7101 optptr = whereptr + 2; 7102 whereptr += ehdrlen; 7103 remlen -= ehdrlen; 7104 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7105 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7106 case -1: 7107 /* 7108 * Packet has been consumed and any 7109 * needed ICMP messages sent. 7110 */ 7111 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7112 freemsg(hada_mp); 7113 return; 7114 case 0: 7115 /* no action needed */ 7116 break; 7117 case 1: 7118 /* Known router alert */ 7119 goto ipv6forus; 7120 } 7121 } 7122 7123 /* 7124 * On incoming v6 multicast packets we will bypass the ire table, 7125 * and assume that the read queue corresponds to the targetted 7126 * interface. 7127 * 7128 * The effect of this is the same as the IPv4 original code, but is 7129 * much cleaner I think. See ip_rput for how that was done. 7130 */ 7131 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7132 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7133 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7134 7135 /* 7136 * So that we don't end up with dups, only one ill in an IPMP 7137 * group is nominated to receive multicast data traffic. 7138 * However, link-locals on any underlying interfaces will have 7139 * joined their solicited-node multicast addresses and we must 7140 * accept those packets. (We don't attempt to precisely 7141 * filter out duplicate solicited-node multicast packets since 7142 * e.g. an IPMP interface and underlying interface may have 7143 * the same solicited-node multicast address.) Note that we 7144 * won't generally have duplicates because we only issue a 7145 * DL_ENABMULTI_REQ on one interface in a group; the exception 7146 * is when PHYI_MULTI_BCAST is set. 7147 */ 7148 if (IS_UNDER_IPMP(ill) && !ill->ill_nom_cast && 7149 !IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 7150 goto drop_pkt; 7151 } 7152 7153 /* 7154 * XXX TODO Give to mrouted to for multicast forwarding. 
7155 */ 7156 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 7157 ALL_ZONES) == NULL) { 7158 if (ip_debug > 3) { 7159 /* ip2dbg */ 7160 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7161 " which is not for us: %s\n", AF_INET6, 7162 &ip6h->ip6_dst); 7163 } 7164 drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7165 freemsg(hada_mp); 7166 freemsg(first_mp); 7167 return; 7168 } 7169 if (ip_debug > 3) { 7170 /* ip2dbg */ 7171 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7172 AF_INET6, &ip6h->ip6_dst); 7173 } 7174 zoneid = GLOBAL_ZONEID; 7175 goto ipv6forus; 7176 } 7177 7178 ipif = ill->ill_ipif; 7179 7180 /* 7181 * If a packet was received on an interface that is a 6to4 tunnel, 7182 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7183 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7184 * the 6to4 prefix of the address configured on the receiving interface. 7185 * Otherwise, the packet was delivered to this interface in error and 7186 * the packet must be dropped. 7187 */ 7188 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7189 7190 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7191 &ip6h->ip6_dst)) { 7192 if (ip_debug > 2) { 7193 /* ip1dbg */ 7194 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7195 "addressed packet which is not for us: " 7196 "%s\n", AF_INET6, &ip6h->ip6_dst); 7197 } 7198 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7199 freemsg(first_mp); 7200 return; 7201 } 7202 } 7203 7204 /* 7205 * Find an ire that matches destination. For link-local addresses 7206 * we have to match the ill. 7207 * TBD for site local addresses. 
7208 */ 7209 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7210 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7211 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7212 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 7213 } else { 7214 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7215 MBLK_GETLABEL(mp), ipst); 7216 7217 if (ire != NULL && ire->ire_stq != NULL && 7218 ire->ire_zoneid != GLOBAL_ZONEID && 7219 ire->ire_zoneid != ALL_ZONES) { 7220 /* 7221 * Should only use IREs that are visible from the 7222 * global zone for forwarding. 7223 */ 7224 ire_refrele(ire); 7225 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7226 GLOBAL_ZONEID, MBLK_GETLABEL(mp), ipst); 7227 } 7228 } 7229 7230 if (ire == NULL) { 7231 /* 7232 * No matching IRE found. Mark this packet as having 7233 * originated externally. 7234 */ 7235 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7236 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7237 if (!(ill->ill_flags & ILLF_ROUTER)) { 7238 BUMP_MIB(ill->ill_ip_mib, 7239 ipIfStatsInAddrErrors); 7240 } 7241 freemsg(hada_mp); 7242 freemsg(first_mp); 7243 return; 7244 } 7245 if (ip6h->ip6_hops <= 1) { 7246 if (hada_mp != NULL) 7247 goto hada_drop; 7248 /* Sent by forwarding path, and router is global zone */ 7249 icmp_time_exceeded_v6(WR(q), first_mp, 7250 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7251 GLOBAL_ZONEID, ipst); 7252 return; 7253 } 7254 /* 7255 * Per RFC 3513 section 2.5.2, we must not forward packets with 7256 * an unspecified source address. 7257 */ 7258 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7259 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7260 freemsg(hada_mp); 7261 freemsg(first_mp); 7262 return; 7263 } 7264 mp->b_prev = (mblk_t *)(uintptr_t) 7265 ill->ill_phyint->phyint_ifindex; 7266 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7267 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? 
ill : NULL, 7268 GLOBAL_ZONEID, ipst); 7269 return; 7270 } 7271 /* we have a matching IRE */ 7272 if (ire->ire_stq != NULL) { 7273 /* 7274 * To be quicker, we may wish not to chase pointers 7275 * (ire->ire_ipif->ipif_ill...) and instead store the 7276 * forwarding policy in the ire. An unfortunate side- 7277 * effect of this would be requiring an ire flush whenever 7278 * the ILLF_ROUTER flag changes. For now, chase pointers 7279 * once and store in the boolean no_forward. 7280 * 7281 * This appears twice to keep it out of the non-forwarding, 7282 * yes-it's-for-us-on-the-right-interface case. 7283 */ 7284 no_forward = ((ill->ill_flags & 7285 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7286 7287 ASSERT(first_mp == mp); 7288 /* 7289 * This ire has a send-to queue - forward the packet. 7290 */ 7291 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7292 freemsg(hada_mp); 7293 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7294 if (no_forward) { 7295 BUMP_MIB(ill->ill_ip_mib, 7296 ipIfStatsInAddrErrors); 7297 } 7298 freemsg(mp); 7299 ire_refrele(ire); 7300 return; 7301 } 7302 /* 7303 * ipIfStatsHCInForwDatagrams should only be increment if there 7304 * will be an attempt to forward the packet, which is why we 7305 * increment after the above condition has been checked. 7306 */ 7307 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7308 if (ip6h->ip6_hops <= 1) { 7309 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7310 /* Sent by forwarding path, and router is global zone */ 7311 icmp_time_exceeded_v6(WR(q), mp, 7312 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7313 GLOBAL_ZONEID, ipst); 7314 ire_refrele(ire); 7315 return; 7316 } 7317 /* 7318 * Per RFC 3513 section 2.5.2, we must not forward packets with 7319 * an unspecified source address. 
7320 */ 7321 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7322 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7323 freemsg(mp); 7324 ire_refrele(ire); 7325 return; 7326 } 7327 7328 if (is_system_labeled()) { 7329 mblk_t *mp1; 7330 7331 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7332 BUMP_MIB(ill->ill_ip_mib, 7333 ipIfStatsForwProhibits); 7334 freemsg(mp); 7335 ire_refrele(ire); 7336 return; 7337 } 7338 /* Size may have changed */ 7339 mp = mp1; 7340 ip6h = (ip6_t *)mp->b_rptr; 7341 pkt_len = msgdsize(mp); 7342 } 7343 7344 if (pkt_len > ire->ire_max_frag) { 7345 int max_frag = ire->ire_max_frag; 7346 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7347 /* 7348 * Handle labeled packet resizing. 7349 */ 7350 if (is_system_labeled()) { 7351 max_frag = tsol_pmtu_adjust(mp, max_frag, 7352 pkt_len - old_pkt_len, AF_INET6); 7353 } 7354 7355 /* Sent by forwarding path, and router is global zone */ 7356 icmp_pkt2big_v6(WR(q), mp, max_frag, 7357 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7358 ire_refrele(ire); 7359 return; 7360 } 7361 7362 /* 7363 * Check to see if we're forwarding the packet to a 7364 * different link from which it came. If so, check the 7365 * source and destination addresses since routers must not 7366 * forward any packets with link-local source or 7367 * destination addresses to other links. Otherwise (if 7368 * we're forwarding onto the same link), conditionally send 7369 * a redirect message. 7370 */ 7371 if (ire->ire_rfq != q && 7372 !IS_IN_SAME_ILLGRP(ill, (ill_t *)ire->ire_rfq->q_ptr)) { 7373 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7374 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7375 BUMP_MIB(ill->ill_ip_mib, 7376 ipIfStatsInAddrErrors); 7377 freemsg(mp); 7378 ire_refrele(ire); 7379 return; 7380 } 7381 /* TBD add site-local check at site boundary? 
*/ 7382 } else if (ipst->ips_ipv6_send_redirects) { 7383 in6_addr_t *v6targ; 7384 in6_addr_t gw_addr_v6; 7385 ire_t *src_ire_v6 = NULL; 7386 7387 /* 7388 * Don't send a redirect when forwarding a source 7389 * routed packet. 7390 */ 7391 if (ip_source_routed_v6(ip6h, mp, ipst)) 7392 goto forward; 7393 7394 mutex_enter(&ire->ire_lock); 7395 gw_addr_v6 = ire->ire_gateway_addr_v6; 7396 mutex_exit(&ire->ire_lock); 7397 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7398 v6targ = &gw_addr_v6; 7399 /* 7400 * We won't send redirects to a router 7401 * that doesn't have a link local 7402 * address, but will forward. 7403 */ 7404 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7405 BUMP_MIB(ill->ill_ip_mib, 7406 ipIfStatsInAddrErrors); 7407 goto forward; 7408 } 7409 } else { 7410 v6targ = &ip6h->ip6_dst; 7411 } 7412 7413 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7414 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7415 GLOBAL_ZONEID, 0, NULL, 7416 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7417 ipst); 7418 7419 if (src_ire_v6 != NULL) { 7420 /* 7421 * The source is directly connected. 
7422 */ 7423 mp1 = copymsg(mp); 7424 if (mp1 != NULL) { 7425 icmp_send_redirect_v6(WR(q), 7426 mp1, v6targ, &ip6h->ip6_dst, 7427 ill, B_FALSE); 7428 } 7429 ire_refrele(src_ire_v6); 7430 } 7431 } 7432 7433 forward: 7434 /* Hoplimit verified above */ 7435 ip6h->ip6_hops--; 7436 7437 outill = ire->ire_ipif->ipif_ill; 7438 7439 DTRACE_PROBE4(ip6__forwarding__start, 7440 ill_t *, inill, ill_t *, outill, 7441 ip6_t *, ip6h, mblk_t *, mp); 7442 7443 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7444 ipst->ips_ipv6firewall_forwarding, 7445 inill, outill, ip6h, mp, mp, 0, ipst); 7446 7447 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7448 7449 if (mp != NULL) { 7450 UPDATE_IB_PKT_COUNT(ire); 7451 ire->ire_last_used_time = lbolt; 7452 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7453 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7454 } 7455 IRE_REFRELE(ire); 7456 return; 7457 } 7458 7459 /* 7460 * Need to put on correct queue for reassembly to find it. 7461 * No need to use put() since reassembly has its own locks. 7462 * Note: multicast packets and packets destined to addresses 7463 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7464 * the arriving ill. Unlike the IPv4 case, enabling strict 7465 * destination multihoming will prevent accepting packets 7466 * addressed to an IRE_LOCAL on lo0. 7467 */ 7468 if (ire->ire_rfq != q) { 7469 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7470 == NULL) { 7471 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7472 freemsg(hada_mp); 7473 freemsg(first_mp); 7474 return; 7475 } 7476 if (ire->ire_rfq != NULL) { 7477 q = ire->ire_rfq; 7478 ill = (ill_t *)q->q_ptr; 7479 ASSERT(ill != NULL); 7480 } 7481 } 7482 7483 zoneid = ire->ire_zoneid; 7484 UPDATE_IB_PKT_COUNT(ire); 7485 ire->ire_last_used_time = lbolt; 7486 /* Don't use the ire after this point, we'll NULL it out to be sure. 
*/ 7487 ire_refrele(ire); 7488 ire = NULL; 7489 ipv6forus: 7490 /* 7491 * Looks like this packet is for us one way or another. 7492 * This is where we'll process destination headers etc. 7493 */ 7494 for (; ; ) { 7495 switch (nexthdr) { 7496 case IPPROTO_TCP: { 7497 uint16_t *up; 7498 uint32_t sum; 7499 int offset; 7500 7501 hdr_len = pkt_len - remlen; 7502 7503 if (hada_mp != NULL) { 7504 ip0dbg(("tcp hada drop\n")); 7505 goto hada_drop; 7506 } 7507 7508 7509 /* TCP needs all of the TCP header */ 7510 if (remlen < TCP_MIN_HEADER_LENGTH) 7511 goto pkt_too_short; 7512 if (mp->b_cont != NULL && 7513 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7514 if (!pullupmsg(mp, 7515 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7516 BUMP_MIB(ill->ill_ip_mib, 7517 ipIfStatsInDiscards); 7518 freemsg(first_mp); 7519 return; 7520 } 7521 hck_flags = 0; 7522 ip6h = (ip6_t *)mp->b_rptr; 7523 whereptr = (uint8_t *)ip6h + hdr_len; 7524 } 7525 /* 7526 * Extract the offset field from the TCP header. 7527 */ 7528 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7529 if (offset != 5) { 7530 if (offset < 5) { 7531 ip1dbg(("ip_rput_data_v6: short " 7532 "TCP data offset")); 7533 BUMP_MIB(ill->ill_ip_mib, 7534 ipIfStatsInDiscards); 7535 freemsg(first_mp); 7536 return; 7537 } 7538 /* 7539 * There must be TCP options. 7540 * Make sure we can grab them. 7541 */ 7542 offset <<= 2; 7543 if (remlen < offset) 7544 goto pkt_too_short; 7545 if (mp->b_cont != NULL && 7546 whereptr + offset > mp->b_wptr) { 7547 if (!pullupmsg(mp, 7548 hdr_len + offset)) { 7549 BUMP_MIB(ill->ill_ip_mib, 7550 ipIfStatsInDiscards); 7551 freemsg(first_mp); 7552 return; 7553 } 7554 hck_flags = 0; 7555 ip6h = (ip6_t *)mp->b_rptr; 7556 whereptr = (uint8_t *)ip6h + hdr_len; 7557 } 7558 } 7559 7560 up = (uint16_t *)&ip6h->ip6_src; 7561 /* 7562 * TCP checksum calculation. 
First sum up the 7563 * pseudo-header fields: 7564 * - Source IPv6 address 7565 * - Destination IPv6 address 7566 * - TCP payload length 7567 * - TCP protocol ID 7568 */ 7569 sum = htons(IPPROTO_TCP + remlen) + 7570 up[0] + up[1] + up[2] + up[3] + 7571 up[4] + up[5] + up[6] + up[7] + 7572 up[8] + up[9] + up[10] + up[11] + 7573 up[12] + up[13] + up[14] + up[15]; 7574 7575 /* Fold initial sum */ 7576 sum = (sum & 0xffff) + (sum >> 16); 7577 7578 mp1 = mp->b_cont; 7579 7580 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7581 IP6_STAT(ipst, ip6_in_sw_cksum); 7582 7583 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7584 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7585 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7586 mp, mp1, cksum_err); 7587 7588 if (cksum_err) { 7589 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7590 7591 if (hck_flags & HCK_FULLCKSUM) { 7592 IP6_STAT(ipst, 7593 ip6_tcp_in_full_hw_cksum_err); 7594 } else if (hck_flags & HCK_PARTIALCKSUM) { 7595 IP6_STAT(ipst, 7596 ip6_tcp_in_part_hw_cksum_err); 7597 } else { 7598 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7599 } 7600 freemsg(first_mp); 7601 return; 7602 } 7603 tcp_fanout: 7604 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7605 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7606 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7607 return; 7608 } 7609 case IPPROTO_SCTP: 7610 { 7611 sctp_hdr_t *sctph; 7612 uint32_t calcsum, pktsum; 7613 uint_t hdr_len = pkt_len - remlen; 7614 sctp_stack_t *sctps; 7615 7616 sctps = inill->ill_ipst->ips_netstack->netstack_sctp; 7617 7618 /* SCTP needs all of the SCTP header */ 7619 if (remlen < sizeof (*sctph)) { 7620 goto pkt_too_short; 7621 } 7622 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7623 ASSERT(mp->b_cont != NULL); 7624 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7625 BUMP_MIB(ill->ill_ip_mib, 7626 ipIfStatsInDiscards); 7627 freemsg(mp); 7628 return; 7629 } 7630 ip6h = (ip6_t *)mp->b_rptr; 7631 whereptr = (uint8_t *)ip6h + hdr_len; 7632 } 7633 7634 sctph = 
(sctp_hdr_t *)(mp->b_rptr + hdr_len); 7635 /* checksum */ 7636 pktsum = sctph->sh_chksum; 7637 sctph->sh_chksum = 0; 7638 calcsum = sctp_cksum(mp, hdr_len); 7639 if (calcsum != pktsum) { 7640 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7641 freemsg(mp); 7642 return; 7643 } 7644 sctph->sh_chksum = pktsum; 7645 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7646 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7647 ports, zoneid, mp, sctps)) == NULL) { 7648 ip_fanout_sctp_raw(first_mp, ill, 7649 (ipha_t *)ip6h, B_FALSE, ports, 7650 mctl_present, 7651 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7652 B_TRUE, zoneid); 7653 return; 7654 } 7655 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7656 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7657 B_FALSE, mctl_present); 7658 return; 7659 } 7660 case IPPROTO_UDP: { 7661 uint16_t *up; 7662 uint32_t sum; 7663 7664 hdr_len = pkt_len - remlen; 7665 7666 if (hada_mp != NULL) { 7667 ip0dbg(("udp hada drop\n")); 7668 goto hada_drop; 7669 } 7670 7671 /* Verify that at least the ports are present */ 7672 if (remlen < UDPH_SIZE) 7673 goto pkt_too_short; 7674 if (mp->b_cont != NULL && 7675 whereptr + UDPH_SIZE > mp->b_wptr) { 7676 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7677 BUMP_MIB(ill->ill_ip_mib, 7678 ipIfStatsInDiscards); 7679 freemsg(first_mp); 7680 return; 7681 } 7682 hck_flags = 0; 7683 ip6h = (ip6_t *)mp->b_rptr; 7684 whereptr = (uint8_t *)ip6h + hdr_len; 7685 } 7686 7687 /* 7688 * Before going through the regular checksum 7689 * calculation, make sure the received checksum 7690 * is non-zero. RFC 2460 says, a 0x0000 checksum 7691 * in a UDP packet (within IPv6 packet) is invalid 7692 * and should be replaced by 0xffff. This makes 7693 * sense as regular checksum calculation will 7694 * pass for both the cases i.e. 0x0000 and 0xffff. 7695 * Removing one of the case makes error detection 7696 * stronger. 
7697 */ 7698 7699 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7700 /* 0x0000 checksum is invalid */ 7701 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7702 "checksum value 0x0000\n")); 7703 BUMP_MIB(ill->ill_ip_mib, 7704 udpIfStatsInCksumErrs); 7705 freemsg(first_mp); 7706 return; 7707 } 7708 7709 up = (uint16_t *)&ip6h->ip6_src; 7710 7711 /* 7712 * UDP checksum calculation. First sum up the 7713 * pseudo-header fields: 7714 * - Source IPv6 address 7715 * - Destination IPv6 address 7716 * - UDP payload length 7717 * - UDP protocol ID 7718 */ 7719 7720 sum = htons(IPPROTO_UDP + remlen) + 7721 up[0] + up[1] + up[2] + up[3] + 7722 up[4] + up[5] + up[6] + up[7] + 7723 up[8] + up[9] + up[10] + up[11] + 7724 up[12] + up[13] + up[14] + up[15]; 7725 7726 /* Fold initial sum */ 7727 sum = (sum & 0xffff) + (sum >> 16); 7728 7729 if (reass_hck_flags != 0) { 7730 hck_flags = reass_hck_flags; 7731 7732 IP_CKSUM_RECV_REASS(hck_flags, 7733 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7734 sum, reass_sum, cksum_err); 7735 } else { 7736 mp1 = mp->b_cont; 7737 7738 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7739 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7740 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7741 mp, mp1, cksum_err); 7742 } 7743 7744 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7745 IP6_STAT(ipst, ip6_in_sw_cksum); 7746 7747 if (cksum_err) { 7748 BUMP_MIB(ill->ill_ip_mib, 7749 udpIfStatsInCksumErrs); 7750 7751 if (hck_flags & HCK_FULLCKSUM) 7752 IP6_STAT(ipst, 7753 ip6_udp_in_full_hw_cksum_err); 7754 else if (hck_flags & HCK_PARTIALCKSUM) 7755 IP6_STAT(ipst, 7756 ip6_udp_in_part_hw_cksum_err); 7757 else 7758 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7759 7760 freemsg(first_mp); 7761 return; 7762 } 7763 goto udp_fanout; 7764 } 7765 case IPPROTO_ICMPV6: { 7766 uint16_t *up; 7767 uint32_t sum; 7768 uint_t hdr_len = pkt_len - remlen; 7769 7770 if (hada_mp != NULL) { 7771 ip0dbg(("icmp hada drop\n")); 7772 goto hada_drop; 7773 } 7774 7775 up = (uint16_t 
*)&ip6h->ip6_src; 7776 sum = htons(IPPROTO_ICMPV6 + remlen) + 7777 up[0] + up[1] + up[2] + up[3] + 7778 up[4] + up[5] + up[6] + up[7] + 7779 up[8] + up[9] + up[10] + up[11] + 7780 up[12] + up[13] + up[14] + up[15]; 7781 sum = (sum & 0xffff) + (sum >> 16); 7782 sum = IP_CSUM(mp, hdr_len, sum); 7783 if (sum != 0) { 7784 /* IPv6 ICMP checksum failed */ 7785 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7786 "failed %x\n", 7787 sum)); 7788 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7789 BUMP_MIB(ill->ill_icmp6_mib, 7790 ipv6IfIcmpInErrors); 7791 freemsg(first_mp); 7792 return; 7793 } 7794 7795 icmp_fanout: 7796 /* Check variable for testing applications */ 7797 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7798 freemsg(first_mp); 7799 return; 7800 } 7801 /* 7802 * Assume that there is always at least one conn for 7803 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7804 * where there is no conn. 7805 */ 7806 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7807 ilm_t *ilm; 7808 ilm_walker_t ilw; 7809 7810 ASSERT(!IS_LOOPBACK(ill)); 7811 /* 7812 * In the multicast case, applications may have 7813 * joined the group from different zones, so we 7814 * need to deliver the packet to each of them. 7815 * Loop through the multicast memberships 7816 * structures (ilm) on the receive ill and send 7817 * a copy of the packet up each matching one. 
7818 */ 7819 ilm = ilm_walker_start(&ilw, inill); 7820 for (; ilm != NULL; 7821 ilm = ilm_walker_step(&ilw, ilm)) { 7822 if (!IN6_ARE_ADDR_EQUAL( 7823 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7824 continue; 7825 if (!ipif_lookup_zoneid( 7826 ilw.ilw_walk_ill, ilm->ilm_zoneid, 7827 IPIF_UP, NULL)) 7828 continue; 7829 7830 first_mp1 = ip_copymsg(first_mp); 7831 if (first_mp1 == NULL) 7832 continue; 7833 icmp_inbound_v6(q, first_mp1, 7834 ilw.ilw_walk_ill, inill, 7835 hdr_len, mctl_present, 0, 7836 ilm->ilm_zoneid, dl_mp); 7837 } 7838 ilm_walker_finish(&ilw); 7839 } else { 7840 first_mp1 = ip_copymsg(first_mp); 7841 if (first_mp1 != NULL) 7842 icmp_inbound_v6(q, first_mp1, ill, 7843 inill, hdr_len, mctl_present, 0, 7844 zoneid, dl_mp); 7845 } 7846 } 7847 /* FALLTHRU */ 7848 default: { 7849 /* 7850 * Handle protocols with which IPv6 is less intimate. 7851 */ 7852 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 7853 7854 if (hada_mp != NULL) { 7855 ip0dbg(("default hada drop\n")); 7856 goto hada_drop; 7857 } 7858 7859 /* 7860 * Enable sending ICMP for "Unknown" nexthdr 7861 * case. i.e. where we did not FALLTHRU from 7862 * IPPROTO_ICMPV6 processing case above. 7863 * If we did FALLTHRU, then the packet has already been 7864 * processed for IPPF, don't process it again in 7865 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7866 * flags 7867 */ 7868 if (nexthdr != IPPROTO_ICMPV6) 7869 proto_flags |= IP_FF_SEND_ICMP; 7870 else 7871 proto_flags |= IP6_NO_IPPOLICY; 7872 7873 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7874 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7875 mctl_present, zoneid); 7876 return; 7877 } 7878 7879 case IPPROTO_DSTOPTS: { 7880 uint_t ehdrlen; 7881 uint8_t *optptr; 7882 ip6_dest_t *desthdr; 7883 7884 /* If packet is too short, look no further */ 7885 if (remlen < MIN_EHDR_LEN) 7886 goto pkt_too_short; 7887 7888 /* Check if AH is present. 
*/ 7889 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7890 inill, hada_mp, zoneid)) { 7891 return; 7892 } 7893 7894 /* 7895 * Reinitialize pointers, as ipsec_early_ah_v6() does 7896 * complete pullups. We don't have to do more pullups 7897 * as a result. 7898 */ 7899 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7900 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7901 ip6h = (ip6_t *)mp->b_rptr; 7902 7903 desthdr = (ip6_dest_t *)whereptr; 7904 nexthdr = desthdr->ip6d_nxt; 7905 prev_nexthdr_offset = (uint_t)(whereptr - 7906 (uint8_t *)ip6h); 7907 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7908 if (remlen < ehdrlen) 7909 goto pkt_too_short; 7910 optptr = whereptr + 2; 7911 /* 7912 * Note: XXX This code does not seem to make 7913 * distinction between Destination Options Header 7914 * being before/after Routing Header which can 7915 * happen if we are at the end of source route. 7916 * This may become significant in future. 7917 * (No real significant Destination Options are 7918 * defined/implemented yet ). 7919 */ 7920 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7921 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 7922 case -1: 7923 /* 7924 * Packet has been consumed and any needed 7925 * ICMP errors sent. 
7926 */ 7927 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7928 freemsg(hada_mp); 7929 return; 7930 case 0: 7931 /* No action needed continue */ 7932 break; 7933 case 1: 7934 /* 7935 * Unnexpected return value 7936 * (Router alert is a Hop-by-Hop option) 7937 */ 7938 #ifdef DEBUG 7939 panic("ip_rput_data_v6: router " 7940 "alert hbh opt indication in dest opt"); 7941 /*NOTREACHED*/ 7942 #else 7943 freemsg(hada_mp); 7944 freemsg(first_mp); 7945 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7946 return; 7947 #endif 7948 } 7949 used = ehdrlen; 7950 break; 7951 } 7952 case IPPROTO_FRAGMENT: { 7953 ip6_frag_t *fraghdr; 7954 size_t no_frag_hdr_len; 7955 7956 if (hada_mp != NULL) { 7957 ip0dbg(("frag hada drop\n")); 7958 goto hada_drop; 7959 } 7960 7961 ASSERT(first_mp == mp); 7962 if (remlen < sizeof (ip6_frag_t)) 7963 goto pkt_too_short; 7964 7965 if (mp->b_cont != NULL && 7966 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 7967 if (!pullupmsg(mp, 7968 pkt_len - remlen + sizeof (ip6_frag_t))) { 7969 BUMP_MIB(ill->ill_ip_mib, 7970 ipIfStatsInDiscards); 7971 freemsg(mp); 7972 return; 7973 } 7974 hck_flags = 0; 7975 ip6h = (ip6_t *)mp->b_rptr; 7976 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7977 } 7978 7979 fraghdr = (ip6_frag_t *)whereptr; 7980 used = (uint_t)sizeof (ip6_frag_t); 7981 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 7982 7983 /* 7984 * Invoke the CGTP (multirouting) filtering module to 7985 * process the incoming packet. Packets identified as 7986 * duplicates must be discarded. Filtering is active 7987 * only if the the ip_cgtp_filter ndd variable is 7988 * non-zero. 
7989 */ 7990 if (ipst->ips_ip_cgtp_filter && 7991 ipst->ips_ip_cgtp_filter_ops != NULL) { 7992 int cgtp_flt_pkt; 7993 netstackid_t stackid; 7994 7995 stackid = ipst->ips_netstack->netstack_stackid; 7996 7997 cgtp_flt_pkt = 7998 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 7999 stackid, inill->ill_phyint->phyint_ifindex, 8000 ip6h, fraghdr); 8001 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8002 freemsg(mp); 8003 return; 8004 } 8005 } 8006 8007 /* Restore the flags */ 8008 DB_CKSUMFLAGS(mp) = hck_flags; 8009 8010 mp = ip_rput_frag_v6(ill, inill, mp, ip6h, fraghdr, 8011 remlen - used, &prev_nexthdr_offset, 8012 &reass_sum, &reass_hck_flags); 8013 if (mp == NULL) { 8014 /* Reassembly is still pending */ 8015 return; 8016 } 8017 /* The first mblk are the headers before the frag hdr */ 8018 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8019 8020 first_mp = mp; /* mp has most likely changed! */ 8021 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8022 ip6h = (ip6_t *)mp->b_rptr; 8023 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8024 whereptr = mp->b_rptr + no_frag_hdr_len; 8025 remlen = ntohs(ip6h->ip6_plen) + 8026 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8027 pkt_len = msgdsize(mp); 8028 used = 0; 8029 break; 8030 } 8031 case IPPROTO_HOPOPTS: { 8032 if (hada_mp != NULL) { 8033 ip0dbg(("hop hada drop\n")); 8034 goto hada_drop; 8035 } 8036 /* 8037 * Illegal header sequence. 8038 * (Hop-by-hop headers are processed above 8039 * and required to immediately follow IPv6 header) 8040 */ 8041 icmp_param_problem_v6(WR(q), first_mp, 8042 ICMP6_PARAMPROB_NEXTHEADER, 8043 prev_nexthdr_offset, 8044 B_FALSE, B_FALSE, zoneid, ipst); 8045 return; 8046 } 8047 case IPPROTO_ROUTING: { 8048 uint_t ehdrlen; 8049 ip6_rthdr_t *rthdr; 8050 8051 /* If packet is too short, look no further */ 8052 if (remlen < MIN_EHDR_LEN) 8053 goto pkt_too_short; 8054 8055 /* Check if AH is present. 
*/ 8056 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8057 inill, hada_mp, zoneid)) { 8058 return; 8059 } 8060 8061 /* 8062 * Reinitialize pointers, as ipsec_early_ah_v6() does 8063 * complete pullups. We don't have to do more pullups 8064 * as a result. 8065 */ 8066 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8067 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8068 ip6h = (ip6_t *)mp->b_rptr; 8069 8070 rthdr = (ip6_rthdr_t *)whereptr; 8071 nexthdr = rthdr->ip6r_nxt; 8072 prev_nexthdr_offset = (uint_t)(whereptr - 8073 (uint8_t *)ip6h); 8074 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8075 if (remlen < ehdrlen) 8076 goto pkt_too_short; 8077 if (rthdr->ip6r_segleft != 0) { 8078 /* Not end of source route */ 8079 if (ll_multicast) { 8080 BUMP_MIB(ill->ill_ip_mib, 8081 ipIfStatsForwProhibits); 8082 freemsg(hada_mp); 8083 freemsg(mp); 8084 return; 8085 } 8086 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8087 flags, hada_mp, dl_mp); 8088 return; 8089 } 8090 used = ehdrlen; 8091 break; 8092 } 8093 case IPPROTO_AH: 8094 case IPPROTO_ESP: { 8095 /* 8096 * Fast path for AH/ESP. If this is the first time 8097 * we are sending a datagram to AH/ESP, allocate 8098 * a IPSEC_IN message and prepend it. Otherwise, 8099 * just fanout. 8100 */ 8101 8102 ipsec_in_t *ii; 8103 int ipsec_rc; 8104 ipsec_stack_t *ipss; 8105 8106 ipss = ipst->ips_netstack->netstack_ipsec; 8107 if (!mctl_present) { 8108 ASSERT(first_mp == mp); 8109 first_mp = ipsec_in_alloc(B_FALSE, 8110 ipst->ips_netstack); 8111 if (first_mp == NULL) { 8112 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8113 "allocation failure.\n")); 8114 BUMP_MIB(ill->ill_ip_mib, 8115 ipIfStatsInDiscards); 8116 freemsg(mp); 8117 return; 8118 } 8119 /* 8120 * Store the ill_index so that when we come back 8121 * from IPSEC we ride on the same queue. 
8122 */ 8123 ii = (ipsec_in_t *)first_mp->b_rptr; 8124 ii->ipsec_in_ill_index = 8125 ill->ill_phyint->phyint_ifindex; 8126 ii->ipsec_in_rill_index = 8127 inill->ill_phyint->phyint_ifindex; 8128 first_mp->b_cont = mp; 8129 /* 8130 * Cache hardware acceleration info. 8131 */ 8132 if (hada_mp != NULL) { 8133 IPSECHW_DEBUG(IPSECHW_PKT, 8134 ("ip_rput_data_v6: " 8135 "caching data attr.\n")); 8136 ii->ipsec_in_accelerated = B_TRUE; 8137 ii->ipsec_in_da = hada_mp; 8138 hada_mp = NULL; 8139 } 8140 } else { 8141 ii = (ipsec_in_t *)first_mp->b_rptr; 8142 } 8143 8144 if (!ipsec_loaded(ipss)) { 8145 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8146 zoneid, ipst); 8147 return; 8148 } 8149 8150 /* select inbound SA and have IPsec process the pkt */ 8151 if (nexthdr == IPPROTO_ESP) { 8152 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8153 ipst->ips_netstack); 8154 if (esph == NULL) 8155 return; 8156 ASSERT(ii->ipsec_in_esp_sa != NULL); 8157 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8158 NULL); 8159 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8160 first_mp, esph); 8161 } else { 8162 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8163 ipst->ips_netstack); 8164 if (ah == NULL) 8165 return; 8166 ASSERT(ii->ipsec_in_ah_sa != NULL); 8167 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8168 NULL); 8169 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8170 first_mp, ah); 8171 } 8172 8173 switch (ipsec_rc) { 8174 case IPSEC_STATUS_SUCCESS: 8175 break; 8176 case IPSEC_STATUS_FAILED: 8177 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8178 /* FALLTHRU */ 8179 case IPSEC_STATUS_PENDING: 8180 return; 8181 } 8182 /* we're done with IPsec processing, send it up */ 8183 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8184 return; 8185 } 8186 case IPPROTO_NONE: 8187 /* All processing is done. Count as "delivered". 
*/ 8188 freemsg(hada_mp); 8189 freemsg(first_mp); 8190 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8191 return; 8192 } 8193 whereptr += used; 8194 ASSERT(remlen >= used); 8195 remlen -= used; 8196 } 8197 /* NOTREACHED */ 8198 8199 pkt_too_short: 8200 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8201 ip6_len, pkt_len, remlen)); 8202 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8203 freemsg(hada_mp); 8204 freemsg(first_mp); 8205 return; 8206 udp_fanout: 8207 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8208 connp = NULL; 8209 } else { 8210 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8211 ipst); 8212 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8213 CONN_DEC_REF(connp); 8214 connp = NULL; 8215 } 8216 } 8217 8218 if (connp == NULL) { 8219 uint32_t ports; 8220 8221 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8222 UDP_PORTS_OFFSET); 8223 IP6_STAT(ipst, ip6_udp_slow_path); 8224 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8225 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8226 zoneid); 8227 return; 8228 } 8229 8230 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 8231 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 8232 freemsg(first_mp); 8233 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8234 CONN_DEC_REF(connp); 8235 return; 8236 } 8237 8238 /* Initiate IPPF processing */ 8239 if (IP6_IN_IPP(flags, ipst)) { 8240 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8241 if (mp == NULL) { 8242 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8243 CONN_DEC_REF(connp); 8244 return; 8245 } 8246 } 8247 8248 if (connp->conn_ip_recvpktinfo || 8249 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8250 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8251 if (mp == NULL) { 8252 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8253 CONN_DEC_REF(connp); 8254 return; 8255 } 8256 } 8257 8258 IP6_STAT(ipst, ip6_udp_fast_path); 8259 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 
8260 8261 /* Send it upstream */ 8262 (connp->conn_recv)(connp, mp, NULL); 8263 8264 CONN_DEC_REF(connp); 8265 freemsg(hada_mp); 8266 return; 8267 8268 hada_drop: 8269 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8270 /* IPsec kstats: bump counter here */ 8271 freemsg(hada_mp); 8272 freemsg(first_mp); 8273 } 8274 8275 /* 8276 * Reassemble fragment. 8277 * When it returns a completed message the first mblk will only contain 8278 * the headers prior to the fragment header. 8279 * 8280 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8281 * of the preceding header. This is needed to patch the previous header's 8282 * nexthdr field when reassembly completes. 8283 */ 8284 static mblk_t * 8285 ip_rput_frag_v6(ill_t *ill, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 8286 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8287 uint32_t *cksum_val, uint16_t *cksum_flags) 8288 { 8289 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8290 uint16_t offset; 8291 boolean_t more_frags; 8292 uint8_t nexthdr = fraghdr->ip6f_nxt; 8293 in6_addr_t *v6dst_ptr; 8294 in6_addr_t *v6src_ptr; 8295 uint_t end; 8296 uint_t hdr_length; 8297 size_t count; 8298 ipf_t *ipf; 8299 ipf_t **ipfp; 8300 ipfb_t *ipfb; 8301 mblk_t *mp1; 8302 uint8_t ecn_info = 0; 8303 size_t msg_len; 8304 mblk_t *tail_mp; 8305 mblk_t *t_mp; 8306 boolean_t pruned = B_FALSE; 8307 uint32_t sum_val; 8308 uint16_t sum_flags; 8309 ip_stack_t *ipst = ill->ill_ipst; 8310 8311 if (cksum_val != NULL) 8312 *cksum_val = 0; 8313 if (cksum_flags != NULL) 8314 *cksum_flags = 0; 8315 8316 /* 8317 * We utilize hardware computed checksum info only for UDP since 8318 * IP fragmentation is a normal occurence for the protocol. In 8319 * addition, checksum offload support for IP fragments carrying 8320 * UDP payload is commonly implemented across network adapters. 
8321 */ 8322 ASSERT(inill != NULL); 8323 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(inill) && 8324 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8325 mblk_t *mp1 = mp->b_cont; 8326 int32_t len; 8327 8328 /* Record checksum information from the packet */ 8329 sum_val = (uint32_t)DB_CKSUM16(mp); 8330 sum_flags = DB_CKSUMFLAGS(mp); 8331 8332 /* fragmented payload offset from beginning of mblk */ 8333 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8334 8335 if ((sum_flags & HCK_PARTIALCKSUM) && 8336 (mp1 == NULL || mp1->b_cont == NULL) && 8337 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8338 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8339 uint32_t adj; 8340 /* 8341 * Partial checksum has been calculated by hardware 8342 * and attached to the packet; in addition, any 8343 * prepended extraneous data is even byte aligned. 8344 * If any such data exists, we adjust the checksum; 8345 * this would also handle any postpended data. 8346 */ 8347 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8348 mp, mp1, len, adj); 8349 8350 /* One's complement subtract extraneous checksum */ 8351 if (adj >= sum_val) 8352 sum_val = ~(adj - sum_val) & 0xFFFF; 8353 else 8354 sum_val -= adj; 8355 } 8356 } else { 8357 sum_val = 0; 8358 sum_flags = 0; 8359 } 8360 8361 /* Clear hardware checksumming flag */ 8362 DB_CKSUMFLAGS(mp) = 0; 8363 8364 /* 8365 * Note: Fragment offset in header is in 8-octet units. 8366 * Clearing least significant 3 bits not only extracts 8367 * it but also gets it in units of octets. 8368 */ 8369 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8370 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8371 8372 /* 8373 * Is the more frags flag on and the payload length not a multiple 8374 * of eight? 
8375 */ 8376 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8377 zoneid_t zoneid; 8378 8379 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8380 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8381 if (zoneid == ALL_ZONES) { 8382 freemsg(mp); 8383 return (NULL); 8384 } 8385 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8386 (uint32_t)((char *)&ip6h->ip6_plen - 8387 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8388 return (NULL); 8389 } 8390 8391 v6src_ptr = &ip6h->ip6_src; 8392 v6dst_ptr = &ip6h->ip6_dst; 8393 end = remlen; 8394 8395 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8396 end += offset; 8397 8398 /* 8399 * Would fragment cause reassembled packet to have a payload length 8400 * greater than IP_MAXPACKET - the max payload size? 8401 */ 8402 if (end > IP_MAXPACKET) { 8403 zoneid_t zoneid; 8404 8405 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8406 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8407 if (zoneid == ALL_ZONES) { 8408 freemsg(mp); 8409 return (NULL); 8410 } 8411 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8412 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8413 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8414 return (NULL); 8415 } 8416 8417 /* 8418 * This packet just has one fragment. Reassembly not 8419 * needed. 8420 */ 8421 if (!more_frags && offset == 0) { 8422 goto reass_done; 8423 } 8424 8425 /* 8426 * Drop the fragmented as early as possible, if 8427 * we don't have resource(s) to re-assemble. 8428 */ 8429 if (ipst->ips_ip_reass_queue_bytes == 0) { 8430 freemsg(mp); 8431 return (NULL); 8432 } 8433 8434 /* Record the ECN field info. */ 8435 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8436 /* 8437 * If this is not the first fragment, dump the unfragmentable 8438 * portion of the packet. 8439 */ 8440 if (offset) 8441 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8442 8443 /* 8444 * Fragmentation reassembly. 
Each ILL has a hash table for 8445 * queueing packets undergoing reassembly for all IPIFs 8446 * associated with the ILL. The hash is based on the packet 8447 * IP ident field. The ILL frag hash table was allocated 8448 * as a timer block at the time the ILL was created. Whenever 8449 * there is anything on the reassembly queue, the timer will 8450 * be running. 8451 */ 8452 msg_len = MBLKSIZE(mp); 8453 tail_mp = mp; 8454 while (tail_mp->b_cont != NULL) { 8455 tail_mp = tail_mp->b_cont; 8456 msg_len += MBLKSIZE(tail_mp); 8457 } 8458 /* 8459 * If the reassembly list for this ILL will get too big 8460 * prune it. 8461 */ 8462 8463 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8464 ipst->ips_ip_reass_queue_bytes) { 8465 ill_frag_prune(ill, 8466 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8467 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8468 pruned = B_TRUE; 8469 } 8470 8471 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8472 mutex_enter(&ipfb->ipfb_lock); 8473 8474 ipfp = &ipfb->ipfb_ipf; 8475 /* Try to find an existing fragment queue for this packet. */ 8476 for (;;) { 8477 ipf = ipfp[0]; 8478 if (ipf) { 8479 /* 8480 * It has to match on ident, source address, and 8481 * dest address. 8482 */ 8483 if (ipf->ipf_ident == ident && 8484 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8485 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8486 8487 /* 8488 * If we have received too many 8489 * duplicate fragments for this packet 8490 * free it. 8491 */ 8492 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8493 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8494 freemsg(mp); 8495 mutex_exit(&ipfb->ipfb_lock); 8496 return (NULL); 8497 } 8498 8499 break; 8500 } 8501 ipfp = &ipf->ipf_hash_next; 8502 continue; 8503 } 8504 8505 8506 /* 8507 * If we pruned the list, do we want to store this new 8508 * fragment?. We apply an optimization here based on the 8509 * fact that most fragments will be received in order. 
8510 * So if the offset of this incoming fragment is zero, 8511 * it is the first fragment of a new packet. We will 8512 * keep it. Otherwise drop the fragment, as we have 8513 * probably pruned the packet already (since the 8514 * packet cannot be found). 8515 */ 8516 8517 if (pruned && offset != 0) { 8518 mutex_exit(&ipfb->ipfb_lock); 8519 freemsg(mp); 8520 return (NULL); 8521 } 8522 8523 /* New guy. Allocate a frag message. */ 8524 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8525 if (!mp1) { 8526 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8527 freemsg(mp); 8528 partial_reass_done: 8529 mutex_exit(&ipfb->ipfb_lock); 8530 return (NULL); 8531 } 8532 8533 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8534 /* 8535 * Too many fragmented packets in this hash bucket. 8536 * Free the oldest. 8537 */ 8538 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8539 } 8540 8541 mp1->b_cont = mp; 8542 8543 /* Initialize the fragment header. */ 8544 ipf = (ipf_t *)mp1->b_rptr; 8545 ipf->ipf_mp = mp1; 8546 ipf->ipf_ptphn = ipfp; 8547 ipfp[0] = ipf; 8548 ipf->ipf_hash_next = NULL; 8549 ipf->ipf_ident = ident; 8550 ipf->ipf_v6src = *v6src_ptr; 8551 ipf->ipf_v6dst = *v6dst_ptr; 8552 /* Record reassembly start time. 
*/ 8553 ipf->ipf_timestamp = gethrestime_sec(); 8554 /* Record ipf generation and account for frag header */ 8555 ipf->ipf_gen = ill->ill_ipf_gen++; 8556 ipf->ipf_count = MBLKSIZE(mp1); 8557 ipf->ipf_protocol = nexthdr; 8558 ipf->ipf_nf_hdr_len = 0; 8559 ipf->ipf_prev_nexthdr_offset = 0; 8560 ipf->ipf_last_frag_seen = B_FALSE; 8561 ipf->ipf_ecn = ecn_info; 8562 ipf->ipf_num_dups = 0; 8563 ipfb->ipfb_frag_pkts++; 8564 ipf->ipf_checksum = 0; 8565 ipf->ipf_checksum_flags = 0; 8566 8567 /* Store checksum value in fragment header */ 8568 if (sum_flags != 0) { 8569 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8570 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8571 ipf->ipf_checksum = sum_val; 8572 ipf->ipf_checksum_flags = sum_flags; 8573 } 8574 8575 /* 8576 * We handle reassembly two ways. In the easy case, 8577 * where all the fragments show up in order, we do 8578 * minimal bookkeeping, and just clip new pieces on 8579 * the end. If we ever see a hole, then we go off 8580 * to ip_reassemble which has to mark the pieces and 8581 * keep track of the number of holes, etc. Obviously, 8582 * the point of having both mechanisms is so we can 8583 * handle the easy case as efficiently as possible. 8584 */ 8585 if (offset == 0) { 8586 /* Easy case, in-order reassembly so far. */ 8587 /* Update the byte count */ 8588 ipf->ipf_count += msg_len; 8589 ipf->ipf_tail_mp = tail_mp; 8590 /* 8591 * Keep track of next expected offset in 8592 * ipf_end. 8593 */ 8594 ipf->ipf_end = end; 8595 ipf->ipf_nf_hdr_len = hdr_length; 8596 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8597 } else { 8598 /* Hard case, hole at the beginning. */ 8599 ipf->ipf_tail_mp = NULL; 8600 /* 8601 * ipf_end == 0 means that we have given up 8602 * on easy reassembly. 8603 */ 8604 ipf->ipf_end = 0; 8605 8606 /* Forget checksum offload from now on */ 8607 ipf->ipf_checksum_flags = 0; 8608 8609 /* 8610 * ipf_hole_cnt is set by ip_reassemble. 8611 * ipf_count is updated by ip_reassemble. 
8612 * No need to check for return value here 8613 * as we don't expect reassembly to complete or 8614 * fail for the first fragment itself. 8615 */ 8616 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8617 msg_len); 8618 } 8619 /* Update per ipfb and ill byte counts */ 8620 ipfb->ipfb_count += ipf->ipf_count; 8621 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8622 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8623 /* If the frag timer wasn't already going, start it. */ 8624 mutex_enter(&ill->ill_lock); 8625 ill_frag_timer_start(ill); 8626 mutex_exit(&ill->ill_lock); 8627 goto partial_reass_done; 8628 } 8629 8630 /* 8631 * If the packet's flag has changed (it could be coming up 8632 * from an interface different than the previous, therefore 8633 * possibly different checksum capability), then forget about 8634 * any stored checksum states. Otherwise add the value to 8635 * the existing one stored in the fragment header. 8636 */ 8637 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8638 sum_val += ipf->ipf_checksum; 8639 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8640 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8641 ipf->ipf_checksum = sum_val; 8642 } else if (ipf->ipf_checksum_flags != 0) { 8643 /* Forget checksum offload from now on */ 8644 ipf->ipf_checksum_flags = 0; 8645 } 8646 8647 /* 8648 * We have a new piece of a datagram which is already being 8649 * reassembled. Update the ECN info if all IP fragments 8650 * are ECN capable. If there is one which is not, clear 8651 * all the info. If there is at least one which has CE 8652 * code point, IP needs to report that up to transport. 
8653 */ 8654 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8655 if (ecn_info == IPH_ECN_CE) 8656 ipf->ipf_ecn = IPH_ECN_CE; 8657 } else { 8658 ipf->ipf_ecn = IPH_ECN_NECT; 8659 } 8660 8661 if (offset && ipf->ipf_end == offset) { 8662 /* The new fragment fits at the end */ 8663 ipf->ipf_tail_mp->b_cont = mp; 8664 /* Update the byte count */ 8665 ipf->ipf_count += msg_len; 8666 /* Update per ipfb and ill byte counts */ 8667 ipfb->ipfb_count += msg_len; 8668 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8669 atomic_add_32(&ill->ill_frag_count, msg_len); 8670 if (more_frags) { 8671 /* More to come. */ 8672 ipf->ipf_end = end; 8673 ipf->ipf_tail_mp = tail_mp; 8674 goto partial_reass_done; 8675 } 8676 } else { 8677 /* 8678 * Go do the hard cases. 8679 * Call ip_reassemble(). 8680 */ 8681 int ret; 8682 8683 if (offset == 0) { 8684 if (ipf->ipf_prev_nexthdr_offset == 0) { 8685 ipf->ipf_nf_hdr_len = hdr_length; 8686 ipf->ipf_prev_nexthdr_offset = 8687 *prev_nexthdr_offset; 8688 } 8689 } 8690 /* Save current byte count */ 8691 count = ipf->ipf_count; 8692 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8693 8694 /* Count of bytes added and subtracted (freeb()ed) */ 8695 count = ipf->ipf_count - count; 8696 if (count) { 8697 /* Update per ipfb and ill byte counts */ 8698 ipfb->ipfb_count += count; 8699 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8700 atomic_add_32(&ill->ill_frag_count, count); 8701 } 8702 if (ret == IP_REASS_PARTIAL) { 8703 goto partial_reass_done; 8704 } else if (ret == IP_REASS_FAILED) { 8705 /* Reassembly failed. Free up all resources */ 8706 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8707 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8708 IP_REASS_SET_START(t_mp, 0); 8709 IP_REASS_SET_END(t_mp, 0); 8710 } 8711 freemsg(mp); 8712 goto partial_reass_done; 8713 } 8714 8715 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8716 } 8717 /* 8718 * We have completed reassembly. 
Unhook the frag header from 8719 * the reassembly list. 8720 * 8721 * Grab the unfragmentable header length next header value out 8722 * of the first fragment 8723 */ 8724 ASSERT(ipf->ipf_nf_hdr_len != 0); 8725 hdr_length = ipf->ipf_nf_hdr_len; 8726 8727 /* 8728 * Before we free the frag header, record the ECN info 8729 * to report back to the transport. 8730 */ 8731 ecn_info = ipf->ipf_ecn; 8732 8733 /* 8734 * Store the nextheader field in the header preceding the fragment 8735 * header 8736 */ 8737 nexthdr = ipf->ipf_protocol; 8738 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8739 ipfp = ipf->ipf_ptphn; 8740 8741 /* We need to supply these to caller */ 8742 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8743 sum_val = ipf->ipf_checksum; 8744 else 8745 sum_val = 0; 8746 8747 mp1 = ipf->ipf_mp; 8748 count = ipf->ipf_count; 8749 ipf = ipf->ipf_hash_next; 8750 if (ipf) 8751 ipf->ipf_ptphn = ipfp; 8752 ipfp[0] = ipf; 8753 atomic_add_32(&ill->ill_frag_count, -count); 8754 ASSERT(ipfb->ipfb_count >= count); 8755 ipfb->ipfb_count -= count; 8756 ipfb->ipfb_frag_pkts--; 8757 mutex_exit(&ipfb->ipfb_lock); 8758 /* Ditch the frag header. */ 8759 mp = mp1->b_cont; 8760 freeb(mp1); 8761 8762 /* 8763 * Make sure the packet is good by doing some sanity 8764 * check. If bad we can silentely drop the packet. 8765 */ 8766 reass_done: 8767 if (hdr_length < sizeof (ip6_frag_t)) { 8768 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8769 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8770 freemsg(mp); 8771 return (NULL); 8772 } 8773 8774 /* 8775 * Remove the fragment header from the initial header by 8776 * splitting the mblk into the non-fragmentable header and 8777 * everthing after the fragment extension header. This has the 8778 * side effect of putting all the headers that need destination 8779 * processing into the b_cont block-- on return this fact is 8780 * used in order to avoid having to look at the extensions 8781 * already processed. 
8782 * 8783 * Note that this code assumes that the unfragmentable portion 8784 * of the header is in the first mblk and increments 8785 * the read pointer past it. If this assumption is broken 8786 * this code fails badly. 8787 */ 8788 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8789 mblk_t *nmp; 8790 8791 if (!(nmp = dupb(mp))) { 8792 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8793 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8794 freemsg(mp); 8795 return (NULL); 8796 } 8797 nmp->b_cont = mp->b_cont; 8798 mp->b_cont = nmp; 8799 nmp->b_rptr += hdr_length; 8800 } 8801 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8802 8803 ip6h = (ip6_t *)mp->b_rptr; 8804 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8805 8806 /* Restore original IP length in header. */ 8807 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8808 /* Record the ECN info. */ 8809 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8810 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8811 8812 /* Reassembly is successful; return checksum information if needed */ 8813 if (cksum_val != NULL) 8814 *cksum_val = sum_val; 8815 if (cksum_flags != NULL) 8816 *cksum_flags = sum_flags; 8817 8818 return (mp); 8819 } 8820 8821 /* 8822 * Walk through the options to see if there is a routing header. 8823 * If present get the destination which is the last address of 8824 * the option. 
8825 */ 8826 in6_addr_t 8827 ip_get_dst_v6(ip6_t *ip6h, boolean_t *is_fragment) 8828 { 8829 uint8_t nexthdr; 8830 uint8_t *whereptr; 8831 ip6_hbh_t *hbhhdr; 8832 ip6_dest_t *dsthdr; 8833 ip6_rthdr0_t *rthdr; 8834 ip6_frag_t *fraghdr; 8835 int ehdrlen; 8836 int left; 8837 in6_addr_t *ap, rv; 8838 8839 if (is_fragment != NULL) 8840 *is_fragment = B_FALSE; 8841 8842 rv = ip6h->ip6_dst; 8843 8844 nexthdr = ip6h->ip6_nxt; 8845 whereptr = (uint8_t *)&ip6h[1]; 8846 for (;;) { 8847 8848 ASSERT(nexthdr != IPPROTO_RAW); 8849 switch (nexthdr) { 8850 case IPPROTO_HOPOPTS: 8851 hbhhdr = (ip6_hbh_t *)whereptr; 8852 nexthdr = hbhhdr->ip6h_nxt; 8853 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8854 break; 8855 case IPPROTO_DSTOPTS: 8856 dsthdr = (ip6_dest_t *)whereptr; 8857 nexthdr = dsthdr->ip6d_nxt; 8858 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8859 break; 8860 case IPPROTO_ROUTING: 8861 rthdr = (ip6_rthdr0_t *)whereptr; 8862 nexthdr = rthdr->ip6r0_nxt; 8863 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 8864 8865 left = rthdr->ip6r0_segleft; 8866 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 8867 rv = *(ap + left - 1); 8868 /* 8869 * If the caller doesn't care whether the packet 8870 * is a fragment or not, we can stop here since 8871 * we have our destination. 8872 */ 8873 if (is_fragment == NULL) 8874 goto done; 8875 break; 8876 case IPPROTO_FRAGMENT: 8877 fraghdr = (ip6_frag_t *)whereptr; 8878 nexthdr = fraghdr->ip6f_nxt; 8879 ehdrlen = sizeof (ip6_frag_t); 8880 if (is_fragment != NULL) 8881 *is_fragment = B_TRUE; 8882 goto done; 8883 default : 8884 goto done; 8885 } 8886 whereptr += ehdrlen; 8887 } 8888 8889 done: 8890 return (rv); 8891 } 8892 8893 /* 8894 * ip_source_routed_v6: 8895 * This function is called by redirect code in ip_rput_data_v6 to 8896 * know whether this packet is source routed through this node i.e 8897 * whether this node (router) is part of the journey. 
This 8898 * function is called under two cases : 8899 * 8900 * case 1 : Routing header was processed by this node and 8901 * ip_process_rthdr replaced ip6_dst with the next hop 8902 * and we are forwarding the packet to the next hop. 8903 * 8904 * case 2 : Routing header was not processed by this node and we 8905 * are just forwarding the packet. 8906 * 8907 * For case (1) we don't want to send redirects. For case(2) we 8908 * want to send redirects. 8909 */ 8910 static boolean_t 8911 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 8912 { 8913 uint8_t nexthdr; 8914 in6_addr_t *addrptr; 8915 ip6_rthdr0_t *rthdr; 8916 uint8_t numaddr; 8917 ip6_hbh_t *hbhhdr; 8918 uint_t ehdrlen; 8919 uint8_t *byteptr; 8920 8921 ip2dbg(("ip_source_routed_v6\n")); 8922 nexthdr = ip6h->ip6_nxt; 8923 ehdrlen = IPV6_HDR_LEN; 8924 8925 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8926 while (nexthdr == IPPROTO_HOPOPTS || 8927 nexthdr == IPPROTO_DSTOPTS) { 8928 byteptr = (uint8_t *)ip6h + ehdrlen; 8929 /* 8930 * Check if we have already processed 8931 * packets or we are just a forwarding 8932 * router which only pulled up msgs up 8933 * to IPV6HDR and one HBH ext header 8934 */ 8935 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8936 ip2dbg(("ip_source_routed_v6: Extension" 8937 " headers not processed\n")); 8938 return (B_FALSE); 8939 } 8940 hbhhdr = (ip6_hbh_t *)byteptr; 8941 nexthdr = hbhhdr->ip6h_nxt; 8942 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 8943 } 8944 switch (nexthdr) { 8945 case IPPROTO_ROUTING: 8946 byteptr = (uint8_t *)ip6h + ehdrlen; 8947 /* 8948 * If for some reason, we haven't pulled up 8949 * the routing hdr data mblk, then we must 8950 * not have processed it at all. So for sure 8951 * we are not part of the source routed journey. 
8952 */ 8953 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8954 ip2dbg(("ip_source_routed_v6: Routing" 8955 " header not processed\n")); 8956 return (B_FALSE); 8957 } 8958 rthdr = (ip6_rthdr0_t *)byteptr; 8959 /* 8960 * Either we are an intermediate router or the 8961 * last hop before destination and we have 8962 * already processed the routing header. 8963 * If segment_left is greater than or equal to zero, 8964 * then we must be the (numaddr - segleft) entry 8965 * of the routing header. Although ip6r0_segleft 8966 * is a unit8_t variable, we still check for zero 8967 * or greater value, if in case the data type 8968 * is changed someday in future. 8969 */ 8970 if (rthdr->ip6r0_segleft > 0 || 8971 rthdr->ip6r0_segleft == 0) { 8972 ire_t *ire = NULL; 8973 8974 numaddr = rthdr->ip6r0_len / 2; 8975 addrptr = (in6_addr_t *)((char *)rthdr + 8976 sizeof (*rthdr)); 8977 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 8978 if (addrptr != NULL) { 8979 ire = ire_ctable_lookup_v6(addrptr, NULL, 8980 IRE_LOCAL, NULL, ALL_ZONES, NULL, 8981 MATCH_IRE_TYPE, 8982 ipst); 8983 if (ire != NULL) { 8984 ire_refrele(ire); 8985 return (B_TRUE); 8986 } 8987 ip1dbg(("ip_source_routed_v6: No ire found\n")); 8988 } 8989 } 8990 /* FALLTHRU */ 8991 default: 8992 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 8993 return (B_FALSE); 8994 } 8995 } 8996 8997 /* 8998 * ip_wput_v6 -- Packets sent down from transport modules show up here. 8999 * Assumes that the following set of headers appear in the first 9000 * mblk: 9001 * ip6i_t (if present) CAN also appear as a separate mblk. 9002 * ip6_t 9003 * Any extension headers 9004 * TCP/UDP/SCTP header (if present) 9005 * The routine can handle an ICMPv6 header that is not in the first mblk. 9006 * 9007 * The order to determine the outgoing interface is as follows: 9008 * 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. 9009 * 2. If q is an ill queue and (link local or multicast destination) then 9010 * use that ill. 9011 * 3. 
If IPV6_BOUND_IF has been set use that ill. 9012 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9013 * look for the best IRE match for the unspecified group to determine 9014 * the ill. 9015 * 5. For unicast: Just do an IRE lookup for the best match. 9016 * 9017 * arg2 is always a queue_t *. 9018 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9019 * the zoneid. 9020 * When that queue is not an ill_t, then arg must be a conn_t pointer. 9021 */ 9022 void 9023 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9024 { 9025 conn_t *connp = NULL; 9026 queue_t *q = (queue_t *)arg2; 9027 ire_t *ire = NULL; 9028 ire_t *sctp_ire = NULL; 9029 ip6_t *ip6h; 9030 in6_addr_t *v6dstp; 9031 ill_t *ill = NULL; 9032 ipif_t *ipif; 9033 ip6i_t *ip6i; 9034 int cksum_request; /* -1 => normal. */ 9035 /* 1 => Skip TCP/UDP/SCTP checksum */ 9036 /* Otherwise contains insert offset for checksum */ 9037 int unspec_src; 9038 boolean_t do_outrequests; /* Increment OutRequests? */ 9039 mib2_ipIfStatsEntry_t *mibptr; 9040 int match_flags = MATCH_IRE_ILL; 9041 mblk_t *first_mp; 9042 boolean_t mctl_present; 9043 ipsec_out_t *io; 9044 boolean_t multirt_need_resolve = B_FALSE; 9045 mblk_t *copy_mp = NULL; 9046 int err = 0; 9047 int ip6i_flags = 0; 9048 zoneid_t zoneid; 9049 ill_t *saved_ill = NULL; 9050 boolean_t conn_lock_held; 9051 boolean_t need_decref = B_FALSE; 9052 ip_stack_t *ipst; 9053 9054 if (q->q_next != NULL) { 9055 ill = (ill_t *)q->q_ptr; 9056 ipst = ill->ill_ipst; 9057 } else { 9058 connp = (conn_t *)arg; 9059 ASSERT(connp != NULL); 9060 ipst = connp->conn_netstack->netstack_ip; 9061 } 9062 9063 /* 9064 * Highest bit in version field is Reachability Confirmation bit 9065 * used by NUD in ip_xmit_v6(). 
9066 */ 9067 #ifdef _BIG_ENDIAN 9068 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9069 #else 9070 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9071 #endif 9072 9073 /* 9074 * M_CTL comes from 6 places 9075 * 9076 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9077 * both V4 and V6 datagrams. 9078 * 9079 * 2) AH/ESP sends down M_CTL after doing their job with both 9080 * V4 and V6 datagrams. 9081 * 9082 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9083 * attached. 9084 * 9085 * 4) Notifications from an external resolver (for XRESOLV ifs) 9086 * 9087 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9088 * IPsec hardware acceleration support. 9089 * 9090 * 6) TUN_HELLO. 9091 * 9092 * We need to handle (1)'s IPv6 case and (3) here. For the 9093 * IPv4 case in (1), and (2), IPSEC processing has already 9094 * started. The code in ip_wput() already knows how to handle 9095 * continuing IPSEC processing (for IPv4 and IPv6). All other 9096 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9097 * for handling. 9098 */ 9099 first_mp = mp; 9100 mctl_present = B_FALSE; 9101 io = NULL; 9102 9103 /* Multidata transmit? */ 9104 if (DB_TYPE(mp) == M_MULTIDATA) { 9105 /* 9106 * We should never get here, since all Multidata messages 9107 * originating from tcp should have been directed over to 9108 * tcp_multisend() in the first place. 9109 */ 9110 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9111 freemsg(mp); 9112 return; 9113 } else if (DB_TYPE(mp) == M_CTL) { 9114 uint32_t mctltype = 0; 9115 uint32_t mlen = MBLKL(first_mp); 9116 9117 mp = mp->b_cont; 9118 mctl_present = B_TRUE; 9119 io = (ipsec_out_t *)first_mp->b_rptr; 9120 9121 /* 9122 * Validate this M_CTL message. The only three types of 9123 * M_CTL messages we expect to see in this code path are 9124 * ipsec_out_t or ipsec_in_t structures (allocated as 9125 * ipsec_info_t unions), or ipsec_ctl_t structures. 
9126 * The ipsec_out_type and ipsec_in_type overlap in the two 9127 * data structures, and they are either set to IPSEC_OUT 9128 * or IPSEC_IN depending on which data structure it is. 9129 * ipsec_ctl_t is an IPSEC_CTL. 9130 * 9131 * All other M_CTL messages are sent to ip_wput_nondata() 9132 * for handling. 9133 */ 9134 if (mlen >= sizeof (io->ipsec_out_type)) 9135 mctltype = io->ipsec_out_type; 9136 9137 if ((mlen == sizeof (ipsec_ctl_t)) && 9138 (mctltype == IPSEC_CTL)) { 9139 ip_output(arg, first_mp, arg2, caller); 9140 return; 9141 } 9142 9143 if ((mlen < sizeof (ipsec_info_t)) || 9144 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9145 mp == NULL) { 9146 ip_wput_nondata(NULL, q, first_mp, NULL); 9147 return; 9148 } 9149 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9150 if (q->q_next == NULL) { 9151 ip6h = (ip6_t *)mp->b_rptr; 9152 /* 9153 * For a freshly-generated TCP dgram that needs IPV6 9154 * processing, don't call ip_wput immediately. We can 9155 * tell this by the ipsec_out_proc_begin. In-progress 9156 * IPSEC_OUT messages have proc_begin set to TRUE, 9157 * and we want to send all IPSEC_IN messages to 9158 * ip_wput() for IPsec processing or finishing. 
9159 */ 9160 if (mctltype == IPSEC_IN || 9161 IPVER(ip6h) != IPV6_VERSION || 9162 io->ipsec_out_proc_begin) { 9163 mibptr = &ipst->ips_ip6_mib; 9164 goto notv6; 9165 } 9166 } 9167 } else if (DB_TYPE(mp) != M_DATA) { 9168 ip_wput_nondata(NULL, q, mp, NULL); 9169 return; 9170 } 9171 9172 ip6h = (ip6_t *)mp->b_rptr; 9173 9174 if (IPVER(ip6h) != IPV6_VERSION) { 9175 mibptr = &ipst->ips_ip6_mib; 9176 goto notv6; 9177 } 9178 9179 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9180 (connp == NULL || !connp->conn_ulp_labeled)) { 9181 if (connp != NULL) { 9182 ASSERT(CONN_CRED(connp) != NULL); 9183 err = tsol_check_label_v6(BEST_CRED(mp, connp), 9184 &mp, connp->conn_mac_exempt, ipst); 9185 } else if (DB_CRED(mp) != NULL) { 9186 err = tsol_check_label_v6(DB_CRED(mp), 9187 &mp, B_FALSE, ipst); 9188 } 9189 if (mctl_present) 9190 first_mp->b_cont = mp; 9191 else 9192 first_mp = mp; 9193 if (err != 0) { 9194 DTRACE_PROBE3( 9195 tsol_ip_log_drop_checklabel_ip6, char *, 9196 "conn(1), failed to check/update mp(2)", 9197 conn_t, connp, mblk_t, mp); 9198 freemsg(first_mp); 9199 return; 9200 } 9201 ip6h = (ip6_t *)mp->b_rptr; 9202 } 9203 if (q->q_next != NULL) { 9204 /* 9205 * We don't know if this ill will be used for IPv6 9206 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9207 * ipif_set_values() sets the ill_isv6 flag to true if 9208 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9209 * just drop the packet. 
9210 */ 9211 if (!ill->ill_isv6) { 9212 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9213 "ILLF_IPV6 was set\n")); 9214 freemsg(first_mp); 9215 return; 9216 } 9217 /* For uniformity do a refhold */ 9218 mutex_enter(&ill->ill_lock); 9219 if (!ILL_CAN_LOOKUP(ill)) { 9220 mutex_exit(&ill->ill_lock); 9221 freemsg(first_mp); 9222 return; 9223 } 9224 ill_refhold_locked(ill); 9225 mutex_exit(&ill->ill_lock); 9226 mibptr = ill->ill_ip_mib; 9227 9228 ASSERT(mibptr != NULL); 9229 unspec_src = 0; 9230 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9231 do_outrequests = B_FALSE; 9232 zoneid = (zoneid_t)(uintptr_t)arg; 9233 } else { 9234 ASSERT(connp != NULL); 9235 zoneid = connp->conn_zoneid; 9236 9237 /* is queue flow controlled? */ 9238 if ((q->q_first || connp->conn_draining) && 9239 (caller == IP_WPUT)) { 9240 /* 9241 * 1) TCP sends down M_CTL for detached connections. 9242 * 2) AH/ESP sends down M_CTL. 9243 * 9244 * We don't flow control either of the above. Only 9245 * UDP and others are flow controlled for which we 9246 * can't have a M_CTL. 9247 */ 9248 ASSERT(first_mp == mp); 9249 (void) putq(q, mp); 9250 return; 9251 } 9252 mibptr = &ipst->ips_ip6_mib; 9253 unspec_src = connp->conn_unspec_src; 9254 do_outrequests = B_TRUE; 9255 if (mp->b_flag & MSGHASREF) { 9256 mp->b_flag &= ~MSGHASREF; 9257 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9258 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9259 need_decref = B_TRUE; 9260 } 9261 9262 /* 9263 * If there is a policy, try to attach an ipsec_out in 9264 * the front. At the end, first_mp either points to a 9265 * M_DATA message or IPSEC_OUT message linked to a 9266 * M_DATA message. We have to do it now as we might 9267 * lose the "conn" if we go through ip_newroute. 9268 */ 9269 if (!mctl_present && 9270 (connp->conn_out_enforce_policy || 9271 connp->conn_latch != NULL)) { 9272 ASSERT(first_mp == mp); 9273 /* XXX Any better way to get the protocol fast ? 
*/ 9274 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9275 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9276 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9277 if (need_decref) 9278 CONN_DEC_REF(connp); 9279 return; 9280 } else { 9281 ASSERT(mp->b_datap->db_type == M_CTL); 9282 first_mp = mp; 9283 mp = mp->b_cont; 9284 mctl_present = B_TRUE; 9285 io = (ipsec_out_t *)first_mp->b_rptr; 9286 } 9287 } 9288 } 9289 9290 /* check for alignment and full IPv6 header */ 9291 if (!OK_32PTR((uchar_t *)ip6h) || 9292 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9293 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9294 if (do_outrequests) 9295 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9296 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9297 freemsg(first_mp); 9298 if (ill != NULL) 9299 ill_refrele(ill); 9300 if (need_decref) 9301 CONN_DEC_REF(connp); 9302 return; 9303 } 9304 v6dstp = &ip6h->ip6_dst; 9305 cksum_request = -1; 9306 ip6i = NULL; 9307 9308 /* 9309 * Once neighbor discovery has completed, ndp_process() will provide 9310 * locally generated packets for which processing can be reattempted. 9311 * In these cases, connp is NULL and the original zone is part of a 9312 * prepended ipsec_out_t. 9313 */ 9314 if (io != NULL) { 9315 /* 9316 * When coming from icmp_input_v6, the zoneid might not match 9317 * for the loopback case, because inside icmp_input_v6 the 9318 * queue_t is a conn queue from the sending side. 9319 */ 9320 zoneid = io->ipsec_out_zoneid; 9321 ASSERT(zoneid != ALL_ZONES); 9322 } 9323 9324 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9325 /* 9326 * This is an ip6i_t header followed by an ip6_hdr. 9327 * Check which fields are set. 9328 * 9329 * When the packet comes from a transport we should have 9330 * all needed headers in the first mblk. However, when 9331 * going through ip_newroute*_v6 the ip6i might be in 9332 * a separate mblk when we return here. 
In that case 9333 * we pullup everything to ensure that extension and transport 9334 * headers "stay" in the first mblk. 9335 */ 9336 ip6i = (ip6i_t *)ip6h; 9337 ip6i_flags = ip6i->ip6i_flags; 9338 9339 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9340 ((mp->b_wptr - (uchar_t *)ip6i) >= 9341 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9342 9343 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9344 if (!pullupmsg(mp, -1)) { 9345 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9346 if (do_outrequests) { 9347 BUMP_MIB(mibptr, 9348 ipIfStatsHCOutRequests); 9349 } 9350 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9351 freemsg(first_mp); 9352 if (ill != NULL) 9353 ill_refrele(ill); 9354 if (need_decref) 9355 CONN_DEC_REF(connp); 9356 return; 9357 } 9358 ip6h = (ip6_t *)mp->b_rptr; 9359 v6dstp = &ip6h->ip6_dst; 9360 ip6i = (ip6i_t *)ip6h; 9361 } 9362 ip6h = (ip6_t *)&ip6i[1]; 9363 9364 /* 9365 * Advance rptr past the ip6i_t to get ready for 9366 * transmitting the packet. However, if the packet gets 9367 * passed to ip_newroute*_v6 then rptr is moved back so 9368 * that the ip6i_t header can be inspected when the 9369 * packet comes back here after passing through 9370 * ire_add_then_send. 9371 */ 9372 mp->b_rptr = (uchar_t *)ip6h; 9373 9374 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9375 ASSERT(ip6i->ip6i_ifindex != 0); 9376 if (ill != NULL) 9377 ill_refrele(ill); 9378 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9379 NULL, NULL, NULL, NULL, ipst); 9380 if (ill == NULL) { 9381 if (do_outrequests) { 9382 BUMP_MIB(mibptr, 9383 ipIfStatsHCOutRequests); 9384 } 9385 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9386 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9387 ip6i->ip6i_ifindex)); 9388 if (need_decref) 9389 CONN_DEC_REF(connp); 9390 freemsg(first_mp); 9391 return; 9392 } 9393 mibptr = ill->ill_ip_mib; 9394 /* 9395 * Preserve the index so that when we return from 9396 * IPSEC processing, we know where to send the packet. 
9397 */ 9398 if (mctl_present) { 9399 ASSERT(io != NULL); 9400 io->ipsec_out_ill_index = ip6i->ip6i_ifindex; 9401 } 9402 } 9403 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9404 cred_t *cr = DB_CREDDEF(mp, GET_QUEUE_CRED(q)); 9405 9406 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9407 if (secpolicy_net_rawaccess(cr) != 0) { 9408 /* 9409 * Use IPCL_ZONEID to honor SO_ALLZONES. 9410 */ 9411 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9412 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9413 NULL, connp != NULL ? 9414 IPCL_ZONEID(connp) : zoneid, NULL, 9415 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9416 if (ire == NULL) { 9417 if (do_outrequests) 9418 BUMP_MIB(mibptr, 9419 ipIfStatsHCOutRequests); 9420 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9421 ip1dbg(("ip_wput_v6: bad source " 9422 "addr\n")); 9423 freemsg(first_mp); 9424 if (ill != NULL) 9425 ill_refrele(ill); 9426 if (need_decref) 9427 CONN_DEC_REF(connp); 9428 return; 9429 } 9430 ire_refrele(ire); 9431 } 9432 /* No need to verify again when using ip_newroute */ 9433 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9434 } 9435 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9436 /* 9437 * Make sure they match since ip_newroute*_v6 etc might 9438 * (unknown to them) inspect ip6i_nexthop when 9439 * they think they access ip6_dst. 9440 */ 9441 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9442 } 9443 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9444 cksum_request = 1; 9445 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9446 cksum_request = ip6i->ip6i_checksum_off; 9447 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9448 unspec_src = 1; 9449 9450 if (do_outrequests && ill != NULL) { 9451 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9452 do_outrequests = B_FALSE; 9453 } 9454 /* 9455 * Store ip6i_t info that we need after we come back 9456 * from IPSEC processing. 
9457 */ 9458 if (mctl_present) { 9459 ASSERT(io != NULL); 9460 io->ipsec_out_unspec_src = unspec_src; 9461 } 9462 } 9463 if (connp != NULL && connp->conn_dontroute) 9464 ip6h->ip6_hops = 1; 9465 9466 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9467 goto ipv6multicast; 9468 9469 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9470 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9471 ASSERT(ill != NULL); 9472 goto send_from_ill; 9473 } 9474 9475 /* 9476 * 2. If q is an ill queue and there's a link-local destination 9477 * then use that ill. 9478 */ 9479 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) 9480 goto send_from_ill; 9481 9482 /* 3. If IPV6_BOUND_IF has been set use that ill. */ 9483 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9484 ill_t *conn_outgoing_ill; 9485 9486 conn_outgoing_ill = conn_get_held_ill(connp, 9487 &connp->conn_outgoing_ill, &err); 9488 if (err == ILL_LOOKUP_FAILED) { 9489 if (ill != NULL) 9490 ill_refrele(ill); 9491 if (need_decref) 9492 CONN_DEC_REF(connp); 9493 freemsg(first_mp); 9494 return; 9495 } 9496 if (ill != NULL) 9497 ill_refrele(ill); 9498 ill = conn_outgoing_ill; 9499 mibptr = ill->ill_ip_mib; 9500 goto send_from_ill; 9501 } 9502 9503 /* 9504 * 4. For unicast: Just do an IRE lookup for the best match. 9505 * If we get here for a link-local address it is rather random 9506 * what interface we pick on a multihomed host. 9507 * *If* there is an IRE_CACHE (and the link-local address 9508 * isn't duplicated on multi links) this will find the IRE_CACHE. 9509 * Otherwise it will use one of the matching IRE_INTERFACE routes 9510 * for the link-local prefix. Hence, applications 9511 * *should* be encouraged to specify an outgoing interface when sending 9512 * to a link local address. 9513 */ 9514 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9515 !connp->conn_fully_bound)) { 9516 /* 9517 * We cache IRE_CACHEs to avoid lookups. 
We don't do 9518 * this for the tcp global queue and listen end point 9519 * as it does not really have a real destination to 9520 * talk to. 9521 */ 9522 ire = ire_cache_lookup_v6(v6dstp, zoneid, MBLK_GETLABEL(mp), 9523 ipst); 9524 } else { 9525 /* 9526 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9527 * grab a lock here to check for CONDEMNED as it is okay 9528 * to send a packet or two with the IRE_CACHE that is going 9529 * away. 9530 */ 9531 mutex_enter(&connp->conn_lock); 9532 ire = sctp_ire != NULL ? sctp_ire : connp->conn_ire_cache; 9533 if (ire != NULL && 9534 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9535 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9536 9537 IRE_REFHOLD(ire); 9538 mutex_exit(&connp->conn_lock); 9539 9540 } else { 9541 boolean_t cached = B_FALSE; 9542 9543 connp->conn_ire_cache = NULL; 9544 mutex_exit(&connp->conn_lock); 9545 /* Release the old ire */ 9546 if (ire != NULL && sctp_ire == NULL) 9547 IRE_REFRELE_NOTR(ire); 9548 9549 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9550 MBLK_GETLABEL(mp), ipst); 9551 if (ire != NULL) { 9552 IRE_REFHOLD_NOTR(ire); 9553 9554 mutex_enter(&connp->conn_lock); 9555 if (CONN_CACHE_IRE(connp) && 9556 (connp->conn_ire_cache == NULL)) { 9557 rw_enter(&ire->ire_bucket->irb_lock, 9558 RW_READER); 9559 if (!(ire->ire_marks & 9560 IRE_MARK_CONDEMNED)) { 9561 connp->conn_ire_cache = ire; 9562 cached = B_TRUE; 9563 } 9564 rw_exit(&ire->ire_bucket->irb_lock); 9565 } 9566 mutex_exit(&connp->conn_lock); 9567 9568 /* 9569 * We can continue to use the ire but since it 9570 * was not cached, we should drop the extra 9571 * reference. 
9572 */ 9573 if (!cached) 9574 IRE_REFRELE_NOTR(ire); 9575 } 9576 } 9577 } 9578 9579 if (ire != NULL) { 9580 if (do_outrequests) { 9581 /* Handle IRE_LOCAL's that might appear here */ 9582 if (ire->ire_type == IRE_CACHE) { 9583 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9584 ill_ip_mib; 9585 } else { 9586 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9587 } 9588 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9589 } 9590 9591 /* 9592 * Check if the ire has the RTF_MULTIRT flag, inherited 9593 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9594 */ 9595 if (ire->ire_flags & RTF_MULTIRT) { 9596 /* 9597 * Force hop limit of multirouted packets if required. 9598 * The hop limit of such packets is bounded by the 9599 * ip_multirt_ttl ndd variable. 9600 * NDP packets must have a hop limit of 255; don't 9601 * change the hop limit in that case. 9602 */ 9603 if ((ipst->ips_ip_multirt_ttl > 0) && 9604 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9605 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9606 if (ip_debug > 3) { 9607 ip2dbg(("ip_wput_v6: forcing multirt " 9608 "hop limit to %d (was %d) ", 9609 ipst->ips_ip_multirt_ttl, 9610 ip6h->ip6_hops)); 9611 pr_addr_dbg("v6dst %s\n", AF_INET6, 9612 &ire->ire_addr_v6); 9613 } 9614 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9615 } 9616 9617 /* 9618 * We look at this point if there are pending 9619 * unresolved routes. ire_multirt_need_resolve_v6() 9620 * checks in O(n) that all IRE_OFFSUBNET ire 9621 * entries for the packet's destination and 9622 * flagged RTF_MULTIRT are currently resolved. 9623 * If some remain unresolved, we do a copy 9624 * of the current message. It will be used 9625 * to initiate additional route resolutions. 
9626 */ 9627 multirt_need_resolve = 9628 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9629 MBLK_GETLABEL(first_mp), ipst); 9630 ip2dbg(("ip_wput_v6: ire %p, " 9631 "multirt_need_resolve %d, first_mp %p\n", 9632 (void *)ire, multirt_need_resolve, 9633 (void *)first_mp)); 9634 if (multirt_need_resolve) { 9635 copy_mp = copymsg(first_mp); 9636 if (copy_mp != NULL) { 9637 MULTIRT_DEBUG_TAG(copy_mp); 9638 } 9639 } 9640 } 9641 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9642 connp, caller, ip6i_flags, zoneid); 9643 if (need_decref) { 9644 CONN_DEC_REF(connp); 9645 connp = NULL; 9646 } 9647 IRE_REFRELE(ire); 9648 9649 /* 9650 * Try to resolve another multiroute if 9651 * ire_multirt_need_resolve_v6() deemed it necessary. 9652 * copy_mp will be consumed (sent or freed) by 9653 * ip_newroute_v6(). 9654 */ 9655 if (copy_mp != NULL) { 9656 if (mctl_present) { 9657 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9658 } else { 9659 ip6h = (ip6_t *)copy_mp->b_rptr; 9660 } 9661 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9662 &ip6h->ip6_src, NULL, zoneid, ipst); 9663 } 9664 if (ill != NULL) 9665 ill_refrele(ill); 9666 return; 9667 } 9668 9669 /* 9670 * No full IRE for this destination. Send it to 9671 * ip_newroute_v6 to see if anything else matches. 9672 * Mark this packet as having originated on this 9673 * machine. 9674 * Update rptr if there was an ip6i_t header. 9675 */ 9676 mp->b_prev = NULL; 9677 mp->b_next = NULL; 9678 if (ip6i != NULL) 9679 mp->b_rptr -= sizeof (ip6i_t); 9680 9681 if (unspec_src) { 9682 if (ip6i == NULL) { 9683 /* 9684 * Add ip6i_t header to carry unspec_src 9685 * until the packet comes back in ip_wput_v6. 
9686 */ 9687 mp = ip_add_info_v6(mp, NULL, v6dstp); 9688 if (mp == NULL) { 9689 if (do_outrequests) 9690 BUMP_MIB(mibptr, 9691 ipIfStatsHCOutRequests); 9692 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9693 if (mctl_present) 9694 freeb(first_mp); 9695 if (ill != NULL) 9696 ill_refrele(ill); 9697 if (need_decref) 9698 CONN_DEC_REF(connp); 9699 return; 9700 } 9701 ip6i = (ip6i_t *)mp->b_rptr; 9702 9703 if (mctl_present) { 9704 ASSERT(first_mp != mp); 9705 first_mp->b_cont = mp; 9706 } else { 9707 first_mp = mp; 9708 } 9709 9710 if ((mp->b_wptr - (uchar_t *)ip6i) == 9711 sizeof (ip6i_t)) { 9712 /* 9713 * ndp_resolver called from ip_newroute_v6 9714 * expects pulled up message. 9715 */ 9716 if (!pullupmsg(mp, -1)) { 9717 ip1dbg(("ip_wput_v6: pullupmsg" 9718 " failed\n")); 9719 if (do_outrequests) { 9720 BUMP_MIB(mibptr, 9721 ipIfStatsHCOutRequests); 9722 } 9723 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9724 freemsg(first_mp); 9725 if (ill != NULL) 9726 ill_refrele(ill); 9727 if (need_decref) 9728 CONN_DEC_REF(connp); 9729 return; 9730 } 9731 ip6i = (ip6i_t *)mp->b_rptr; 9732 } 9733 ip6h = (ip6_t *)&ip6i[1]; 9734 v6dstp = &ip6h->ip6_dst; 9735 } 9736 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9737 if (mctl_present) { 9738 ASSERT(io != NULL); 9739 io->ipsec_out_unspec_src = unspec_src; 9740 } 9741 } 9742 if (do_outrequests) 9743 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9744 if (need_decref) 9745 CONN_DEC_REF(connp); 9746 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 9747 if (ill != NULL) 9748 ill_refrele(ill); 9749 return; 9750 9751 9752 /* 9753 * Handle multicast packets with or without an conn. 9754 * Assumes that the transports set ip6_hops taking 9755 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9756 * into account. 9757 */ 9758 ipv6multicast: 9759 ip2dbg(("ip_wput_v6: multicast\n")); 9760 9761 /* 9762 * Hold the conn_lock till we refhold the ill of interest that is 9763 * pointed to from the conn. 
Since we cannot do an ill/ipif_refrele 9764 * while holding any locks, postpone the refrele until after the 9765 * conn_lock is dropped. 9766 */ 9767 if (connp != NULL) { 9768 mutex_enter(&connp->conn_lock); 9769 conn_lock_held = B_TRUE; 9770 } else { 9771 conn_lock_held = B_FALSE; 9772 } 9773 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9774 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9775 ASSERT(ill != NULL); 9776 } else if (ill != NULL) { 9777 /* 9778 * 2. If q is an ill queue and (link local or multicast 9779 * destination) then use that ill. 9780 * We don't need the ipif initialization here. 9781 * This useless assert below is just to prevent lint from 9782 * reporting a null body if statement. 9783 */ 9784 ASSERT(ill != NULL); 9785 } else if (connp != NULL) { 9786 /* 9787 * 3. If IPV6_BOUND_IF has been set use that ill. 9788 * 9789 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. 9790 * Otherwise look for the best IRE match for the unspecified 9791 * group to determine the ill. 9792 * 9793 * conn_multicast_ill is used for only IPv6 packets. 9794 * conn_multicast_ipif is used for only IPv4 packets. 9795 * Thus a PF_INET6 socket send both IPv4 and IPv6 9796 * multicast packets using different IP*_MULTICAST_IF 9797 * interfaces. 
9798 */ 9799 if (connp->conn_outgoing_ill != NULL) { 9800 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9801 if (err == ILL_LOOKUP_FAILED) { 9802 ip1dbg(("ip_output_v6: multicast" 9803 " conn_outgoing_ill no ipif\n")); 9804 multicast_discard: 9805 ASSERT(saved_ill == NULL); 9806 if (conn_lock_held) 9807 mutex_exit(&connp->conn_lock); 9808 if (ill != NULL) 9809 ill_refrele(ill); 9810 freemsg(first_mp); 9811 if (do_outrequests) 9812 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9813 if (need_decref) 9814 CONN_DEC_REF(connp); 9815 return; 9816 } 9817 ill = connp->conn_outgoing_ill; 9818 } else if (connp->conn_multicast_ill != NULL) { 9819 err = ill_check_and_refhold(connp->conn_multicast_ill); 9820 if (err == ILL_LOOKUP_FAILED) { 9821 ip1dbg(("ip_output_v6: multicast" 9822 " conn_multicast_ill no ipif\n")); 9823 goto multicast_discard; 9824 } 9825 ill = connp->conn_multicast_ill; 9826 } else { 9827 mutex_exit(&connp->conn_lock); 9828 conn_lock_held = B_FALSE; 9829 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 9830 if (ipif == NULL) { 9831 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9832 goto multicast_discard; 9833 } 9834 /* 9835 * We have a ref to this ipif, so we can safely 9836 * access ipif_ill. 
9837 */ 9838 ill = ipif->ipif_ill; 9839 mutex_enter(&ill->ill_lock); 9840 if (!ILL_CAN_LOOKUP(ill)) { 9841 mutex_exit(&ill->ill_lock); 9842 ipif_refrele(ipif); 9843 ill = NULL; 9844 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9845 goto multicast_discard; 9846 } 9847 ill_refhold_locked(ill); 9848 mutex_exit(&ill->ill_lock); 9849 ipif_refrele(ipif); 9850 /* 9851 * Save binding until IPV6_MULTICAST_IF 9852 * changes it 9853 */ 9854 mutex_enter(&connp->conn_lock); 9855 connp->conn_multicast_ill = ill; 9856 mutex_exit(&connp->conn_lock); 9857 } 9858 } 9859 if (conn_lock_held) 9860 mutex_exit(&connp->conn_lock); 9861 9862 if (saved_ill != NULL) 9863 ill_refrele(saved_ill); 9864 9865 ASSERT(ill != NULL); 9866 /* 9867 * For multicast loopback interfaces replace the multicast address 9868 * with a unicast address for the ire lookup. 9869 */ 9870 if (IS_LOOPBACK(ill)) 9871 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9872 9873 mibptr = ill->ill_ip_mib; 9874 if (do_outrequests) { 9875 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9876 do_outrequests = B_FALSE; 9877 } 9878 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 9879 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 9880 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 9881 9882 /* 9883 * As we may lose the conn by the time we reach ip_wput_ire_v6 9884 * we copy conn_multicast_loop and conn_dontroute on to an 9885 * ipsec_out. In case if this datagram goes out secure, 9886 * we need the ill_index also. Copy that also into the 9887 * ipsec_out. 
9888 */ 9889 if (mctl_present) { 9890 io = (ipsec_out_t *)first_mp->b_rptr; 9891 ASSERT(first_mp->b_datap->db_type == M_CTL); 9892 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9893 } else { 9894 ASSERT(mp == first_mp); 9895 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 9896 NULL) { 9897 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9898 freemsg(mp); 9899 if (ill != NULL) 9900 ill_refrele(ill); 9901 if (need_decref) 9902 CONN_DEC_REF(connp); 9903 return; 9904 } 9905 io = (ipsec_out_t *)first_mp->b_rptr; 9906 /* This is not a secure packet */ 9907 io->ipsec_out_secure = B_FALSE; 9908 io->ipsec_out_use_global_policy = B_TRUE; 9909 io->ipsec_out_zoneid = 9910 (zoneid != ALL_ZONES ? zoneid : GLOBAL_ZONEID); 9911 first_mp->b_cont = mp; 9912 mctl_present = B_TRUE; 9913 } 9914 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9915 io->ipsec_out_unspec_src = unspec_src; 9916 if (connp != NULL) 9917 io->ipsec_out_dontroute = connp->conn_dontroute; 9918 9919 send_from_ill: 9920 ASSERT(ill != NULL); 9921 ASSERT(mibptr == ill->ill_ip_mib); 9922 9923 if (do_outrequests) { 9924 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9925 do_outrequests = B_FALSE; 9926 } 9927 9928 /* 9929 * Because nce_xmit() calls ip_output_v6() and NCEs are always tied to 9930 * an underlying interface, IS_UNDER_IPMP() may be true even when 9931 * building IREs that will be used for data traffic. As such, use the 9932 * packet's source address to determine whether the traffic is test 9933 * traffic, and set MATCH_IRE_MARK_TESTHIDDEN if so. 9934 * 9935 * Separately, we also need to mark probe packets so that ND can 9936 * process them specially; see the comments in nce_queue_mp_common(). 
9937 */ 9938 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 9939 ipif_lookup_testaddr_v6(ill, &ip6h->ip6_src, NULL)) { 9940 if (ip6i == NULL) { 9941 if ((mp = ip_add_info_v6(mp, NULL, v6dstp)) == NULL) { 9942 if (mctl_present) 9943 freeb(first_mp); 9944 goto discard; 9945 } 9946 9947 if (mctl_present) 9948 first_mp->b_cont = mp; 9949 else 9950 first_mp = mp; 9951 9952 /* ndp_resolver() expects a pulled-up message */ 9953 if (MBLKL(mp) == sizeof (ip6i_t) && 9954 pullupmsg(mp, -1) == 0) { 9955 ip1dbg(("ip_output_v6: pullupmsg failed\n")); 9956 discard: BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9957 ill_refrele(ill); 9958 if (need_decref) 9959 CONN_DEC_REF(connp); 9960 return; 9961 } 9962 ip6i = (ip6i_t *)mp->b_rptr; 9963 ip6h = (ip6_t *)&ip6i[1]; 9964 v6dstp = &ip6h->ip6_dst; 9965 mp->b_rptr = (uchar_t *)ip6h; /* rewound below */ 9966 } 9967 ip6i->ip6i_flags |= IP6I_IPMP_PROBE; 9968 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 9969 } 9970 9971 if (io != NULL) 9972 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9973 9974 /* 9975 * When a specific ill is specified (using IPV6_PKTINFO, 9976 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 9977 * on routing entries (ftable and ctable) that have a matching 9978 * ire->ire_ipif->ipif_ill. Thus this can only be used 9979 * for destinations that are on-link for the specific ill 9980 * and that can appear on multiple links. Thus it is useful 9981 * for multicast destinations, link-local destinations, and 9982 * at some point perhaps for site-local destinations (if the 9983 * node sits at a site boundary). 9984 * We create the cache entries in the regular ctable since 9985 * it can not "confuse" things for other destinations. 9986 * table. 9987 * 9988 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 9989 * It is used only when ire_cache_lookup is used above. 
9990 */ 9991 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 9992 zoneid, MBLK_GETLABEL(mp), match_flags, ipst); 9993 if (ire != NULL) { 9994 /* 9995 * Check if the ire has the RTF_MULTIRT flag, inherited 9996 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9997 */ 9998 if (ire->ire_flags & RTF_MULTIRT) { 9999 /* 10000 * Force hop limit of multirouted packets if required. 10001 * The hop limit of such packets is bounded by the 10002 * ip_multirt_ttl ndd variable. 10003 * NDP packets must have a hop limit of 255; don't 10004 * change the hop limit in that case. 10005 */ 10006 if ((ipst->ips_ip_multirt_ttl > 0) && 10007 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10008 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10009 if (ip_debug > 3) { 10010 ip2dbg(("ip_wput_v6: forcing multirt " 10011 "hop limit to %d (was %d) ", 10012 ipst->ips_ip_multirt_ttl, 10013 ip6h->ip6_hops)); 10014 pr_addr_dbg("v6dst %s\n", AF_INET6, 10015 &ire->ire_addr_v6); 10016 } 10017 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10018 } 10019 10020 /* 10021 * We look at this point if there are pending 10022 * unresolved routes. ire_multirt_need_resolve_v6() 10023 * checks in O(n) that all IRE_OFFSUBNET ire 10024 * entries for the packet's destination and 10025 * flagged RTF_MULTIRT are currently resolved. 10026 * If some remain unresolved, we make a copy 10027 * of the current message. It will be used 10028 * to initiate additional route resolutions. 
10029 */ 10030 multirt_need_resolve = 10031 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10032 MBLK_GETLABEL(first_mp), ipst); 10033 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10034 "multirt_need_resolve %d, first_mp %p\n", 10035 (void *)ire, multirt_need_resolve, 10036 (void *)first_mp)); 10037 if (multirt_need_resolve) { 10038 copy_mp = copymsg(first_mp); 10039 if (copy_mp != NULL) { 10040 MULTIRT_DEBUG_TAG(copy_mp); 10041 } 10042 } 10043 } 10044 10045 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10046 ill->ill_name, (void *)ire, 10047 ill->ill_phyint->phyint_ifindex)); 10048 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10049 connp, caller, ip6i_flags, zoneid); 10050 ire_refrele(ire); 10051 if (need_decref) { 10052 CONN_DEC_REF(connp); 10053 connp = NULL; 10054 } 10055 10056 /* 10057 * Try to resolve another multiroute if 10058 * ire_multirt_need_resolve_v6() deemed it necessary. 10059 * copy_mp will be consumed (sent or freed) by 10060 * ip_newroute_[ipif_]v6(). 10061 */ 10062 if (copy_mp != NULL) { 10063 if (mctl_present) { 10064 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10065 } else { 10066 ip6h = (ip6_t *)copy_mp->b_rptr; 10067 } 10068 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10069 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10070 zoneid, ipst); 10071 if (ipif == NULL) { 10072 ip1dbg(("ip_wput_v6: No ipif for " 10073 "multicast\n")); 10074 MULTIRT_DEBUG_UNTAG(copy_mp); 10075 freemsg(copy_mp); 10076 return; 10077 } 10078 ip_newroute_ipif_v6(q, copy_mp, ipif, 10079 &ip6h->ip6_dst, &ip6h->ip6_src, unspec_src, 10080 zoneid); 10081 ipif_refrele(ipif); 10082 } else { 10083 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10084 &ip6h->ip6_src, ill, zoneid, ipst); 10085 } 10086 } 10087 ill_refrele(ill); 10088 return; 10089 } 10090 if (need_decref) { 10091 CONN_DEC_REF(connp); 10092 connp = NULL; 10093 } 10094 10095 /* Update rptr if there was an ip6i_t header. 
*/ 10096 if (ip6i != NULL) 10097 mp->b_rptr -= sizeof (ip6i_t); 10098 if (unspec_src) { 10099 if (ip6i == NULL) { 10100 /* 10101 * Add ip6i_t header to carry unspec_src 10102 * until the packet comes back in ip_wput_v6. 10103 */ 10104 if (mctl_present) { 10105 first_mp->b_cont = 10106 ip_add_info_v6(mp, NULL, v6dstp); 10107 mp = first_mp->b_cont; 10108 if (mp == NULL) 10109 freeb(first_mp); 10110 } else { 10111 first_mp = mp = ip_add_info_v6(mp, NULL, 10112 v6dstp); 10113 } 10114 if (mp == NULL) { 10115 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10116 ill_refrele(ill); 10117 return; 10118 } 10119 ip6i = (ip6i_t *)mp->b_rptr; 10120 if ((mp->b_wptr - (uchar_t *)ip6i) == 10121 sizeof (ip6i_t)) { 10122 /* 10123 * ndp_resolver called from ip_newroute_v6 10124 * expects a pulled up message. 10125 */ 10126 if (!pullupmsg(mp, -1)) { 10127 ip1dbg(("ip_wput_v6: pullupmsg" 10128 " failed\n")); 10129 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10130 freemsg(first_mp); 10131 return; 10132 } 10133 ip6i = (ip6i_t *)mp->b_rptr; 10134 } 10135 ip6h = (ip6_t *)&ip6i[1]; 10136 v6dstp = &ip6h->ip6_dst; 10137 } 10138 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10139 if (mctl_present) { 10140 ASSERT(io != NULL); 10141 io->ipsec_out_unspec_src = unspec_src; 10142 } 10143 } 10144 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10145 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, v6dstp, 10146 &ip6h->ip6_src, unspec_src, zoneid); 10147 } else { 10148 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10149 zoneid, ipst); 10150 } 10151 ill_refrele(ill); 10152 return; 10153 10154 notv6: 10155 /* FIXME?: assume the caller calls the right version of ip_output? */ 10156 if (q->q_next == NULL) { 10157 connp = Q_TO_CONN(q); 10158 10159 /* 10160 * We can change conn_send for all types of conn, even 10161 * though only TCP uses it right now. 10162 * FIXME: sctp could use conn_send but doesn't currently. 
10163 */ 10164 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10165 } 10166 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10167 (void) ip_output(arg, first_mp, arg2, caller); 10168 if (ill != NULL) 10169 ill_refrele(ill); 10170 } 10171 10172 /* 10173 * If this is a conn_t queue, then we pass in the conn. This includes the 10174 * zoneid. 10175 * Otherwise, this is a message for an ill_t queue, 10176 * in which case we use the global zoneid since those are all part of 10177 * the global zone. 10178 */ 10179 void 10180 ip_wput_v6(queue_t *q, mblk_t *mp) 10181 { 10182 if (CONN_Q(q)) 10183 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10184 else 10185 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10186 } 10187 10188 /* 10189 * NULL send-to queue - packet is to be delivered locally. 10190 */ 10191 void 10192 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10193 ire_t *ire, int fanout_flags, zoneid_t zoneid) 10194 { 10195 uint32_t ports; 10196 mblk_t *mp = first_mp, *first_mp1; 10197 boolean_t mctl_present; 10198 uint8_t nexthdr; 10199 uint16_t hdr_length; 10200 ipsec_out_t *io; 10201 mib2_ipIfStatsEntry_t *mibptr; 10202 ilm_t *ilm; 10203 uint_t nexthdr_offset; 10204 ip_stack_t *ipst = ill->ill_ipst; 10205 10206 if (DB_TYPE(mp) == M_CTL) { 10207 io = (ipsec_out_t *)mp->b_rptr; 10208 if (!io->ipsec_out_secure) { 10209 mp = mp->b_cont; 10210 freeb(first_mp); 10211 first_mp = mp; 10212 mctl_present = B_FALSE; 10213 } else { 10214 mctl_present = B_TRUE; 10215 mp = first_mp->b_cont; 10216 ipsec_out_to_in(first_mp); 10217 } 10218 } else { 10219 mctl_present = B_FALSE; 10220 } 10221 10222 /* 10223 * Remove reachability confirmation bit from version field 10224 * before passing the packet on to any firewall hooks or 10225 * looping back the packet. 
10226 */ 10227 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10228 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10229 10230 DTRACE_PROBE4(ip6__loopback__in__start, 10231 ill_t *, ill, ill_t *, NULL, 10232 ip6_t *, ip6h, mblk_t *, first_mp); 10233 10234 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10235 ipst->ips_ipv6firewall_loopback_in, 10236 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10237 10238 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10239 10240 if (first_mp == NULL) 10241 return; 10242 10243 if (ipst->ips_ipobs_enabled) { 10244 zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; 10245 zoneid_t stackzoneid = netstackid_to_zoneid( 10246 ipst->ips_netstack->netstack_stackid); 10247 10248 szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; 10249 /* 10250 * ::1 is special, as we cannot lookup its zoneid by 10251 * address. For this case, restrict the lookup to the 10252 * source zone. 10253 */ 10254 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) 10255 lookup_zoneid = zoneid; 10256 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 10257 lookup_zoneid); 10258 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 10259 IPV6_VERSION, 0, ipst); 10260 } 10261 10262 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10263 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10264 int, 1); 10265 10266 nexthdr = ip6h->ip6_nxt; 10267 mibptr = ill->ill_ip_mib; 10268 10269 /* Fastpath */ 10270 switch (nexthdr) { 10271 case IPPROTO_TCP: 10272 case IPPROTO_UDP: 10273 case IPPROTO_ICMPV6: 10274 case IPPROTO_SCTP: 10275 hdr_length = IPV6_HDR_LEN; 10276 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10277 (uchar_t *)ip6h); 10278 break; 10279 default: { 10280 uint8_t *nexthdrp; 10281 10282 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10283 &hdr_length, &nexthdrp)) { 10284 /* Malformed packet */ 10285 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10286 freemsg(first_mp); 10287 return; 10288 } 10289 nexthdr = *nexthdrp; 10290 nexthdr_offset = nexthdrp - 
(uint8_t *)ip6h; 10291 break; 10292 } 10293 } 10294 10295 UPDATE_OB_PKT_COUNT(ire); 10296 ire->ire_last_used_time = lbolt; 10297 10298 switch (nexthdr) { 10299 case IPPROTO_TCP: 10300 if (DB_TYPE(mp) == M_DATA) { 10301 /* 10302 * M_DATA mblk, so init mblk (chain) for 10303 * no struio(). 10304 */ 10305 mblk_t *mp1 = mp; 10306 10307 do { 10308 mp1->b_datap->db_struioflag = 0; 10309 } while ((mp1 = mp1->b_cont) != NULL); 10310 } 10311 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10312 TCP_PORTS_OFFSET); 10313 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10314 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10315 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10316 hdr_length, mctl_present, ire->ire_zoneid); 10317 return; 10318 10319 case IPPROTO_UDP: 10320 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10321 UDP_PORTS_OFFSET); 10322 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10323 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10324 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10325 return; 10326 10327 case IPPROTO_SCTP: 10328 { 10329 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10330 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10331 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10332 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10333 return; 10334 } 10335 case IPPROTO_ICMPV6: { 10336 icmp6_t *icmp6; 10337 10338 /* check for full IPv6+ICMPv6 header */ 10339 if ((mp->b_wptr - mp->b_rptr) < 10340 (hdr_length + ICMP6_MINLEN)) { 10341 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10342 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10343 " failed\n")); 10344 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10345 freemsg(first_mp); 10346 return; 10347 } 10348 ip6h = (ip6_t *)mp->b_rptr; 10349 } 10350 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10351 10352 /* Update output mib stats */ 10353 icmp_update_out_mib_v6(ill, icmp6); 10354 10355 /* Check variable for testing applications */ 10356 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10357 
freemsg(first_mp); 10358 return; 10359 } 10360 /* 10361 * Assume that there is always at least one conn for 10362 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10363 * where there is no conn. 10364 */ 10365 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10366 !IS_LOOPBACK(ill)) { 10367 ilm_walker_t ilw; 10368 10369 /* 10370 * In the multicast case, applications may have 10371 * joined the group from different zones, so we 10372 * need to deliver the packet to each of them. 10373 * Loop through the multicast memberships 10374 * structures (ilm) on the receive ill and send 10375 * a copy of the packet up each matching one. 10376 * However, we don't do this for multicasts sent 10377 * on the loopback interface (PHYI_LOOPBACK flag 10378 * set) as they must stay in the sender's zone. 10379 */ 10380 ilm = ilm_walker_start(&ilw, ill); 10381 for (; ilm != NULL; 10382 ilm = ilm_walker_step(&ilw, ilm)) { 10383 if (!IN6_ARE_ADDR_EQUAL( 10384 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10385 continue; 10386 if ((fanout_flags & 10387 IP_FF_NO_MCAST_LOOP) && 10388 ilm->ilm_zoneid == ire->ire_zoneid) 10389 continue; 10390 if (!ipif_lookup_zoneid( 10391 ilw.ilw_walk_ill, ilm->ilm_zoneid, 10392 IPIF_UP, NULL)) 10393 continue; 10394 10395 first_mp1 = ip_copymsg(first_mp); 10396 if (first_mp1 == NULL) 10397 continue; 10398 icmp_inbound_v6(q, first_mp1, 10399 ilw.ilw_walk_ill, ill, hdr_length, 10400 mctl_present, IP6_NO_IPPOLICY, 10401 ilm->ilm_zoneid, NULL); 10402 } 10403 ilm_walker_finish(&ilw); 10404 } else { 10405 first_mp1 = ip_copymsg(first_mp); 10406 if (first_mp1 != NULL) 10407 icmp_inbound_v6(q, first_mp1, ill, ill, 10408 hdr_length, mctl_present, 10409 IP6_NO_IPPOLICY, ire->ire_zoneid, 10410 NULL); 10411 } 10412 } 10413 /* FALLTHRU */ 10414 default: { 10415 /* 10416 * Handle protocols with which IPv6 is less intimate. 10417 */ 10418 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10419 10420 /* 10421 * Enable sending ICMP for "Unknown" nexthdr 10422 * case. i.e. 
where we did not FALLTHRU from 10423 * IPPROTO_ICMPV6 processing case above. 10424 */ 10425 if (nexthdr != IPPROTO_ICMPV6) 10426 fanout_flags |= IP_FF_SEND_ICMP; 10427 /* 10428 * Note: There can be more than one stream bound 10429 * to a particular protocol. When this is the case, 10430 * each one gets a copy of any incoming packets. 10431 */ 10432 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10433 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10434 mctl_present, ire->ire_zoneid); 10435 return; 10436 } 10437 } 10438 } 10439 10440 /* 10441 * Send packet using IRE. 10442 * Checksumming is controlled by cksum_request: 10443 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10444 * 1 => Skip TCP/UDP/SCTP checksum 10445 * Otherwise => checksum_request contains insert offset for checksum 10446 * 10447 * Assumes that the following set of headers appear in the first 10448 * mblk: 10449 * ip6_t 10450 * Any extension headers 10451 * TCP/UDP/SCTP header (if present) 10452 * The routine can handle an ICMPv6 header that is not in the first mblk. 10453 * 10454 * NOTE : This function does not ire_refrele the ire passed in as the 10455 * argument unlike ip_wput_ire where the REFRELE is done. 10456 * Refer to ip_wput_ire for more on this. 10457 */ 10458 static void 10459 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10460 int cksum_request, conn_t *connp, int caller, int flags, zoneid_t zoneid) 10461 { 10462 ip6_t *ip6h; 10463 uint8_t nexthdr; 10464 uint16_t hdr_length; 10465 uint_t reachable = 0x0; 10466 ill_t *ill; 10467 mib2_ipIfStatsEntry_t *mibptr; 10468 mblk_t *first_mp; 10469 boolean_t mctl_present; 10470 ipsec_out_t *io; 10471 boolean_t conn_dontroute; /* conn value for multicast */ 10472 boolean_t conn_multicast_loop; /* conn value for multicast */ 10473 boolean_t multicast_forward; /* Should we forward ? 
*/ 10474 int max_frag; 10475 ip_stack_t *ipst = ire->ire_ipst; 10476 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10477 10478 ill = ire_to_ill(ire); 10479 first_mp = mp; 10480 multicast_forward = B_FALSE; 10481 10482 if (mp->b_datap->db_type != M_CTL) { 10483 ip6h = (ip6_t *)first_mp->b_rptr; 10484 } else { 10485 io = (ipsec_out_t *)first_mp->b_rptr; 10486 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10487 /* 10488 * Grab the zone id now because the M_CTL can be discarded by 10489 * ip_wput_ire_parse_ipsec_out() below. 10490 */ 10491 ASSERT(zoneid == io->ipsec_out_zoneid); 10492 ASSERT(zoneid != ALL_ZONES); 10493 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10494 /* 10495 * For the multicast case, ipsec_out carries conn_dontroute and 10496 * conn_multicast_loop as conn may not be available here. We 10497 * need this for multicast loopback and forwarding which is done 10498 * later in the code. 10499 */ 10500 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10501 conn_dontroute = io->ipsec_out_dontroute; 10502 conn_multicast_loop = io->ipsec_out_multicast_loop; 10503 /* 10504 * If conn_dontroute is not set or conn_multicast_loop 10505 * is set, we need to do forwarding/loopback. For 10506 * datagrams from ip_wput_multicast, conn_dontroute is 10507 * set to B_TRUE and conn_multicast_loop is set to 10508 * B_FALSE so that we neither do forwarding nor 10509 * loopback. 10510 */ 10511 if (!conn_dontroute || conn_multicast_loop) 10512 multicast_forward = B_TRUE; 10513 } 10514 } 10515 10516 /* 10517 * If the sender didn't supply the hop limit and there is a default 10518 * unicast hop limit associated with the output interface, we use 10519 * that if the packet is unicast. Interface specific unicast hop 10520 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10521 */ 10522 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10523 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10524 ip6h->ip6_hops = ill->ill_max_hops; 10525 } 10526 10527 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10528 ire->ire_zoneid != ALL_ZONES) { 10529 /* 10530 * When a zone sends a packet to another zone, we try to deliver 10531 * the packet under the same conditions as if the destination 10532 * was a real node on the network. To do so, we look for a 10533 * matching route in the forwarding table. 10534 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10535 * ip_newroute_v6() does. 10536 * Note that IRE_LOCAL are special, since they are used 10537 * when the zoneid doesn't match in some cases. This means that 10538 * we need to handle ipha_src differently since ire_src_addr 10539 * belongs to the receiving zone instead of the sending zone. 10540 * When ip_restrict_interzone_loopback is set, then 10541 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10542 * for loopback between zones when the logical "Ethernet" would 10543 * have looped them back. 
10544 */ 10545 ire_t *src_ire; 10546 10547 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10548 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10549 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10550 if (src_ire != NULL && 10551 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10552 (!ipst->ips_ip_restrict_interzone_loopback || 10553 ire_local_same_lan(ire, src_ire))) { 10554 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10555 !unspec_src) { 10556 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10557 } 10558 ire_refrele(src_ire); 10559 } else { 10560 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10561 if (src_ire != NULL) { 10562 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10563 ire_refrele(src_ire); 10564 freemsg(first_mp); 10565 return; 10566 } 10567 ire_refrele(src_ire); 10568 } 10569 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10570 /* Failed */ 10571 freemsg(first_mp); 10572 return; 10573 } 10574 icmp_unreachable_v6(q, first_mp, 10575 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10576 zoneid, ipst); 10577 return; 10578 } 10579 } 10580 10581 if (mp->b_datap->db_type == M_CTL || 10582 ipss->ipsec_outbound_v6_policy_present) { 10583 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10584 connp, unspec_src, zoneid); 10585 if (mp == NULL) { 10586 return; 10587 } 10588 } 10589 10590 first_mp = mp; 10591 if (mp->b_datap->db_type == M_CTL) { 10592 io = (ipsec_out_t *)mp->b_rptr; 10593 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10594 mp = mp->b_cont; 10595 mctl_present = B_TRUE; 10596 } else { 10597 mctl_present = B_FALSE; 10598 } 10599 10600 ip6h = (ip6_t *)mp->b_rptr; 10601 nexthdr = ip6h->ip6_nxt; 10602 mibptr = ill->ill_ip_mib; 10603 10604 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10605 ipif_t *ipif; 10606 10607 /* 10608 * Select the source address using ipif_select_source_v6. 
10609 */ 10610 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, B_FALSE, 10611 IPV6_PREFER_SRC_DEFAULT, zoneid); 10612 if (ipif == NULL) { 10613 if (ip_debug > 2) { 10614 /* ip1dbg */ 10615 pr_addr_dbg("ip_wput_ire_v6: no src for " 10616 "dst %s\n", AF_INET6, &ip6h->ip6_dst); 10617 printf("through interface %s\n", ill->ill_name); 10618 } 10619 freemsg(first_mp); 10620 return; 10621 } 10622 ip6h->ip6_src = ipif->ipif_v6src_addr; 10623 ipif_refrele(ipif); 10624 } 10625 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10626 if ((connp != NULL && connp->conn_multicast_loop) || 10627 !IS_LOOPBACK(ill)) { 10628 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 10629 ALL_ZONES) != NULL) { 10630 mblk_t *nmp; 10631 int fanout_flags = 0; 10632 10633 if (connp != NULL && 10634 !connp->conn_multicast_loop) { 10635 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10636 } 10637 ip1dbg(("ip_wput_ire_v6: " 10638 "Loopback multicast\n")); 10639 nmp = ip_copymsg(first_mp); 10640 if (nmp != NULL) { 10641 ip6_t *nip6h; 10642 mblk_t *mp_ip6h; 10643 10644 if (mctl_present) { 10645 nip6h = (ip6_t *) 10646 nmp->b_cont->b_rptr; 10647 mp_ip6h = nmp->b_cont; 10648 } else { 10649 nip6h = (ip6_t *)nmp->b_rptr; 10650 mp_ip6h = nmp; 10651 } 10652 10653 DTRACE_PROBE4( 10654 ip6__loopback__out__start, 10655 ill_t *, NULL, 10656 ill_t *, ill, 10657 ip6_t *, nip6h, 10658 mblk_t *, nmp); 10659 10660 FW_HOOKS6( 10661 ipst->ips_ip6_loopback_out_event, 10662 ipst->ips_ipv6firewall_loopback_out, 10663 NULL, ill, nip6h, nmp, mp_ip6h, 10664 0, ipst); 10665 10666 DTRACE_PROBE1( 10667 ip6__loopback__out__end, 10668 mblk_t *, nmp); 10669 10670 /* 10671 * DTrace this as ip:::send. A blocked 10672 * packet will fire the send probe, but 10673 * not the receive probe. 
10674 */ 10675 DTRACE_IP7(send, mblk_t *, nmp, 10676 conn_t *, NULL, void_ip_t *, nip6h, 10677 __dtrace_ipsr_ill_t *, ill, 10678 ipha_t *, NULL, ip6_t *, nip6h, 10679 int, 1); 10680 10681 if (nmp != NULL) { 10682 /* 10683 * Deliver locally and to 10684 * every local zone, except 10685 * the sending zone when 10686 * IPV6_MULTICAST_LOOP is 10687 * disabled. 10688 */ 10689 ip_wput_local_v6(RD(q), ill, 10690 nip6h, nmp, ire, 10691 fanout_flags, zoneid); 10692 } 10693 } else { 10694 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10695 ip1dbg(("ip_wput_ire_v6: " 10696 "copymsg failed\n")); 10697 } 10698 } 10699 } 10700 if (ip6h->ip6_hops == 0 || 10701 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10702 IS_LOOPBACK(ill)) { 10703 /* 10704 * Local multicast or just loopback on loopback 10705 * interface. 10706 */ 10707 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10708 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10709 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10710 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10711 freemsg(first_mp); 10712 return; 10713 } 10714 } 10715 10716 if (ire->ire_stq != NULL) { 10717 uint32_t sum; 10718 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10719 ill_phyint->phyint_ifindex; 10720 queue_t *dev_q = ire->ire_stq->q_next; 10721 10722 /* 10723 * non-NULL send-to queue - packet is to be sent 10724 * out an interface. 10725 */ 10726 10727 /* Driver is flow-controlling? */ 10728 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10729 DEV_Q_FLOW_BLOCKED(dev_q)) { 10730 /* 10731 * Queue packet if we have an conn to give back 10732 * pressure. We can't queue packets intended for 10733 * hardware acceleration since we've tossed that 10734 * state already. If the packet is being fed back 10735 * from ire_send_v6, we don't know the position in 10736 * the queue to enqueue the packet and we discard 10737 * the packet. 
10738 */ 10739 if (ipst->ips_ip_output_queue && connp != NULL && 10740 !mctl_present && caller != IRE_SEND) { 10741 if (caller == IP_WSRV) { 10742 connp->conn_did_putbq = 1; 10743 (void) putbq(connp->conn_wq, mp); 10744 conn_drain_insert(connp); 10745 /* 10746 * caller == IP_WSRV implies we are 10747 * the service thread, and the 10748 * queue is already noenabled. 10749 * The check for canput and 10750 * the putbq is not atomic. 10751 * So we need to check again. 10752 */ 10753 if (canput(dev_q)) 10754 connp->conn_did_putbq = 0; 10755 } else { 10756 (void) putq(connp->conn_wq, mp); 10757 } 10758 return; 10759 } 10760 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10761 freemsg(first_mp); 10762 return; 10763 } 10764 10765 /* 10766 * Look for reachability confirmations from the transport. 10767 */ 10768 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10769 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10770 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10771 if (mctl_present) 10772 io->ipsec_out_reachable = B_TRUE; 10773 } 10774 /* Fastpath */ 10775 switch (nexthdr) { 10776 case IPPROTO_TCP: 10777 case IPPROTO_UDP: 10778 case IPPROTO_ICMPV6: 10779 case IPPROTO_SCTP: 10780 hdr_length = IPV6_HDR_LEN; 10781 break; 10782 default: { 10783 uint8_t *nexthdrp; 10784 10785 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10786 &hdr_length, &nexthdrp)) { 10787 /* Malformed packet */ 10788 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10789 freemsg(first_mp); 10790 return; 10791 } 10792 nexthdr = *nexthdrp; 10793 break; 10794 } 10795 } 10796 10797 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10798 uint16_t *up; 10799 uint16_t *insp; 10800 10801 /* 10802 * The packet header is processed once for all, even 10803 * in the multirouting case. We disable hardware 10804 * checksum if the packet is multirouted, as it will be 10805 * replicated via several interfaces, and not all of 10806 * them may have this capability. 
10807 */ 10808 if (cksum_request == 1 && 10809 !(ire->ire_flags & RTF_MULTIRT)) { 10810 /* Skip the transport checksum */ 10811 goto cksum_done; 10812 } 10813 /* 10814 * Do user-configured raw checksum. 10815 * Compute checksum and insert at offset "cksum_request" 10816 */ 10817 10818 /* check for enough headers for checksum */ 10819 cksum_request += hdr_length; /* offset from rptr */ 10820 if ((mp->b_wptr - mp->b_rptr) < 10821 (cksum_request + sizeof (int16_t))) { 10822 if (!pullupmsg(mp, 10823 cksum_request + sizeof (int16_t))) { 10824 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10825 " failed\n")); 10826 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10827 freemsg(first_mp); 10828 return; 10829 } 10830 ip6h = (ip6_t *)mp->b_rptr; 10831 } 10832 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10833 ASSERT(((uintptr_t)insp & 0x1) == 0); 10834 up = (uint16_t *)&ip6h->ip6_src; 10835 /* 10836 * icmp has placed length and routing 10837 * header adjustment in *insp. 10838 */ 10839 sum = htons(nexthdr) + 10840 up[0] + up[1] + up[2] + up[3] + 10841 up[4] + up[5] + up[6] + up[7] + 10842 up[8] + up[9] + up[10] + up[11] + 10843 up[12] + up[13] + up[14] + up[15]; 10844 sum = (sum & 0xffff) + (sum >> 16); 10845 *insp = IP_CSUM(mp, hdr_length, sum); 10846 } else if (nexthdr == IPPROTO_TCP) { 10847 uint16_t *up; 10848 10849 /* 10850 * Check for full IPv6 header + enough TCP header 10851 * to get at the checksum field. 
10852 */ 10853 if ((mp->b_wptr - mp->b_rptr) < 10854 (hdr_length + TCP_CHECKSUM_OFFSET + 10855 TCP_CHECKSUM_SIZE)) { 10856 if (!pullupmsg(mp, hdr_length + 10857 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10858 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10859 " failed\n")); 10860 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10861 freemsg(first_mp); 10862 return; 10863 } 10864 ip6h = (ip6_t *)mp->b_rptr; 10865 } 10866 10867 up = (uint16_t *)&ip6h->ip6_src; 10868 /* 10869 * Note: The TCP module has stored the length value 10870 * into the tcp checksum field, so we don't 10871 * need to explicitly sum it in here. 10872 */ 10873 sum = up[0] + up[1] + up[2] + up[3] + 10874 up[4] + up[5] + up[6] + up[7] + 10875 up[8] + up[9] + up[10] + up[11] + 10876 up[12] + up[13] + up[14] + up[15]; 10877 10878 /* Fold the initial sum */ 10879 sum = (sum & 0xffff) + (sum >> 16); 10880 10881 up = (uint16_t *)(((uchar_t *)ip6h) + 10882 hdr_length + TCP_CHECKSUM_OFFSET); 10883 10884 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10885 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10886 ire->ire_max_frag, mctl_present, sum); 10887 10888 /* Software checksum? 
*/ 10889 if (DB_CKSUMFLAGS(mp) == 0) { 10890 IP6_STAT(ipst, ip6_out_sw_cksum); 10891 IP6_STAT_UPDATE(ipst, 10892 ip6_tcp_out_sw_cksum_bytes, 10893 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10894 hdr_length); 10895 } 10896 } else if (nexthdr == IPPROTO_UDP) { 10897 uint16_t *up; 10898 10899 /* 10900 * check for full IPv6 header + enough UDP header 10901 * to get at the UDP checksum field 10902 */ 10903 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10904 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10905 if (!pullupmsg(mp, hdr_length + 10906 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10907 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10908 " failed\n")); 10909 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10910 freemsg(first_mp); 10911 return; 10912 } 10913 ip6h = (ip6_t *)mp->b_rptr; 10914 } 10915 up = (uint16_t *)&ip6h->ip6_src; 10916 /* 10917 * Note: The UDP module has stored the length value 10918 * into the udp checksum field, so we don't 10919 * need to explicitly sum it in here. 10920 */ 10921 sum = up[0] + up[1] + up[2] + up[3] + 10922 up[4] + up[5] + up[6] + up[7] + 10923 up[8] + up[9] + up[10] + up[11] + 10924 up[12] + up[13] + up[14] + up[15]; 10925 10926 /* Fold the initial sum */ 10927 sum = (sum & 0xffff) + (sum >> 16); 10928 10929 up = (uint16_t *)(((uchar_t *)ip6h) + 10930 hdr_length + UDP_CHECKSUM_OFFSET); 10931 10932 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 10933 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10934 ire->ire_max_frag, mctl_present, sum); 10935 10936 /* Software checksum? 
*/ 10937 if (DB_CKSUMFLAGS(mp) == 0) { 10938 IP6_STAT(ipst, ip6_out_sw_cksum); 10939 IP6_STAT_UPDATE(ipst, 10940 ip6_udp_out_sw_cksum_bytes, 10941 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10942 hdr_length); 10943 } 10944 } else if (nexthdr == IPPROTO_ICMPV6) { 10945 uint16_t *up; 10946 icmp6_t *icmp6; 10947 10948 /* check for full IPv6+ICMPv6 header */ 10949 if ((mp->b_wptr - mp->b_rptr) < 10950 (hdr_length + ICMP6_MINLEN)) { 10951 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10952 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10953 " failed\n")); 10954 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10955 freemsg(first_mp); 10956 return; 10957 } 10958 ip6h = (ip6_t *)mp->b_rptr; 10959 } 10960 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10961 up = (uint16_t *)&ip6h->ip6_src; 10962 /* 10963 * icmp has placed length and routing 10964 * header adjustment in icmp6_cksum. 10965 */ 10966 sum = htons(IPPROTO_ICMPV6) + 10967 up[0] + up[1] + up[2] + up[3] + 10968 up[4] + up[5] + up[6] + up[7] + 10969 up[8] + up[9] + up[10] + up[11] + 10970 up[12] + up[13] + up[14] + up[15]; 10971 sum = (sum & 0xffff) + (sum >> 16); 10972 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 10973 10974 /* Update output mib stats */ 10975 icmp_update_out_mib_v6(ill, icmp6); 10976 } else if (nexthdr == IPPROTO_SCTP) { 10977 sctp_hdr_t *sctph; 10978 10979 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 10980 if (!pullupmsg(mp, hdr_length + 10981 sizeof (*sctph))) { 10982 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 10983 " failed\n")); 10984 BUMP_MIB(ill->ill_ip_mib, 10985 ipIfStatsOutDiscards); 10986 freemsg(mp); 10987 return; 10988 } 10989 ip6h = (ip6_t *)mp->b_rptr; 10990 } 10991 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 10992 sctph->sh_chksum = 0; 10993 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 10994 } 10995 10996 cksum_done: 10997 /* 10998 * We force the insertion of a fragment header using the 10999 * IPH_FRAG_HDR flag in two cases: 11000 * - after reception of an ICMPv6 "packet too 
big" message 11001 * with a MTU < 1280 (cf. RFC 2460 section 5) 11002 * - for multirouted IPv6 packets, so that the receiver can 11003 * discard duplicates according to their fragment identifier 11004 * 11005 * Two flags modifed from the API can modify this behavior. 11006 * The first is IPV6_USE_MIN_MTU. With this API the user 11007 * can specify how to manage PMTUD for unicast and multicast. 11008 * 11009 * IPV6_DONTFRAG disallows fragmentation. 11010 */ 11011 max_frag = ire->ire_max_frag; 11012 switch (IP6I_USE_MIN_MTU_API(flags)) { 11013 case IPV6_USE_MIN_MTU_DEFAULT: 11014 case IPV6_USE_MIN_MTU_UNICAST: 11015 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11016 max_frag = IPV6_MIN_MTU; 11017 } 11018 break; 11019 11020 case IPV6_USE_MIN_MTU_NEVER: 11021 max_frag = IPV6_MIN_MTU; 11022 break; 11023 } 11024 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11025 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11026 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11027 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11028 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11029 return; 11030 } 11031 11032 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11033 (mp->b_cont ? 
msgdsize(mp) : 11034 mp->b_wptr - (uchar_t *)ip6h)) { 11035 ip0dbg(("Packet length mismatch: %d, %ld\n", 11036 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11037 msgdsize(mp))); 11038 freemsg(first_mp); 11039 return; 11040 } 11041 /* Do IPSEC processing first */ 11042 if (mctl_present) { 11043 ipsec_out_process(q, first_mp, ire, ill_index); 11044 return; 11045 } 11046 ASSERT(mp->b_prev == NULL); 11047 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11048 ntohs(ip6h->ip6_plen) + 11049 IPV6_HDR_LEN, max_frag)); 11050 ASSERT(mp == first_mp); 11051 /* Initiate IPPF processing */ 11052 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11053 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11054 if (mp == NULL) { 11055 return; 11056 } 11057 } 11058 ip_wput_frag_v6(mp, ire, reachable, connp, 11059 caller, max_frag); 11060 return; 11061 } 11062 /* Do IPSEC processing first */ 11063 if (mctl_present) { 11064 int extra_len = ipsec_out_extra_length(first_mp); 11065 11066 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11067 max_frag) { 11068 /* 11069 * IPsec headers will push the packet over the 11070 * MTU limit. Issue an ICMPv6 Packet Too Big 11071 * message for this packet if the upper-layer 11072 * that issued this packet will be able to 11073 * react to the icmp_pkt2big_v6() that we'll 11074 * generate. 11075 */ 11076 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11077 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11078 return; 11079 } 11080 ipsec_out_process(q, first_mp, ire, ill_index); 11081 return; 11082 } 11083 /* 11084 * XXX multicast: add ip_mforward_v6() here. 11085 * Check conn_dontroute 11086 */ 11087 #ifdef lint 11088 /* 11089 * XXX The only purpose of this statement is to avoid lint 11090 * errors. See the above "XXX multicast". When that gets 11091 * fixed, remove this whole #ifdef lint section. 11092 */ 11093 ip3dbg(("multicast forward is %s.\n", 11094 (multicast_forward ? 
"TRUE" : "FALSE"))); 11095 #endif 11096 11097 UPDATE_OB_PKT_COUNT(ire); 11098 ire->ire_last_used_time = lbolt; 11099 ASSERT(mp == first_mp); 11100 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11101 } else { 11102 /* 11103 * DTrace this as ip:::send. A blocked packet will fire the 11104 * send probe, but not the receive probe. 11105 */ 11106 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11107 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11108 NULL, ip6_t *, ip6h, int, 1); 11109 DTRACE_PROBE4(ip6__loopback__out__start, 11110 ill_t *, NULL, ill_t *, ill, 11111 ip6_t *, ip6h, mblk_t *, first_mp); 11112 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11113 ipst->ips_ipv6firewall_loopback_out, 11114 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11115 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11116 if (first_mp != NULL) { 11117 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, 11118 zoneid); 11119 } 11120 } 11121 } 11122 11123 /* 11124 * Outbound IPv6 fragmentation routine using MDT. 11125 */ 11126 static void 11127 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11128 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11129 { 11130 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11131 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11132 mblk_t *hdr_mp, *md_mp = NULL; 11133 int i1; 11134 multidata_t *mmd; 11135 unsigned char *hdr_ptr, *pld_ptr; 11136 ip_pdescinfo_t pdi; 11137 uint32_t ident; 11138 size_t len; 11139 uint16_t offset; 11140 queue_t *stq = ire->ire_stq; 11141 ill_t *ill = (ill_t *)stq->q_ptr; 11142 ip_stack_t *ipst = ill->ill_ipst; 11143 11144 ASSERT(DB_TYPE(mp) == M_DATA); 11145 ASSERT(MBLKL(mp) > unfragmentable_len); 11146 11147 /* 11148 * Move read ptr past unfragmentable portion, we don't want this part 11149 * of the data in our fragments. 11150 */ 11151 mp->b_rptr += unfragmentable_len; 11152 11153 /* Calculate how many packets we will send out */ 11154 i1 = (mp->b_cont == NULL) ? 
MBLKL(mp) : msgsize(mp); 11155 pkts = (i1 + max_chunk - 1) / max_chunk; 11156 ASSERT(pkts > 1); 11157 11158 /* Allocate a message block which will hold all the IP Headers. */ 11159 wroff = ipst->ips_ip_wroff_extra; 11160 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11161 11162 i1 = pkts * hdr_chunk_len; 11163 /* 11164 * Create the header buffer, Multidata and destination address 11165 * and SAP attribute that should be associated with it. 11166 */ 11167 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11168 ((hdr_mp->b_wptr += i1), 11169 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11170 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11171 freemsg(mp); 11172 if (md_mp == NULL) { 11173 freemsg(hdr_mp); 11174 } else { 11175 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11176 freemsg(md_mp); 11177 } 11178 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11179 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11180 return; 11181 } 11182 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11183 11184 /* 11185 * Add a payload buffer to the Multidata; this operation must not 11186 * fail, or otherwise our logic in this routine is broken. There 11187 * is no memory allocation done by the routine, so any returned 11188 * failure simply tells us that we've done something wrong. 11189 * 11190 * A failure tells us that either we're adding the same payload 11191 * buffer more than once, or we're trying to add more buffers than 11192 * allowed. None of the above cases should happen, and we panic 11193 * because either there's horrible heap corruption, and/or 11194 * programming mistake. 11195 */ 11196 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11197 goto pbuf_panic; 11198 } 11199 11200 hdr_ptr = hdr_mp->b_rptr; 11201 pld_ptr = mp->b_rptr; 11202 11203 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11204 11205 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11206 11207 /* 11208 * len is the total length of the fragmentable data in this 11209 * datagram. 
For each fragment sent, we will decrement len 11210 * by the amount of fragmentable data sent in that fragment 11211 * until len reaches zero. 11212 */ 11213 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11214 11215 offset = 0; 11216 prev_nexthdr_offset += wroff; 11217 11218 while (len != 0) { 11219 size_t mlen; 11220 ip6_t *fip6h; 11221 ip6_frag_t *fraghdr; 11222 int error; 11223 11224 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11225 mlen = MIN(len, max_chunk); 11226 len -= mlen; 11227 11228 fip6h = (ip6_t *)(hdr_ptr + wroff); 11229 ASSERT(OK_32PTR(fip6h)); 11230 bcopy(ip6h, fip6h, unfragmentable_len); 11231 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11232 11233 fip6h->ip6_plen = htons((uint16_t)(mlen + 11234 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11235 11236 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11237 unfragmentable_len); 11238 fraghdr->ip6f_nxt = nexthdr; 11239 fraghdr->ip6f_reserved = 0; 11240 fraghdr->ip6f_offlg = htons(offset) | 11241 ((len != 0) ? IP6F_MORE_FRAG : 0); 11242 fraghdr->ip6f_ident = ident; 11243 11244 /* 11245 * Record offset and size of header and data of the next packet 11246 * in the multidata message. 11247 */ 11248 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11249 unfragmentable_len + sizeof (ip6_frag_t), 0); 11250 PDESC_PLD_INIT(&pdi); 11251 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11252 ASSERT(i1 > 0); 11253 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11254 if (i1 == mlen) { 11255 pld_ptr += mlen; 11256 } else { 11257 i1 = mlen - i1; 11258 mp = mp->b_cont; 11259 ASSERT(mp != NULL); 11260 ASSERT(MBLKL(mp) >= i1); 11261 /* 11262 * Attach the next payload message block to the 11263 * multidata message. 
11264 */ 11265 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11266 goto pbuf_panic; 11267 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11268 pld_ptr = mp->b_rptr + i1; 11269 } 11270 11271 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11272 KM_NOSLEEP)) == NULL) { 11273 /* 11274 * Any failure other than ENOMEM indicates that we 11275 * have passed in invalid pdesc info or parameters 11276 * to mmd_addpdesc, which must not happen. 11277 * 11278 * EINVAL is a result of failure on boundary checks 11279 * against the pdesc info contents. It should not 11280 * happen, and we panic because either there's 11281 * horrible heap corruption, and/or programming 11282 * mistake. 11283 */ 11284 if (error != ENOMEM) { 11285 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11286 "pdesc logic error detected for " 11287 "mmd %p pinfo %p (%d)\n", 11288 (void *)mmd, (void *)&pdi, error); 11289 /* NOTREACHED */ 11290 } 11291 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11292 /* Free unattached payload message blocks as well */ 11293 md_mp->b_cont = mp->b_cont; 11294 goto free_mmd; 11295 } 11296 11297 /* Advance fragment offset. */ 11298 offset += mlen; 11299 11300 /* Advance to location for next header in the buffer. */ 11301 hdr_ptr += hdr_chunk_len; 11302 11303 /* Did we reach the next payload message block? */ 11304 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11305 mp = mp->b_cont; 11306 /* 11307 * Attach the next message block with payload 11308 * data to the multidata message. 
11309 */ 11310 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11311 goto pbuf_panic; 11312 pld_ptr = mp->b_rptr; 11313 } 11314 } 11315 11316 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11317 ASSERT(mp->b_wptr == pld_ptr); 11318 11319 /* Update IP statistics */ 11320 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11321 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11322 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11323 /* 11324 * The ipv6 header len is accounted for in unfragmentable_len so 11325 * when calculating the fragmentation overhead just add the frag 11326 * header len. 11327 */ 11328 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11329 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11330 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11331 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11332 11333 ire->ire_ob_pkt_count += pkts; 11334 if (ire->ire_ipif != NULL) 11335 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11336 11337 ire->ire_last_used_time = lbolt; 11338 /* Send it down */ 11339 putnext(stq, md_mp); 11340 return; 11341 11342 pbuf_panic: 11343 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11344 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11345 pbuf_idx); 11346 /* NOTREACHED */ 11347 } 11348 11349 /* 11350 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11351 * We have not optimized this in terms of number of mblks 11352 * allocated. For instance, for each fragment sent we always allocate a 11353 * mblk to hold the IPv6 header and fragment header. 11354 * 11355 * Assumes that all the extension headers are contained in the first mblk. 11356 * 11357 * The fragment header is inserted after an hop-by-hop options header 11358 * and after [an optional destinations header followed by] a routing header. 11359 * 11360 * NOTE : This function does not ire_refrele the ire passed in as 11361 * the argument. 
11362 */ 11363 void 11364 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11365 int caller, int max_frag) 11366 { 11367 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11368 ip6_t *fip6h; 11369 mblk_t *hmp; 11370 mblk_t *hmp0; 11371 mblk_t *dmp; 11372 ip6_frag_t *fraghdr; 11373 size_t unfragmentable_len; 11374 size_t len; 11375 size_t mlen; 11376 size_t max_chunk; 11377 uint32_t ident; 11378 uint16_t off_flags; 11379 uint16_t offset = 0; 11380 ill_t *ill; 11381 uint8_t nexthdr; 11382 uint_t prev_nexthdr_offset; 11383 uint8_t *ptr; 11384 ip_stack_t *ipst = ire->ire_ipst; 11385 11386 ASSERT(ire->ire_type == IRE_CACHE); 11387 ill = (ill_t *)ire->ire_stq->q_ptr; 11388 11389 if (max_frag <= 0) { 11390 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11391 freemsg(mp); 11392 return; 11393 } 11394 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11395 11396 /* 11397 * Determine the length of the unfragmentable portion of this 11398 * datagram. This consists of the IPv6 header, a potential 11399 * hop-by-hop options header, a potential pre-routing-header 11400 * destination options header, and a potential routing header. 
11401 */ 11402 nexthdr = ip6h->ip6_nxt; 11403 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11404 ptr = (uint8_t *)&ip6h[1]; 11405 11406 if (nexthdr == IPPROTO_HOPOPTS) { 11407 ip6_hbh_t *hbh_hdr; 11408 uint_t hdr_len; 11409 11410 hbh_hdr = (ip6_hbh_t *)ptr; 11411 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11412 nexthdr = hbh_hdr->ip6h_nxt; 11413 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11414 - (uint8_t *)ip6h; 11415 ptr += hdr_len; 11416 } 11417 if (nexthdr == IPPROTO_DSTOPTS) { 11418 ip6_dest_t *dest_hdr; 11419 uint_t hdr_len; 11420 11421 dest_hdr = (ip6_dest_t *)ptr; 11422 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11423 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11424 nexthdr = dest_hdr->ip6d_nxt; 11425 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11426 - (uint8_t *)ip6h; 11427 ptr += hdr_len; 11428 } 11429 } 11430 if (nexthdr == IPPROTO_ROUTING) { 11431 ip6_rthdr_t *rthdr; 11432 uint_t hdr_len; 11433 11434 rthdr = (ip6_rthdr_t *)ptr; 11435 nexthdr = rthdr->ip6r_nxt; 11436 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11437 - (uint8_t *)ip6h; 11438 hdr_len = 8 * (rthdr->ip6r_len + 1); 11439 ptr += hdr_len; 11440 } 11441 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11442 11443 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11444 sizeof (ip6_frag_t)) & ~7; 11445 11446 /* Check if we can use MDT to send out the frags. */ 11447 ASSERT(!IRE_IS_LOCAL(ire)); 11448 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11449 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11450 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11451 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11452 nexthdr, prev_nexthdr_offset); 11453 return; 11454 } 11455 11456 /* 11457 * Allocate an mblk with enough room for the link-layer 11458 * header, the unfragmentable part of the datagram, and the 11459 * fragment header. 
This (or a copy) will be used as the 11460 * first mblk for each fragment we send. 11461 */ 11462 hmp = allocb(unfragmentable_len + sizeof (ip6_frag_t) + 11463 ipst->ips_ip_wroff_extra, BPRI_HI); 11464 if (hmp == NULL) { 11465 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11466 freemsg(mp); 11467 return; 11468 } 11469 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11470 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11471 11472 fip6h = (ip6_t *)hmp->b_rptr; 11473 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11474 11475 bcopy(ip6h, fip6h, unfragmentable_len); 11476 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11477 11478 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11479 11480 fraghdr->ip6f_nxt = nexthdr; 11481 fraghdr->ip6f_reserved = 0; 11482 fraghdr->ip6f_offlg = 0; 11483 fraghdr->ip6f_ident = htonl(ident); 11484 11485 /* 11486 * len is the total length of the fragmentable data in this 11487 * datagram. For each fragment sent, we will decrement len 11488 * by the amount of fragmentable data sent in that fragment 11489 * until len reaches zero. 11490 */ 11491 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11492 11493 /* 11494 * Move read ptr past unfragmentable portion, we don't want this part 11495 * of the data in our fragments. 
11496 */ 11497 mp->b_rptr += unfragmentable_len; 11498 11499 while (len != 0) { 11500 mlen = MIN(len, max_chunk); 11501 len -= mlen; 11502 if (len != 0) { 11503 /* Not last */ 11504 hmp0 = copyb(hmp); 11505 if (hmp0 == NULL) { 11506 freeb(hmp); 11507 freemsg(mp); 11508 BUMP_MIB(ill->ill_ip_mib, 11509 ipIfStatsOutFragFails); 11510 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11511 return; 11512 } 11513 off_flags = IP6F_MORE_FRAG; 11514 } else { 11515 /* Last fragment */ 11516 hmp0 = hmp; 11517 hmp = NULL; 11518 off_flags = 0; 11519 } 11520 fip6h = (ip6_t *)(hmp0->b_rptr); 11521 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11522 11523 fip6h->ip6_plen = htons((uint16_t)(mlen + 11524 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11525 /* 11526 * Note: Optimization alert. 11527 * In IPv6 (and IPv4) protocol header, Fragment Offset 11528 * ("offset") is 13 bits wide and in 8-octet units. 11529 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11530 * it occupies the most significant 13 bits. 11531 * (least significant 13 bits in IPv4). 11532 * We do not do any shifts here. Not shifting is same effect 11533 * as taking offset value in octet units, dividing by 8 and 11534 * then shifting 3 bits left to line it up in place in proper 11535 * place protocol header. 
11536 */ 11537 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11538 11539 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11540 /* mp has already been freed by ip_carve_mp() */ 11541 if (hmp != NULL) 11542 freeb(hmp); 11543 freeb(hmp0); 11544 ip1dbg(("ip_carve_mp: failed\n")); 11545 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11546 return; 11547 } 11548 hmp0->b_cont = dmp; 11549 /* Get the priority marking, if any */ 11550 hmp0->b_band = dmp->b_band; 11551 UPDATE_OB_PKT_COUNT(ire); 11552 ire->ire_last_used_time = lbolt; 11553 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11554 caller, NULL); 11555 reachable = 0; /* No need to redo state machine in loop */ 11556 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11557 offset += mlen; 11558 } 11559 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11560 } 11561 11562 /* 11563 * Determine if the ill and multicast aspects of that packets 11564 * "matches" the conn. 11565 */ 11566 boolean_t 11567 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11568 zoneid_t zoneid) 11569 { 11570 ill_t *bound_ill; 11571 boolean_t wantpacket; 11572 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11573 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11574 11575 /* 11576 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11577 * unicast and multicast reception to conn_incoming_ill. 11578 * conn_wantpacket_v6 is called both for unicast and 11579 * multicast. 11580 */ 11581 bound_ill = connp->conn_incoming_ill; 11582 if (bound_ill != NULL) { 11583 if (IS_IPMP(bound_ill)) { 11584 if (bound_ill->ill_grp != ill->ill_grp) 11585 return (B_FALSE); 11586 } else { 11587 if (bound_ill != ill) 11588 return (B_FALSE); 11589 } 11590 } 11591 11592 if (connp->conn_multi_router) 11593 return (B_TRUE); 11594 11595 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11596 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11597 /* 11598 * Unicast case: we match the conn only if it's in the specified 11599 * zone. 
11600 */ 11601 return (IPCL_ZONE_MATCH(connp, zoneid)); 11602 } 11603 11604 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11605 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11606 /* 11607 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11608 * disabled, therefore we don't dispatch the multicast packet to 11609 * the sending zone. 11610 */ 11611 return (B_FALSE); 11612 } 11613 11614 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11615 zoneid != ALL_ZONES) { 11616 /* 11617 * Multicast packet on the loopback interface: we only match 11618 * conns who joined the group in the specified zone. 11619 */ 11620 return (B_FALSE); 11621 } 11622 11623 mutex_enter(&connp->conn_lock); 11624 wantpacket = 11625 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11626 mutex_exit(&connp->conn_lock); 11627 11628 return (wantpacket); 11629 } 11630 11631 11632 /* 11633 * Transmit a packet and update any NUD state based on the flags 11634 * XXX need to "recover" any ip6i_t when doing putq! 11635 * 11636 * NOTE : This function does not ire_refrele the ire passed in as the 11637 * argument. 
11638 */ 11639 void 11640 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11641 int caller, ipsec_out_t *io) 11642 { 11643 mblk_t *mp1; 11644 nce_t *nce = ire->ire_nce; 11645 ill_t *ill; 11646 ill_t *out_ill; 11647 uint64_t delta; 11648 ip6_t *ip6h; 11649 queue_t *stq = ire->ire_stq; 11650 ire_t *ire1 = NULL; 11651 ire_t *save_ire = ire; 11652 boolean_t multirt_send = B_FALSE; 11653 mblk_t *next_mp = NULL; 11654 ip_stack_t *ipst = ire->ire_ipst; 11655 boolean_t fp_prepend = B_FALSE; 11656 uint32_t hlen; 11657 11658 ip6h = (ip6_t *)mp->b_rptr; 11659 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11660 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11661 ASSERT(nce != NULL); 11662 ASSERT(mp->b_datap->db_type == M_DATA); 11663 ASSERT(stq != NULL); 11664 11665 ill = ire_to_ill(ire); 11666 if (!ill) { 11667 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11668 freemsg(mp); 11669 return; 11670 } 11671 11672 /* 11673 * If a packet is to be sent out an interface that is a 6to4 11674 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11675 * destination, must be checked to have a 6to4 prefix 11676 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11677 * address configured on the sending interface. Otherwise, 11678 * the packet was delivered to this interface in error and the 11679 * packet must be dropped. 
11680 */ 11681 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11682 ipif_t *ipif = ill->ill_ipif; 11683 11684 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11685 &ip6h->ip6_dst)) { 11686 if (ip_debug > 2) { 11687 /* ip1dbg */ 11688 pr_addr_dbg("ip_xmit_v6: attempting to " 11689 "send 6to4 addressed IPv6 " 11690 "destination (%s) out the wrong " 11691 "interface.\n", AF_INET6, 11692 &ip6h->ip6_dst); 11693 } 11694 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 11695 freemsg(mp); 11696 return; 11697 } 11698 } 11699 11700 /* Flow-control check has been done in ip_wput_ire_v6 */ 11701 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11702 caller == IP_WSRV || canput(stq->q_next)) { 11703 uint32_t ill_index; 11704 11705 /* 11706 * In most cases, the emission loop below is entered only 11707 * once. Only in the case where the ire holds the 11708 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11709 * flagged ires in the bucket, and send the packet 11710 * through all crossed RTF_MULTIRT routes. 11711 */ 11712 if (ire->ire_flags & RTF_MULTIRT) { 11713 /* 11714 * Multirouting case. The bucket where ire is stored 11715 * probably holds other RTF_MULTIRT flagged ires 11716 * to the destination. In this call to ip_xmit_v6, 11717 * we attempt to send the packet through all 11718 * those ires. Thus, we first ensure that ire is the 11719 * first RTF_MULTIRT ire in the bucket, 11720 * before walking the ire list. 11721 */ 11722 ire_t *first_ire; 11723 irb_t *irb = ire->ire_bucket; 11724 ASSERT(irb != NULL); 11725 multirt_send = B_TRUE; 11726 11727 /* Make sure we do not omit any multiroute ire. 
*/ 11728 IRB_REFHOLD(irb); 11729 for (first_ire = irb->irb_ire; 11730 first_ire != NULL; 11731 first_ire = first_ire->ire_next) { 11732 if ((first_ire->ire_flags & RTF_MULTIRT) && 11733 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11734 &ire->ire_addr_v6)) && 11735 !(first_ire->ire_marks & 11736 (IRE_MARK_CONDEMNED | IRE_MARK_TESTHIDDEN))) 11737 break; 11738 } 11739 11740 if ((first_ire != NULL) && (first_ire != ire)) { 11741 IRE_REFHOLD(first_ire); 11742 /* ire will be released by the caller */ 11743 ire = first_ire; 11744 nce = ire->ire_nce; 11745 stq = ire->ire_stq; 11746 ill = ire_to_ill(ire); 11747 } 11748 IRB_REFRELE(irb); 11749 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11750 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11751 ILL_MDT_USABLE(ill)) { 11752 /* 11753 * This tcp connection was marked as MDT-capable, but 11754 * it has been turned off due changes in the interface. 11755 * Now that the interface support is back, turn it on 11756 * by notifying tcp. We don't directly modify tcp_mdt, 11757 * since we leave all the details to the tcp code that 11758 * knows better. 11759 */ 11760 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11761 11762 if (mdimp == NULL) { 11763 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11764 "connp %p (ENOMEM)\n", (void *)connp)); 11765 } else { 11766 CONN_INC_REF(connp); 11767 SQUEUE_ENTER_ONE(connp->conn_sqp, mdimp, 11768 tcp_input, connp, SQ_FILL, 11769 SQTAG_TCP_INPUT_MCTL); 11770 } 11771 } 11772 11773 do { 11774 mblk_t *mp_ip6h; 11775 11776 if (multirt_send) { 11777 irb_t *irb; 11778 /* 11779 * We are in a multiple send case, need to get 11780 * the next ire and make a duplicate of the 11781 * packet. ire1 holds here the next ire to 11782 * process in the bucket. If multirouting is 11783 * expected, any non-RTF_MULTIRT ire that has 11784 * the right destination address is ignored. 
11785 */ 11786 irb = ire->ire_bucket; 11787 ASSERT(irb != NULL); 11788 11789 IRB_REFHOLD(irb); 11790 for (ire1 = ire->ire_next; 11791 ire1 != NULL; 11792 ire1 = ire1->ire_next) { 11793 if (!(ire1->ire_flags & RTF_MULTIRT)) 11794 continue; 11795 if (!IN6_ARE_ADDR_EQUAL( 11796 &ire1->ire_addr_v6, 11797 &ire->ire_addr_v6)) 11798 continue; 11799 if (ire1->ire_marks & 11800 IRE_MARK_CONDEMNED) 11801 continue; 11802 11803 /* Got one */ 11804 if (ire1 != save_ire) { 11805 IRE_REFHOLD(ire1); 11806 } 11807 break; 11808 } 11809 IRB_REFRELE(irb); 11810 11811 if (ire1 != NULL) { 11812 next_mp = copyb(mp); 11813 if ((next_mp == NULL) || 11814 ((mp->b_cont != NULL) && 11815 ((next_mp->b_cont = 11816 dupmsg(mp->b_cont)) == NULL))) { 11817 freemsg(next_mp); 11818 next_mp = NULL; 11819 ire_refrele(ire1); 11820 ire1 = NULL; 11821 } 11822 } 11823 11824 /* Last multiroute ire; don't loop anymore. */ 11825 if (ire1 == NULL) { 11826 multirt_send = B_FALSE; 11827 } 11828 } 11829 11830 ill_index = 11831 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11832 11833 /* Initiate IPPF processing */ 11834 if (IP6_OUT_IPP(flags, ipst)) { 11835 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11836 if (mp == NULL) { 11837 BUMP_MIB(ill->ill_ip_mib, 11838 ipIfStatsOutDiscards); 11839 if (next_mp != NULL) 11840 freemsg(next_mp); 11841 if (ire != save_ire) { 11842 ire_refrele(ire); 11843 } 11844 return; 11845 } 11846 ip6h = (ip6_t *)mp->b_rptr; 11847 } 11848 mp_ip6h = mp; 11849 11850 /* 11851 * Check for fastpath, we need to hold nce_lock to 11852 * prevent fastpath update from chaining nce_fp_mp. 
11853 */ 11854 11855 ASSERT(nce->nce_ipversion != IPV4_VERSION); 11856 mutex_enter(&nce->nce_lock); 11857 if ((mp1 = nce->nce_fp_mp) != NULL) { 11858 uchar_t *rptr; 11859 11860 hlen = MBLKL(mp1); 11861 rptr = mp->b_rptr - hlen; 11862 /* 11863 * make sure there is room for the fastpath 11864 * datalink header 11865 */ 11866 if (rptr < mp->b_datap->db_base) { 11867 mp1 = copyb(mp1); 11868 mutex_exit(&nce->nce_lock); 11869 if (mp1 == NULL) { 11870 BUMP_MIB(ill->ill_ip_mib, 11871 ipIfStatsOutDiscards); 11872 freemsg(mp); 11873 if (next_mp != NULL) 11874 freemsg(next_mp); 11875 if (ire != save_ire) { 11876 ire_refrele(ire); 11877 } 11878 return; 11879 } 11880 mp1->b_cont = mp; 11881 11882 /* Get the priority marking, if any */ 11883 mp1->b_band = mp->b_band; 11884 mp = mp1; 11885 } else { 11886 mp->b_rptr = rptr; 11887 /* 11888 * fastpath - pre-pend datalink 11889 * header 11890 */ 11891 bcopy(mp1->b_rptr, rptr, hlen); 11892 mutex_exit(&nce->nce_lock); 11893 fp_prepend = B_TRUE; 11894 } 11895 } else { 11896 /* 11897 * Get the DL_UNITDATA_REQ. 11898 */ 11899 mp1 = nce->nce_res_mp; 11900 if (mp1 == NULL) { 11901 mutex_exit(&nce->nce_lock); 11902 ip1dbg(("ip_xmit_v6: No resolution " 11903 "block ire = %p\n", (void *)ire)); 11904 freemsg(mp); 11905 if (next_mp != NULL) 11906 freemsg(next_mp); 11907 if (ire != save_ire) { 11908 ire_refrele(ire); 11909 } 11910 return; 11911 } 11912 /* 11913 * Prepend the DL_UNITDATA_REQ. 
11914 */ 11915 mp1 = copyb(mp1); 11916 mutex_exit(&nce->nce_lock); 11917 if (mp1 == NULL) { 11918 BUMP_MIB(ill->ill_ip_mib, 11919 ipIfStatsOutDiscards); 11920 freemsg(mp); 11921 if (next_mp != NULL) 11922 freemsg(next_mp); 11923 if (ire != save_ire) { 11924 ire_refrele(ire); 11925 } 11926 return; 11927 } 11928 mp1->b_cont = mp; 11929 11930 /* Get the priority marking, if any */ 11931 mp1->b_band = mp->b_band; 11932 mp = mp1; 11933 } 11934 11935 out_ill = (ill_t *)stq->q_ptr; 11936 11937 DTRACE_PROBE4(ip6__physical__out__start, 11938 ill_t *, NULL, ill_t *, out_ill, 11939 ip6_t *, ip6h, mblk_t *, mp); 11940 11941 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 11942 ipst->ips_ipv6firewall_physical_out, 11943 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 11944 11945 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 11946 11947 if (mp == NULL) { 11948 if (multirt_send) { 11949 ASSERT(ire1 != NULL); 11950 if (ire != save_ire) { 11951 ire_refrele(ire); 11952 } 11953 /* 11954 * Proceed with the next RTF_MULTIRT 11955 * ire, also set up the send-to queue 11956 * accordingly. 11957 */ 11958 ire = ire1; 11959 ire1 = NULL; 11960 stq = ire->ire_stq; 11961 nce = ire->ire_nce; 11962 ill = ire_to_ill(ire); 11963 mp = next_mp; 11964 next_mp = NULL; 11965 continue; 11966 } else { 11967 ASSERT(next_mp == NULL); 11968 ASSERT(ire1 == NULL); 11969 break; 11970 } 11971 } 11972 11973 if (ipst->ips_ipobs_enabled) { 11974 zoneid_t szone; 11975 11976 szone = ip_get_zoneid_v6(&ip6h->ip6_src, 11977 mp_ip6h, out_ill, ipst, ALL_ZONES); 11978 ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, 11979 ALL_ZONES, out_ill, IPV6_VERSION, 11980 fp_prepend ? hlen : 0, ipst); 11981 } 11982 11983 /* 11984 * Update ire and MIB counters; for save_ire, this has 11985 * been done by the caller. 
11986 */ 11987 if (ire != save_ire) { 11988 UPDATE_OB_PKT_COUNT(ire); 11989 ire->ire_last_used_time = lbolt; 11990 11991 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11992 BUMP_MIB(ill->ill_ip_mib, 11993 ipIfStatsHCOutMcastPkts); 11994 UPDATE_MIB(ill->ill_ip_mib, 11995 ipIfStatsHCOutMcastOctets, 11996 ntohs(ip6h->ip6_plen) + 11997 IPV6_HDR_LEN); 11998 } 11999 } 12000 12001 /* 12002 * Send it down. XXX Do we want to flow control AH/ESP 12003 * packets that carry TCP payloads? We don't flow 12004 * control TCP packets, but we should also not 12005 * flow-control TCP packets that have been protected. 12006 * We don't have an easy way to find out if an AH/ESP 12007 * packet was originally TCP or not currently. 12008 */ 12009 if (io == NULL) { 12010 BUMP_MIB(ill->ill_ip_mib, 12011 ipIfStatsHCOutTransmits); 12012 UPDATE_MIB(ill->ill_ip_mib, 12013 ipIfStatsHCOutOctets, 12014 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12015 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 12016 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 12017 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 12018 int, 0); 12019 12020 putnext(stq, mp); 12021 } else { 12022 /* 12023 * Safety Pup says: make sure this is 12024 * going to the right interface! 
12025 */ 12026 if (io->ipsec_out_capab_ill_index != 12027 ill_index) { 12028 /* IPsec kstats: bump lose counter */ 12029 freemsg(mp1); 12030 } else { 12031 BUMP_MIB(ill->ill_ip_mib, 12032 ipIfStatsHCOutTransmits); 12033 UPDATE_MIB(ill->ill_ip_mib, 12034 ipIfStatsHCOutOctets, 12035 ntohs(ip6h->ip6_plen) + 12036 IPV6_HDR_LEN); 12037 DTRACE_IP7(send, mblk_t *, mp, 12038 conn_t *, NULL, void_ip_t *, ip6h, 12039 __dtrace_ipsr_ill_t *, out_ill, 12040 ipha_t *, NULL, ip6_t *, ip6h, int, 12041 0); 12042 ipsec_hw_putnext(stq, mp); 12043 } 12044 } 12045 12046 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12047 if (ire != save_ire) { 12048 ire_refrele(ire); 12049 } 12050 if (multirt_send) { 12051 ASSERT(ire1 != NULL); 12052 /* 12053 * Proceed with the next RTF_MULTIRT 12054 * ire, also set up the send-to queue 12055 * accordingly. 12056 */ 12057 ire = ire1; 12058 ire1 = NULL; 12059 stq = ire->ire_stq; 12060 nce = ire->ire_nce; 12061 ill = ire_to_ill(ire); 12062 mp = next_mp; 12063 next_mp = NULL; 12064 continue; 12065 } 12066 ASSERT(next_mp == NULL); 12067 ASSERT(ire1 == NULL); 12068 return; 12069 } 12070 12071 ASSERT(nce->nce_state != ND_INCOMPLETE); 12072 12073 /* 12074 * Check for upper layer advice 12075 */ 12076 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12077 /* 12078 * It should be o.k. to check the state without 12079 * a lock here, at most we lose an advice. 
12080 */ 12081 nce->nce_last = TICK_TO_MSEC(lbolt64); 12082 if (nce->nce_state != ND_REACHABLE) { 12083 12084 mutex_enter(&nce->nce_lock); 12085 nce->nce_state = ND_REACHABLE; 12086 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12087 mutex_exit(&nce->nce_lock); 12088 (void) untimeout(nce->nce_timeout_id); 12089 if (ip_debug > 2) { 12090 /* ip1dbg */ 12091 pr_addr_dbg("ip_xmit_v6: state" 12092 " for %s changed to" 12093 " REACHABLE\n", AF_INET6, 12094 &ire->ire_addr_v6); 12095 } 12096 } 12097 if (ire != save_ire) { 12098 ire_refrele(ire); 12099 } 12100 if (multirt_send) { 12101 ASSERT(ire1 != NULL); 12102 /* 12103 * Proceed with the next RTF_MULTIRT 12104 * ire, also set up the send-to queue 12105 * accordingly. 12106 */ 12107 ire = ire1; 12108 ire1 = NULL; 12109 stq = ire->ire_stq; 12110 nce = ire->ire_nce; 12111 ill = ire_to_ill(ire); 12112 mp = next_mp; 12113 next_mp = NULL; 12114 continue; 12115 } 12116 ASSERT(next_mp == NULL); 12117 ASSERT(ire1 == NULL); 12118 return; 12119 } 12120 12121 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12122 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12123 " ill_reachable_time = %d \n", delta, 12124 ill->ill_reachable_time)); 12125 if (delta > (uint64_t)ill->ill_reachable_time) { 12126 nce = ire->ire_nce; 12127 mutex_enter(&nce->nce_lock); 12128 switch (nce->nce_state) { 12129 case ND_REACHABLE: 12130 case ND_STALE: 12131 /* 12132 * ND_REACHABLE is identical to 12133 * ND_STALE in this specific case. If 12134 * reachable time has expired for this 12135 * neighbor (delta is greater than 12136 * reachable time), conceptually, the 12137 * neighbor cache is no longer in 12138 * REACHABLE state, but already in 12139 * STALE state. So the correct 12140 * transition here is to ND_DELAY. 
12141 */ 12142 nce->nce_state = ND_DELAY; 12143 mutex_exit(&nce->nce_lock); 12144 NDP_RESTART_TIMER(nce, 12145 ipst->ips_delay_first_probe_time); 12146 if (ip_debug > 3) { 12147 /* ip2dbg */ 12148 pr_addr_dbg("ip_xmit_v6: state" 12149 " for %s changed to" 12150 " DELAY\n", AF_INET6, 12151 &ire->ire_addr_v6); 12152 } 12153 break; 12154 case ND_DELAY: 12155 case ND_PROBE: 12156 mutex_exit(&nce->nce_lock); 12157 /* Timers have already started */ 12158 break; 12159 case ND_UNREACHABLE: 12160 /* 12161 * ndp timer has detected that this nce 12162 * is unreachable and initiated deleting 12163 * this nce and all its associated IREs. 12164 * This is a race where we found the 12165 * ire before it was deleted and have 12166 * just sent out a packet using this 12167 * unreachable nce. 12168 */ 12169 mutex_exit(&nce->nce_lock); 12170 break; 12171 default: 12172 ASSERT(0); 12173 } 12174 } 12175 12176 if (multirt_send) { 12177 ASSERT(ire1 != NULL); 12178 /* 12179 * Proceed with the next RTF_MULTIRT ire, 12180 * Also set up the send-to queue accordingly. 12181 */ 12182 if (ire != save_ire) { 12183 ire_refrele(ire); 12184 } 12185 ire = ire1; 12186 ire1 = NULL; 12187 stq = ire->ire_stq; 12188 nce = ire->ire_nce; 12189 ill = ire_to_ill(ire); 12190 mp = next_mp; 12191 next_mp = NULL; 12192 } 12193 } while (multirt_send); 12194 /* 12195 * In the multirouting case, release the last ire used for 12196 * emission. save_ire will be released by the caller. 12197 */ 12198 if (ire != save_ire) { 12199 ire_refrele(ire); 12200 } 12201 } else { 12202 /* 12203 * Can't apply backpressure, just discard the packet. 12204 */ 12205 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12206 freemsg(mp); 12207 return; 12208 } 12209 } 12210 12211 /* 12212 * pr_addr_dbg function provides the needed buffer space to call 12213 * inet_ntop() function's 3rd argument. This function should be 12214 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12215 * stack buffer space in it's own stack frame. 
This function uses 12216 * a buffer from it's own stack and prints the information. 12217 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12218 * 12219 * Note: This function can call inet_ntop() once. 12220 */ 12221 void 12222 pr_addr_dbg(char *fmt1, int af, const void *addr) 12223 { 12224 char buf[INET6_ADDRSTRLEN]; 12225 12226 if (fmt1 == NULL) { 12227 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12228 return; 12229 } 12230 12231 /* 12232 * This does not compare debug level and just prints 12233 * out. Thus it is the responsibility of the caller 12234 * to check the appropriate debug-level before calling 12235 * this function. 12236 */ 12237 if (ip_debug > 0) { 12238 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12239 } 12240 12241 12242 } 12243 12244 12245 /* 12246 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12247 * if needed and extension headers) that will be needed based on the 12248 * ip6_pkt_t structure passed by the caller. 12249 * 12250 * The returned length does not include the length of the upper level 12251 * protocol (ULP) header. 
12252 */ 12253 int 12254 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12255 { 12256 int len; 12257 12258 len = IPV6_HDR_LEN; 12259 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12260 len += sizeof (ip6i_t); 12261 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12262 ASSERT(ipp->ipp_hopoptslen != 0); 12263 len += ipp->ipp_hopoptslen; 12264 } 12265 if (ipp->ipp_fields & IPPF_RTHDR) { 12266 ASSERT(ipp->ipp_rthdrlen != 0); 12267 len += ipp->ipp_rthdrlen; 12268 } 12269 /* 12270 * En-route destination options 12271 * Only do them if there's a routing header as well 12272 */ 12273 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12274 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12275 ASSERT(ipp->ipp_rtdstoptslen != 0); 12276 len += ipp->ipp_rtdstoptslen; 12277 } 12278 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12279 ASSERT(ipp->ipp_dstoptslen != 0); 12280 len += ipp->ipp_dstoptslen; 12281 } 12282 return (len); 12283 } 12284 12285 /* 12286 * All-purpose routine to build a header chain of an IPv6 header 12287 * followed by any required extension headers and a proto header, 12288 * preceeded (where necessary) by an ip6i_t private header. 12289 * 12290 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12291 * will be filled in appropriately. 12292 * Thus the caller must fill in the rest of the IPv6 header, such as 12293 * traffic class/flowid, source address (if not set here), hoplimit (if not 12294 * set here) and destination address. 12295 * 12296 * The extension headers and ip6i_t header will all be fully filled in. 12297 */ 12298 void 12299 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12300 ip6_pkt_t *ipp, uint8_t protocol) 12301 { 12302 uint8_t *nxthdr_ptr; 12303 uint8_t *cp; 12304 ip6i_t *ip6i; 12305 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12306 12307 /* 12308 * If sending private ip6i_t header down (checksum info, nexthop, 12309 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12310 * then fill it in. (The checksum info will be filled in by icmp). 
12311 */ 12312 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12313 ip6i = (ip6i_t *)ip6h; 12314 ip6h = (ip6_t *)&ip6i[1]; 12315 12316 ip6i->ip6i_flags = 0; 12317 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12318 if (ipp->ipp_fields & IPPF_IFINDEX || 12319 ipp->ipp_fields & IPPF_SCOPE_ID) { 12320 ASSERT(ipp->ipp_ifindex != 0); 12321 ip6i->ip6i_flags |= IP6I_IFINDEX; 12322 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12323 } 12324 if (ipp->ipp_fields & IPPF_ADDR) { 12325 /* 12326 * Enable per-packet source address verification if 12327 * IPV6_PKTINFO specified the source address. 12328 * ip6_src is set in the transport's _wput function. 12329 */ 12330 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12331 &ipp->ipp_addr)); 12332 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12333 } 12334 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12335 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12336 /* 12337 * We need to set this flag so that IP doesn't 12338 * rewrite the IPv6 header's hoplimit with the 12339 * current default value. 12340 */ 12341 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12342 } 12343 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12344 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12345 &ipp->ipp_nexthop)); 12346 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12347 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12348 } 12349 /* 12350 * tell IP this is an ip6i_t private header 12351 */ 12352 ip6i->ip6i_nxt = IPPROTO_RAW; 12353 } 12354 /* Initialize IPv6 header */ 12355 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12356 if (ipp->ipp_fields & IPPF_TCLASS) { 12357 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12358 (ipp->ipp_tclass << 20); 12359 } 12360 if (ipp->ipp_fields & IPPF_ADDR) 12361 ip6h->ip6_src = ipp->ipp_addr; 12362 12363 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12364 cp = (uint8_t *)&ip6h[1]; 12365 /* 12366 * Here's where we have to start stringing together 12367 * any extension headers in the right order: 12368 * Hop-by-hop, destination, routing, and final destination opts. 
12369 */ 12370 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12371 /* Hop-by-hop options */ 12372 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12373 12374 *nxthdr_ptr = IPPROTO_HOPOPTS; 12375 nxthdr_ptr = &hbh->ip6h_nxt; 12376 12377 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12378 cp += ipp->ipp_hopoptslen; 12379 } 12380 /* 12381 * En-route destination options 12382 * Only do them if there's a routing header as well 12383 */ 12384 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12385 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12386 ip6_dest_t *dst = (ip6_dest_t *)cp; 12387 12388 *nxthdr_ptr = IPPROTO_DSTOPTS; 12389 nxthdr_ptr = &dst->ip6d_nxt; 12390 12391 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12392 cp += ipp->ipp_rtdstoptslen; 12393 } 12394 /* 12395 * Routing header next 12396 */ 12397 if (ipp->ipp_fields & IPPF_RTHDR) { 12398 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12399 12400 *nxthdr_ptr = IPPROTO_ROUTING; 12401 nxthdr_ptr = &rt->ip6r_nxt; 12402 12403 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12404 cp += ipp->ipp_rthdrlen; 12405 } 12406 /* 12407 * Do ultimate destination options 12408 */ 12409 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12410 ip6_dest_t *dest = (ip6_dest_t *)cp; 12411 12412 *nxthdr_ptr = IPPROTO_DSTOPTS; 12413 nxthdr_ptr = &dest->ip6d_nxt; 12414 12415 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12416 cp += ipp->ipp_dstoptslen; 12417 } 12418 /* 12419 * Now set the last header pointer to the proto passed in 12420 */ 12421 *nxthdr_ptr = protocol; 12422 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12423 } 12424 12425 /* 12426 * Return a pointer to the routing header extension header 12427 * in the IPv6 header(s) chain passed in. 
12428 * If none found, return NULL 12429 * Assumes that all extension headers are in same mblk as the v6 header 12430 */ 12431 ip6_rthdr_t * 12432 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12433 { 12434 ip6_dest_t *desthdr; 12435 ip6_frag_t *fraghdr; 12436 uint_t hdrlen; 12437 uint8_t nexthdr; 12438 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12439 12440 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12441 return ((ip6_rthdr_t *)ptr); 12442 12443 /* 12444 * The routing header will precede all extension headers 12445 * other than the hop-by-hop and destination options 12446 * extension headers, so if we see anything other than those, 12447 * we're done and didn't find it. 12448 * We could see a destination options header alone but no 12449 * routing header, in which case we'll return NULL as soon as 12450 * we see anything after that. 12451 * Hop-by-hop and destination option headers are identical, 12452 * so we can use either one we want as a template. 12453 */ 12454 nexthdr = ip6h->ip6_nxt; 12455 while (ptr < endptr) { 12456 /* Is there enough left for len + nexthdr? */ 12457 if (ptr + MIN_EHDR_LEN > endptr) 12458 return (NULL); 12459 12460 switch (nexthdr) { 12461 case IPPROTO_HOPOPTS: 12462 case IPPROTO_DSTOPTS: 12463 /* Assumes the headers are identical for hbh and dst */ 12464 desthdr = (ip6_dest_t *)ptr; 12465 hdrlen = 8 * (desthdr->ip6d_len + 1); 12466 nexthdr = desthdr->ip6d_nxt; 12467 break; 12468 12469 case IPPROTO_ROUTING: 12470 return ((ip6_rthdr_t *)ptr); 12471 12472 case IPPROTO_FRAGMENT: 12473 fraghdr = (ip6_frag_t *)ptr; 12474 hdrlen = sizeof (ip6_frag_t); 12475 nexthdr = fraghdr->ip6f_nxt; 12476 break; 12477 12478 default: 12479 return (NULL); 12480 } 12481 ptr += hdrlen; 12482 } 12483 return (NULL); 12484 } 12485 12486 /* 12487 * Called for source-routed packets originating on this node. 
12488 * Manipulates the original routing header by moving every entry up 12489 * one slot, placing the first entry in the v6 header's v6_dst field, 12490 * and placing the ultimate destination in the routing header's last 12491 * slot. 12492 * 12493 * Returns the checksum diference between the ultimate destination 12494 * (last hop in the routing header when the packet is sent) and 12495 * the first hop (ip6_dst when the packet is sent) 12496 */ 12497 /* ARGSUSED2 */ 12498 uint32_t 12499 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12500 { 12501 uint_t numaddr; 12502 uint_t i; 12503 in6_addr_t *addrptr; 12504 in6_addr_t tmp; 12505 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12506 uint32_t cksm; 12507 uint32_t addrsum = 0; 12508 uint16_t *ptr; 12509 12510 /* 12511 * Perform any processing needed for source routing. 12512 * We know that all extension headers will be in the same mblk 12513 * as the IPv6 header. 12514 */ 12515 12516 /* 12517 * If no segments left in header, or the header length field is zero, 12518 * don't move hop addresses around; 12519 * Checksum difference is zero. 12520 */ 12521 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12522 return (0); 12523 12524 ptr = (uint16_t *)&ip6h->ip6_dst; 12525 cksm = 0; 12526 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12527 cksm += ptr[i]; 12528 } 12529 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12530 12531 /* 12532 * Here's where the fun begins - we have to 12533 * move all addresses up one spot, take the 12534 * first hop and make it our first ip6_dst, 12535 * and place the ultimate destination in the 12536 * newly-opened last slot. 
12537 */ 12538 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12539 numaddr = rthdr->ip6r0_len / 2; 12540 tmp = *addrptr; 12541 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12542 *addrptr = addrptr[1]; 12543 } 12544 *addrptr = ip6h->ip6_dst; 12545 ip6h->ip6_dst = tmp; 12546 12547 /* 12548 * From the checksummed ultimate destination subtract the checksummed 12549 * current ip6_dst (the first hop address). Return that number. 12550 * (In the v4 case, the second part of this is done in each routine 12551 * that calls ip_massage_options(). We do it all in this one place 12552 * for v6). 12553 */ 12554 ptr = (uint16_t *)&ip6h->ip6_dst; 12555 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12556 addrsum += ptr[i]; 12557 } 12558 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12559 if ((int)cksm < 0) 12560 cksm--; 12561 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12562 12563 return (cksm); 12564 } 12565 12566 /* 12567 * Propagate a multicast group membership operation (join/leave) (*fn) on 12568 * all interfaces crossed by the related multirt routes. 12569 * The call is considered successful if the operation succeeds 12570 * on at least one interface. 12571 * The function is called if the destination address in the packet to send 12572 * is multirouted. 
12573 */ 12574 int 12575 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12576 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12577 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12578 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12579 { 12580 ire_t *ire_gw; 12581 irb_t *irb; 12582 int index, error = 0; 12583 opt_restart_t *or; 12584 ip_stack_t *ipst = ire->ire_ipst; 12585 12586 irb = ire->ire_bucket; 12587 ASSERT(irb != NULL); 12588 12589 ASSERT(DB_TYPE(first_mp) == M_CTL); 12590 or = (opt_restart_t *)first_mp->b_rptr; 12591 12592 IRB_REFHOLD(irb); 12593 for (; ire != NULL; ire = ire->ire_next) { 12594 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12595 continue; 12596 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12597 continue; 12598 12599 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12600 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12601 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12602 /* No resolver exists for the gateway; skip this ire. */ 12603 if (ire_gw == NULL) 12604 continue; 12605 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12606 /* 12607 * A resolver exists: we can get the interface on which we have 12608 * to apply the operation. 12609 */ 12610 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12611 first_mp); 12612 if (error == 0) 12613 or->or_private = CGTP_MCAST_SUCCESS; 12614 12615 if (ip_debug > 0) { 12616 ulong_t off; 12617 char *ksym; 12618 12619 ksym = kobj_getsymname((uintptr_t)fn, &off); 12620 ip2dbg(("ip_multirt_apply_membership_v6: " 12621 "called %s, multirt group 0x%08x via itf 0x%08x, " 12622 "error %d [success %u]\n", 12623 ksym ? 
ksym : "?", 12624 ntohl(V4_PART_OF_V6((*v6grp))), 12625 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12626 error, or->or_private)); 12627 } 12628 12629 ire_refrele(ire_gw); 12630 if (error == EINPROGRESS) { 12631 IRB_REFRELE(irb); 12632 return (error); 12633 } 12634 } 12635 IRB_REFRELE(irb); 12636 /* 12637 * Consider the call as successful if we succeeded on at least 12638 * one interface. Otherwise, return the last encountered error. 12639 */ 12640 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12641 } 12642 12643 void 12644 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12645 { 12646 kstat_t *ksp; 12647 12648 ip6_stat_t template = { 12649 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12650 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12651 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12652 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12653 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12654 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12655 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12656 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12657 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12658 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12659 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12660 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12661 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12662 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12663 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 12664 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 12665 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 12666 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 12667 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 12668 }; 12669 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 12670 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 12671 KSTAT_FLAG_VIRTUAL, stackid); 12672 12673 if (ksp == NULL) 12674 return (NULL); 12675 12676 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 12677 ksp->ks_data = (void *)ip6_statisticsp; 12678 ksp->ks_private = (void *)(uintptr_t)stackid; 12679 12680 kstat_install(ksp); 12681 return (ksp); 12682 } 12683 12684 void 12685 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 12686 { 12687 if (ksp != NULL) { 12688 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 12689 kstat_delete_netstack(ksp, stackid); 12690 } 12691 } 12692 12693 /* 12694 * The following two functions set and get the value for the 12695 * IPV6_SRC_PREFERENCES socket option. 12696 */ 12697 int 12698 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12699 { 12700 /* 12701 * We only support preferences that are covered by 12702 * IPV6_PREFER_SRC_MASK. 12703 */ 12704 if (prefs & ~IPV6_PREFER_SRC_MASK) 12705 return (EINVAL); 12706 12707 /* 12708 * Look for conflicting preferences or default preferences. If 12709 * both bits of a related pair are clear, the application wants the 12710 * system's default value for that pair. Both bits in a pair can't 12711 * be set. 
12712 */ 12713 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12714 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12715 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12716 IPV6_PREFER_SRC_MIPMASK) { 12717 return (EINVAL); 12718 } 12719 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12720 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12721 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12722 IPV6_PREFER_SRC_TMPMASK) { 12723 return (EINVAL); 12724 } 12725 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12726 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12727 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12728 IPV6_PREFER_SRC_CGAMASK) { 12729 return (EINVAL); 12730 } 12731 12732 connp->conn_src_preferences = prefs; 12733 return (0); 12734 } 12735 12736 size_t 12737 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12738 { 12739 *val = connp->conn_src_preferences; 12740 return (sizeof (connp->conn_src_preferences)); 12741 } 12742 12743 int 12744 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti) 12745 { 12746 ire_t *ire; 12747 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12748 12749 /* 12750 * Verify the source address and ifindex. Privileged users can use 12751 * any source address. For ancillary data the source address is 12752 * checked in ip_wput_v6. 
12753 */ 12754 if (pkti->ipi6_ifindex != 0) { 12755 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 12756 if (!phyint_exists(pkti->ipi6_ifindex, ipst)) { 12757 rw_exit(&ipst->ips_ill_g_lock); 12758 return (ENXIO); 12759 } 12760 rw_exit(&ipst->ips_ill_g_lock); 12761 } 12762 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12763 secpolicy_net_rawaccess(cr) != 0) { 12764 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12765 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12766 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 12767 if (ire != NULL) 12768 ire_refrele(ire); 12769 else 12770 return (ENXIO); 12771 } 12772 return (0); 12773 } 12774 12775 /* 12776 * Get the size of the IP options (including the IP headers size) 12777 * without including the AH header's size. If till_ah is B_FALSE, 12778 * and if AH header is present, dest options beyond AH header will 12779 * also be included in the returned size. 12780 */ 12781 int 12782 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12783 { 12784 ip6_t *ip6h; 12785 uint8_t nexthdr; 12786 uint8_t *whereptr; 12787 ip6_hbh_t *hbhhdr; 12788 ip6_dest_t *dsthdr; 12789 ip6_rthdr_t *rthdr; 12790 int ehdrlen; 12791 int size; 12792 ah_t *ah; 12793 12794 ip6h = (ip6_t *)mp->b_rptr; 12795 size = IPV6_HDR_LEN; 12796 nexthdr = ip6h->ip6_nxt; 12797 whereptr = (uint8_t *)&ip6h[1]; 12798 for (;;) { 12799 /* Assume IP has already stripped it */ 12800 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12801 switch (nexthdr) { 12802 case IPPROTO_HOPOPTS: 12803 hbhhdr = (ip6_hbh_t *)whereptr; 12804 nexthdr = hbhhdr->ip6h_nxt; 12805 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12806 break; 12807 case IPPROTO_DSTOPTS: 12808 dsthdr = (ip6_dest_t *)whereptr; 12809 nexthdr = dsthdr->ip6d_nxt; 12810 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12811 break; 12812 case IPPROTO_ROUTING: 12813 rthdr = (ip6_rthdr_t *)whereptr; 12814 nexthdr = rthdr->ip6r_nxt; 12815 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12816 break; 12817 default : 12818 if (till_ah) { 12819 
ASSERT(nexthdr == IPPROTO_AH); 12820 return (size); 12821 } 12822 /* 12823 * If we don't have a AH header to traverse, 12824 * return now. This happens normally for 12825 * outbound datagrams where we have not inserted 12826 * the AH header. 12827 */ 12828 if (nexthdr != IPPROTO_AH) { 12829 return (size); 12830 } 12831 12832 /* 12833 * We don't include the AH header's size 12834 * to be symmetrical with other cases where 12835 * we either don't have a AH header (outbound) 12836 * or peek into the AH header yet (inbound and 12837 * not pulled up yet). 12838 */ 12839 ah = (ah_t *)whereptr; 12840 nexthdr = ah->ah_nexthdr; 12841 ehdrlen = (ah->ah_length << 2) + 8; 12842 12843 if (nexthdr == IPPROTO_DSTOPTS) { 12844 if (whereptr + ehdrlen >= mp->b_wptr) { 12845 /* 12846 * The destination options header 12847 * is not part of the first mblk. 12848 */ 12849 whereptr = mp->b_cont->b_rptr; 12850 } else { 12851 whereptr += ehdrlen; 12852 } 12853 12854 dsthdr = (ip6_dest_t *)whereptr; 12855 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12856 size += ehdrlen; 12857 } 12858 return (size); 12859 } 12860 whereptr += ehdrlen; 12861 size += ehdrlen; 12862 } 12863 } 12864 12865 /* 12866 * Utility routine that checks if `v6srcp' is a valid address on underlying 12867 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 12868 * associated with `v6srcp' on success. NOTE: if this is not called from 12869 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 12870 * group during or after this lookup. 
12871 */ 12872 static boolean_t 12873 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 12874 { 12875 ipif_t *ipif; 12876 12877 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 12878 if (ipif != NULL) { 12879 if (ipifp != NULL) 12880 *ipifp = ipif; 12881 else 12882 ipif_refrele(ipif); 12883 return (B_TRUE); 12884 } 12885 12886 if (ip_debug > 2) { 12887 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 12888 "src %s\n", AF_INET6, v6srcp); 12889 } 12890 return (B_FALSE); 12891 } 12892