1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/sctp/sctp_impl.h> 83 #include <inet/ipp_common.h> 84 #include <inet/ilb_ip.h> 85 86 #include <inet/ip_multi.h> 87 #include <inet/ip_if.h> 88 #include <inet/ip_ire.h> 89 #include <inet/ip_rts.h> 90 #include <inet/ip_ndp.h> 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 #include <inet/sadb.h> 94 #include <inet/ipsec_impl.h> 95 #include <inet/iptun/iptun_impl.h> 96 #include <inet/sctp_ip.h> 97 #include <sys/pattr.h> 98 #include <inet/ipclassifier.h> 99 #include <inet/ipsecah.h> 100 #include <inet/rawip_impl.h> 101 #include <inet/rts_impl.h> 102 #include <sys/squeue_impl.h> 103 #include <sys/squeue.h> 104 
105 #include <sys/tsol/label.h> 106 #include <sys/tsol/tnet.h> 107 108 #include <rpc/pmap_prot.h> 109 110 /* Temporary; for CR 6451644 work-around */ 111 #include <sys/ethernet.h> 112 113 extern int ip_squeue_flag; 114 115 /* 116 * Naming conventions: 117 * These rules should be judiciously applied 118 * if there is a need to identify something as IPv6 versus IPv4 119 * IPv6 functions will end with _v6 in the ip module. 120 * IPv6 functions will end with _ipv6 in the transport modules. 121 * IPv6 macros: 122 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 123 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 124 * And then there are ..V4_PART_OF_V6. 125 * The intent is that macros in the ip module end with _V6. 126 * IPv6 global variables will start with ipv6_ 127 * IPv6 structures will start with ipv6 128 * IPv6 defined constants should start with IPV6_ 129 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 130 */ 131 132 /* 133 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 134 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 135 * from IANA. This mechanism will remain in effect until an official 136 * number is obtained. 
137 */ 138 uchar_t ip6opt_ls; 139 140 const in6_addr_t ipv6_all_ones = 141 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 142 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 143 144 #ifdef _BIG_ENDIAN 145 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 146 #else /* _BIG_ENDIAN */ 147 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 148 #endif /* _BIG_ENDIAN */ 149 150 #ifdef _BIG_ENDIAN 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 152 #else /* _BIG_ENDIAN */ 153 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 154 #endif /* _BIG_ENDIAN */ 155 156 #ifdef _BIG_ENDIAN 157 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 158 #else /* _BIG_ENDIAN */ 159 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 160 #endif /* _BIG_ENDIAN */ 161 162 #ifdef _BIG_ENDIAN 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 164 #else /* _BIG_ENDIAN */ 165 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 166 #endif /* _BIG_ENDIAN */ 167 168 #ifdef _BIG_ENDIAN 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 170 #else /* _BIG_ENDIAN */ 171 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 172 #endif /* _BIG_ENDIAN */ 173 174 #ifdef _BIG_ENDIAN 175 const in6_addr_t ipv6_solicited_node_mcast = 176 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 177 #else /* _BIG_ENDIAN */ 178 const in6_addr_t ipv6_solicited_node_mcast = 179 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 180 #endif /* _BIG_ENDIAN */ 181 182 /* Leave room for ip_newroute to tack on the src and target addresses */ 183 #define OK_RESOLVER_MP_V6(mp) \ 184 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 185 186 #define IP6_MBLK_OK 0 187 #define IP6_MBLK_HDR_ERR 1 188 #define IP6_MBLK_LEN_ERR 2 189 190 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *, ill_t *, 191 
boolean_t, zoneid_t); 192 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 193 const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 194 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 195 static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *, 196 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 197 boolean_t, boolean_t, cred_t *); 198 static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *, 199 iulp_t *, ip_stack_t *); 200 static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t, 201 const in6_addr_t *, uint16_t, boolean_t); 202 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 203 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 204 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 205 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 206 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 207 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 208 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 209 uint8_t *, uint_t, uint8_t, ip_stack_t *); 210 static mblk_t *ip_rput_frag_v6(ill_t *, ill_t *, mblk_t *, ip6_t *, 211 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 212 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 213 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 214 conn_t *, int, int, zoneid_t); 215 static boolean_t ipif_lookup_testaddr_v6(ill_t *, const in6_addr_t *, 216 ipif_t **); 217 218 /* 219 * A template for an IPv6 AR_ENTRY_QUERY 220 */ 221 static areq_t ipv6_areq_template = { 222 AR_ENTRY_QUERY, /* cmd */ 223 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 224 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 225 ETHERTYPE_IPV6, /* protocol, from arps perspective */ 226 sizeof (areq_t), /* target addr offset */ 227 IPV6_ADDR_LEN, /* target addr_length */ 228 0, /* flags */ 229 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 230 
IPV6_ADDR_LEN, /* sender addr length */ 231 6, /* xmit_count */ 232 1000, /* (re)xmit_interval in milliseconds */ 233 4 /* max # of requests to buffer */ 234 /* anything else filled in by the code */ 235 }; 236 237 /* 238 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 239 * The message has already been checksummed and if needed, 240 * a copy has been made to be sent any interested ICMP client (conn) 241 * Note that this is different than icmp_inbound() which does the fanout 242 * to conn's as well as local processing of the ICMP packets. 243 * 244 * All error messages are passed to the matching transport stream. 245 * 246 * Zones notes: 247 * The packet is only processed in the context of the specified zone: typically 248 * only this zone will reply to an echo request. This means that the caller must 249 * call icmp_inbound_v6() for each relevant zone. 250 */ 251 static void 252 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 253 uint_t hdr_length, boolean_t mctl_present, uint_t flags, zoneid_t zoneid, 254 mblk_t *dl_mp) 255 { 256 icmp6_t *icmp6; 257 ip6_t *ip6h; 258 boolean_t interested; 259 in6_addr_t origsrc; 260 mblk_t *first_mp; 261 ipsec_in_t *ii; 262 ip_stack_t *ipst = ill->ill_ipst; 263 264 ASSERT(ill != NULL); 265 first_mp = mp; 266 if (mctl_present) { 267 mp = first_mp->b_cont; 268 ASSERT(mp != NULL); 269 270 ii = (ipsec_in_t *)first_mp->b_rptr; 271 ASSERT(ii->ipsec_in_type == IPSEC_IN); 272 } 273 274 ip6h = (ip6_t *)mp->b_rptr; 275 276 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 277 278 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 279 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 280 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 281 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 282 freemsg(first_mp); 283 return; 284 } 285 ip6h = (ip6_t *)mp->b_rptr; 286 } 287 if (ipst->ips_icmp_accept_clear_messages == 0) { 288 first_mp = ipsec_check_global_policy(first_mp, NULL, 289 NULL, ip6h, 
mctl_present, ipst->ips_netstack); 290 if (first_mp == NULL) 291 return; 292 } 293 294 /* 295 * On a labeled system, we have to check whether the zone itself is 296 * permitted to receive raw traffic. 297 */ 298 if (is_system_labeled()) { 299 if (zoneid == ALL_ZONES) 300 zoneid = tsol_packet_to_zoneid(mp); 301 if (!tsol_can_accept_raw(mp, B_FALSE)) { 302 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 303 zoneid)); 304 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 305 freemsg(first_mp); 306 return; 307 } 308 } 309 310 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 311 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 312 icmp6->icmp6_code)); 313 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 314 315 /* Initiate IPPF processing here */ 316 if (IP6_IN_IPP(flags, ipst)) { 317 318 /* 319 * If the ifindex changes due to SIOCSLIFINDEX 320 * packet may return to IP on the wrong ill. 321 */ 322 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 323 if (mp == NULL) { 324 if (mctl_present) { 325 freeb(first_mp); 326 } 327 return; 328 } 329 } 330 331 switch (icmp6->icmp6_type) { 332 case ICMP6_DST_UNREACH: 333 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 334 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 335 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 336 break; 337 338 case ICMP6_TIME_EXCEEDED: 339 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 340 break; 341 342 case ICMP6_PARAM_PROB: 343 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 344 break; 345 346 case ICMP6_PACKET_TOO_BIG: 347 icmp_inbound_too_big_v6(q, first_mp, ill, inill, mctl_present, 348 zoneid); 349 return; 350 case ICMP6_ECHO_REQUEST: 351 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 352 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 353 !ipst->ips_ipv6_resp_echo_mcast) 354 break; 355 356 /* 357 * We must have exclusive use of the mblk to convert it to 358 * a response. 359 * If not, we copy it. 
360 */ 361 if (mp->b_datap->db_ref > 1) { 362 mblk_t *mp1; 363 364 mp1 = copymsg(mp); 365 freemsg(mp); 366 if (mp1 == NULL) { 367 BUMP_MIB(ill->ill_icmp6_mib, 368 ipv6IfIcmpInErrors); 369 if (mctl_present) 370 freeb(first_mp); 371 return; 372 } 373 mp = mp1; 374 ip6h = (ip6_t *)mp->b_rptr; 375 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 376 if (mctl_present) 377 first_mp->b_cont = mp; 378 else 379 first_mp = mp; 380 } 381 382 /* 383 * Turn the echo into an echo reply. 384 * Remove any extension headers (do not reverse a source route) 385 * and clear the flow id (keep traffic class for now). 386 */ 387 if (hdr_length != IPV6_HDR_LEN) { 388 int i; 389 390 for (i = 0; i < IPV6_HDR_LEN; i++) 391 mp->b_rptr[hdr_length - i - 1] = 392 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 393 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 394 ip6h = (ip6_t *)mp->b_rptr; 395 ip6h->ip6_nxt = IPPROTO_ICMPV6; 396 hdr_length = IPV6_HDR_LEN; 397 } 398 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 399 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 400 401 ip6h->ip6_plen = 402 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 403 origsrc = ip6h->ip6_src; 404 /* 405 * Reverse the source and destination addresses. 406 * If the return address is a multicast, zero out the source 407 * (ip_wput_v6 will set an address). 408 */ 409 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 410 ip6h->ip6_src = ipv6_all_zeros; 411 ip6h->ip6_dst = origsrc; 412 } else { 413 ip6h->ip6_src = ip6h->ip6_dst; 414 ip6h->ip6_dst = origsrc; 415 } 416 417 /* set the hop limit */ 418 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 419 420 /* 421 * Prepare for checksum by putting icmp length in the icmp 422 * checksum field. The checksum is calculated in ip_wput_v6. 423 */ 424 icmp6->icmp6_cksum = ip6h->ip6_plen; 425 426 if (!mctl_present) { 427 /* 428 * This packet should go out the same way as it 429 * came in i.e in clear. 
To make sure that global 430 * policy will not be applied to this in ip_wput, 431 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 432 */ 433 ASSERT(first_mp == mp); 434 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 435 if (first_mp == NULL) { 436 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 437 freemsg(mp); 438 return; 439 } 440 ii = (ipsec_in_t *)first_mp->b_rptr; 441 442 /* This is not a secure packet */ 443 ii->ipsec_in_secure = B_FALSE; 444 first_mp->b_cont = mp; 445 } 446 if (!ipsec_in_to_out(first_mp, NULL, ip6h, zoneid)) { 447 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 448 return; 449 } 450 put(WR(q), first_mp); 451 return; 452 453 case ICMP6_ECHO_REPLY: 454 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 455 break; 456 457 case ND_ROUTER_SOLICIT: 458 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 459 break; 460 461 case ND_ROUTER_ADVERT: 462 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 463 break; 464 465 case ND_NEIGHBOR_SOLICIT: 466 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 467 if (mctl_present) 468 freeb(first_mp); 469 /* XXX may wish to pass first_mp up to ndp_input someday. */ 470 ndp_input(inill, mp, dl_mp); 471 return; 472 473 case ND_NEIGHBOR_ADVERT: 474 BUMP_MIB(ill->ill_icmp6_mib, 475 ipv6IfIcmpInNeighborAdvertisements); 476 if (mctl_present) 477 freeb(first_mp); 478 /* XXX may wish to pass first_mp up to ndp_input someday. */ 479 ndp_input(inill, mp, dl_mp); 480 return; 481 482 case ND_REDIRECT: { 483 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 484 485 if (ipst->ips_ipv6_ignore_redirect) 486 break; 487 488 /* 489 * As there is no upper client to deliver, we don't 490 * need the first_mp any more. 
491 */ 492 if (mctl_present) 493 freeb(first_mp); 494 if (!pullupmsg(mp, -1)) { 495 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 496 break; 497 } 498 icmp_redirect_v6(q, mp, ill); 499 return; 500 } 501 502 /* 503 * The next three icmp messages will be handled by MLD. 504 * Pass all valid MLD packets up to any process(es) 505 * listening on a raw ICMP socket. MLD messages are 506 * freed by mld_input function. 507 */ 508 case MLD_LISTENER_QUERY: 509 case MLD_LISTENER_REPORT: 510 case MLD_LISTENER_REDUCTION: 511 if (mctl_present) 512 freeb(first_mp); 513 mld_input(q, mp, ill); 514 return; 515 default: 516 break; 517 } 518 if (interested) { 519 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 520 inill, mctl_present, zoneid); 521 } else { 522 freemsg(first_mp); 523 } 524 } 525 526 /* 527 * Process received IPv6 ICMP Packet too big. 528 * After updating any IRE it does the fanout to any matching transport streams. 529 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 530 */ 531 /* ARGSUSED */ 532 static void 533 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 534 boolean_t mctl_present, zoneid_t zoneid) 535 { 536 ip6_t *ip6h; 537 ip6_t *inner_ip6h; 538 icmp6_t *icmp6; 539 uint16_t hdr_length; 540 uint32_t mtu; 541 ire_t *ire, *first_ire; 542 mblk_t *first_mp; 543 ip_stack_t *ipst = ill->ill_ipst; 544 545 first_mp = mp; 546 if (mctl_present) 547 mp = first_mp->b_cont; 548 /* 549 * We must have exclusive use of the mblk to update the MTU 550 * in the packet. 551 * If not, we copy it. 552 * 553 * If there's an M_CTL present, we know that allocated first_mp 554 * earlier in this function, so we know first_mp has refcnt of one. 
555 */ 556 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 557 if (mp->b_datap->db_ref > 1) { 558 mblk_t *mp1; 559 560 mp1 = copymsg(mp); 561 freemsg(mp); 562 if (mp1 == NULL) { 563 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 564 if (mctl_present) 565 freeb(first_mp); 566 return; 567 } 568 mp = mp1; 569 if (mctl_present) 570 first_mp->b_cont = mp; 571 else 572 first_mp = mp; 573 } 574 ip6h = (ip6_t *)mp->b_rptr; 575 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 576 hdr_length = ip_hdr_length_v6(mp, ip6h); 577 else 578 hdr_length = IPV6_HDR_LEN; 579 580 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 581 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 582 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 583 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 584 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 585 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 586 freemsg(first_mp); 587 return; 588 } 589 ip6h = (ip6_t *)mp->b_rptr; 590 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 591 inner_ip6h = (ip6_t *)&icmp6[1]; 592 } 593 594 /* 595 * For link local destinations matching simply on IRE type is not 596 * sufficient. Same link local addresses for different ILL's is 597 * possible. 
598 */ 599 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 600 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 601 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 602 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 603 604 if (first_ire == NULL) { 605 if (ip_debug > 2) { 606 /* ip1dbg */ 607 pr_addr_dbg("icmp_inbound_too_big_v6:" 608 "no ire for dst %s\n", AF_INET6, 609 &inner_ip6h->ip6_dst); 610 } 611 freemsg(first_mp); 612 return; 613 } 614 615 mtu = ntohl(icmp6->icmp6_mtu); 616 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 617 for (ire = first_ire; ire != NULL && 618 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 619 ire = ire->ire_next) { 620 mutex_enter(&ire->ire_lock); 621 if (mtu < IPV6_MIN_MTU) { 622 ip1dbg(("Received mtu less than IPv6 " 623 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 624 mtu = IPV6_MIN_MTU; 625 /* 626 * If an mtu less than IPv6 min mtu is received, 627 * we must include a fragment header in 628 * subsequent packets. 629 */ 630 ire->ire_frag_flag |= IPH_FRAG_HDR; 631 } 632 ip1dbg(("Received mtu from router: %d\n", mtu)); 633 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 634 if (ire->ire_max_frag == mtu) { 635 /* Decreased it */ 636 ire->ire_marks |= IRE_MARK_PMTU; 637 } 638 /* Record the new max frag size for the ULP. */ 639 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 640 /* 641 * If we need a fragment header in every packet 642 * (above case or multirouting), make sure the 643 * ULP takes it into account when computing the 644 * payload size. 
645 */ 646 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 647 sizeof (ip6_frag_t)); 648 } else { 649 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 650 } 651 mutex_exit(&ire->ire_lock); 652 } 653 rw_exit(&first_ire->ire_bucket->irb_lock); 654 ire_refrele(first_ire); 655 } else { 656 irb_t *irb = NULL; 657 /* 658 * for non-link local destinations we match only on the IRE type 659 */ 660 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 661 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 662 ipst); 663 if (ire == NULL) { 664 if (ip_debug > 2) { 665 /* ip1dbg */ 666 pr_addr_dbg("icmp_inbound_too_big_v6:" 667 "no ire for dst %s\n", 668 AF_INET6, &inner_ip6h->ip6_dst); 669 } 670 freemsg(first_mp); 671 return; 672 } 673 irb = ire->ire_bucket; 674 ire_refrele(ire); 675 rw_enter(&irb->irb_lock, RW_READER); 676 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 677 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 678 &inner_ip6h->ip6_dst)) { 679 mtu = ntohl(icmp6->icmp6_mtu); 680 mutex_enter(&ire->ire_lock); 681 if (mtu < IPV6_MIN_MTU) { 682 ip1dbg(("Received mtu less than IPv6" 683 "min mtu %d: %d\n", 684 IPV6_MIN_MTU, mtu)); 685 mtu = IPV6_MIN_MTU; 686 /* 687 * If an mtu less than IPv6 min mtu is 688 * received, we must include a fragment 689 * header in subsequent packets. 690 */ 691 ire->ire_frag_flag |= IPH_FRAG_HDR; 692 } 693 694 ip1dbg(("Received mtu from router: %d\n", mtu)); 695 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 696 if (ire->ire_max_frag == mtu) { 697 /* Decreased it */ 698 ire->ire_marks |= IRE_MARK_PMTU; 699 } 700 /* Record the new max frag size for the ULP. */ 701 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 702 /* 703 * If we need a fragment header in 704 * every packet (above case or 705 * multirouting), make sure the ULP 706 * takes it into account when computing 707 * the payload size. 
708 */ 709 icmp6->icmp6_mtu = 710 htonl(ire->ire_max_frag - 711 sizeof (ip6_frag_t)); 712 } else { 713 icmp6->icmp6_mtu = 714 htonl(ire->ire_max_frag); 715 } 716 mutex_exit(&ire->ire_lock); 717 } 718 } 719 rw_exit(&irb->irb_lock); 720 } 721 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, inill, 722 mctl_present, zoneid); 723 } 724 725 /* 726 * Fanout for ICMPv6 errors containing IP-in-IPv6 packets. Returns B_TRUE if a 727 * tunnel consumed the message, and B_FALSE otherwise. 728 */ 729 static boolean_t 730 icmp_inbound_iptun_fanout_v6(mblk_t *first_mp, ip6_t *rip6h, ill_t *ill, 731 ip_stack_t *ipst) 732 { 733 conn_t *connp; 734 735 if ((connp = ipcl_iptun_classify_v6(&rip6h->ip6_src, &rip6h->ip6_dst, 736 ipst)) == NULL) 737 return (B_FALSE); 738 739 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 740 connp->conn_recv(connp, first_mp, NULL); 741 CONN_DEC_REF(connp); 742 return (B_TRUE); 743 } 744 745 /* 746 * Fanout received ICMPv6 error packets to the transports. 747 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
748 */ 749 void 750 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 751 icmp6_t *icmp6, ill_t *ill, ill_t *inill, boolean_t mctl_present, 752 zoneid_t zoneid) 753 { 754 uint16_t *up; /* Pointer to ports in ULP header */ 755 uint32_t ports; /* reversed ports for fanout */ 756 ip6_t rip6h; /* With reversed addresses */ 757 uint16_t hdr_length; 758 uint8_t *nexthdrp; 759 uint8_t nexthdr; 760 mblk_t *first_mp; 761 ipsec_in_t *ii; 762 tcpha_t *tcpha; 763 conn_t *connp; 764 ip_stack_t *ipst = ill->ill_ipst; 765 766 first_mp = mp; 767 if (mctl_present) { 768 mp = first_mp->b_cont; 769 ASSERT(mp != NULL); 770 771 ii = (ipsec_in_t *)first_mp->b_rptr; 772 ASSERT(ii->ipsec_in_type == IPSEC_IN); 773 } else { 774 ii = NULL; 775 } 776 777 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 778 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 779 780 /* 781 * Need to pullup everything in order to use 782 * ip_hdr_length_nexthdr_v6() 783 */ 784 if (mp->b_cont != NULL) { 785 if (!pullupmsg(mp, -1)) { 786 ip1dbg(("icmp_inbound_error_fanout_v6: " 787 "pullupmsg failed\n")); 788 goto drop_pkt; 789 } 790 ip6h = (ip6_t *)mp->b_rptr; 791 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 792 } 793 794 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 795 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 796 goto drop_pkt; 797 798 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 799 goto drop_pkt; 800 nexthdr = *nexthdrp; 801 802 /* Set message type, must be done after pullups */ 803 mp->b_datap->db_type = M_CTL; 804 805 /* 806 * We need a separate IP header with the source and destination 807 * addresses reversed to do fanout/classification because the ip6h in 808 * the ICMPv6 error is in the form we sent it out. 
809 */ 810 rip6h.ip6_src = ip6h->ip6_dst; 811 rip6h.ip6_dst = ip6h->ip6_src; 812 rip6h.ip6_nxt = nexthdr; 813 814 /* Try to pass the ICMP message to clients who need it */ 815 switch (nexthdr) { 816 case IPPROTO_UDP: { 817 /* 818 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 819 * UDP header to get the port information. 820 */ 821 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 822 mp->b_wptr) { 823 break; 824 } 825 /* Attempt to find a client stream based on port. */ 826 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 827 ((uint16_t *)&ports)[0] = up[1]; 828 ((uint16_t *)&ports)[1] = up[0]; 829 830 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, inill, 831 IP6_NO_IPPOLICY, mctl_present, zoneid); 832 return; 833 } 834 case IPPROTO_TCP: { 835 /* 836 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 837 * the TCP header to get the port information. 838 */ 839 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 840 mp->b_wptr) { 841 break; 842 } 843 844 /* 845 * Attempt to find a client stream based on port. 846 * Note that we do a reverse lookup since the header is 847 * in the form we sent it out. 848 */ 849 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 850 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 851 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 852 if (connp == NULL) { 853 goto drop_pkt; 854 } 855 856 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp, 857 SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR); 858 return; 859 860 } 861 case IPPROTO_SCTP: 862 /* 863 * Verify we have at least ICMP_MIN_SCTP_HDR_LEN bytes of 864 * transport header to get the port information. 
865 */ 866 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_SCTP_HDR_LEN > 867 mp->b_wptr) { 868 if (!pullupmsg(mp, (uchar_t *)ip6h + hdr_length + 869 ICMP_MIN_SCTP_HDR_LEN - mp->b_rptr)) { 870 goto drop_pkt; 871 } 872 } 873 874 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 875 ((uint16_t *)&ports)[0] = up[1]; 876 ((uint16_t *)&ports)[1] = up[0]; 877 ip_fanout_sctp(first_mp, inill, (ipha_t *)ip6h, ports, 0, 878 mctl_present, IP6_NO_IPPOLICY, zoneid); 879 return; 880 case IPPROTO_ESP: 881 case IPPROTO_AH: { 882 int ipsec_rc; 883 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 884 885 /* 886 * We need a IPSEC_IN in the front to fanout to AH/ESP. 887 * We will re-use the IPSEC_IN if it is already present as 888 * AH/ESP will not affect any fields in the IPSEC_IN for 889 * ICMP errors. If there is no IPSEC_IN, allocate a new 890 * one and attach it in the front. 891 */ 892 if (ii != NULL) { 893 /* 894 * ip_fanout_proto_again converts the ICMP errors 895 * that come back from AH/ESP to M_DATA so that 896 * if it is non-AH/ESP and we do a pullupmsg in 897 * this function, it would work. Convert it back 898 * to M_CTL before we send up as this is a ICMP 899 * error. This could have been generated locally or 900 * by some router. Validate the inner IPSEC 901 * headers. 902 * 903 * NOTE : ill_index is used by ip_fanout_proto_again 904 * to locate the ill. 905 */ 906 ASSERT(ill != NULL); 907 ii->ipsec_in_ill_index = 908 ill->ill_phyint->phyint_ifindex; 909 ii->ipsec_in_rill_index = 910 inill->ill_phyint->phyint_ifindex; 911 first_mp->b_cont->b_datap->db_type = M_CTL; 912 } else { 913 /* 914 * IPSEC_IN is not present. We attach a ipsec_in 915 * message and send up to IPSEC for validating 916 * and removing the IPSEC headers. Clear 917 * ipsec_in_secure so that when we return 918 * from IPSEC, we don't mistakenly think that this 919 * is a secure packet came from the network. 920 * 921 * NOTE : ill_index is used by ip_fanout_proto_again 922 * to locate the ill. 
923 */ 924 ASSERT(first_mp == mp); 925 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 926 ASSERT(ill != NULL); 927 if (first_mp == NULL) { 928 freemsg(mp); 929 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 930 return; 931 } 932 ii = (ipsec_in_t *)first_mp->b_rptr; 933 934 /* This is not a secure packet */ 935 ii->ipsec_in_secure = B_FALSE; 936 first_mp->b_cont = mp; 937 mp->b_datap->db_type = M_CTL; 938 ii->ipsec_in_ill_index = 939 ill->ill_phyint->phyint_ifindex; 940 ii->ipsec_in_rill_index = 941 inill->ill_phyint->phyint_ifindex; 942 } 943 944 if (!ipsec_loaded(ipss)) { 945 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 946 return; 947 } 948 949 if (nexthdr == IPPROTO_ESP) 950 ipsec_rc = ipsecesp_icmp_error(first_mp); 951 else 952 ipsec_rc = ipsecah_icmp_error(first_mp); 953 if (ipsec_rc == IPSEC_STATUS_FAILED) 954 return; 955 956 ip_fanout_proto_again(first_mp, ill, inill, NULL); 957 return; 958 } 959 case IPPROTO_ENCAP: 960 case IPPROTO_IPV6: 961 if ((uint8_t *)ip6h + hdr_length + 962 (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) : 963 sizeof (ip6_t)) > mp->b_wptr) { 964 goto drop_pkt; 965 } 966 967 if (nexthdr == IPPROTO_ENCAP || 968 !IN6_ARE_ADDR_EQUAL( 969 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 970 &ip6h->ip6_src) || 971 !IN6_ARE_ADDR_EQUAL( 972 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 973 &ip6h->ip6_dst)) { 974 /* 975 * For tunnels that have used IPsec protection, 976 * we need to adjust the MTU to take into account 977 * the IPsec overhead. 978 */ 979 if (ii != NULL) { 980 icmp6->icmp6_mtu = htonl( 981 ntohl(icmp6->icmp6_mtu) - 982 ipsec_in_extra_length(first_mp)); 983 } 984 } else { 985 /* 986 * Self-encapsulated case. As in the ipv4 case, 987 * we need to strip the 2nd IP header. Since mp 988 * is already pulled-up, we can simply bcopy 989 * the 3rd header + data over the 2nd header. 
990 */ 991 uint16_t unused_len; 992 ip6_t *inner_ip6h = (ip6_t *) 993 ((uchar_t *)ip6h + hdr_length); 994 995 /* 996 * Make sure we don't do recursion more than once. 997 */ 998 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 999 &unused_len, &nexthdrp) || 1000 *nexthdrp == IPPROTO_IPV6) { 1001 goto drop_pkt; 1002 } 1003 1004 /* 1005 * We are about to modify the packet. Make a copy if 1006 * someone else has a reference to it. 1007 */ 1008 if (DB_REF(mp) > 1) { 1009 mblk_t *mp1; 1010 uint16_t icmp6_offset; 1011 1012 mp1 = copymsg(mp); 1013 if (mp1 == NULL) { 1014 goto drop_pkt; 1015 } 1016 icmp6_offset = (uint16_t) 1017 ((uchar_t *)icmp6 - mp->b_rptr); 1018 freemsg(mp); 1019 mp = mp1; 1020 1021 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1022 ip6h = (ip6_t *)&icmp6[1]; 1023 inner_ip6h = (ip6_t *) 1024 ((uchar_t *)ip6h + hdr_length); 1025 1026 if (mctl_present) 1027 first_mp->b_cont = mp; 1028 else 1029 first_mp = mp; 1030 } 1031 1032 /* 1033 * Need to set db_type back to M_DATA before 1034 * refeeding mp into this function. 1035 */ 1036 DB_TYPE(mp) = M_DATA; 1037 1038 /* 1039 * Copy the 3rd header + remaining data on top 1040 * of the 2nd header. 1041 */ 1042 bcopy(inner_ip6h, ip6h, 1043 mp->b_wptr - (uchar_t *)inner_ip6h); 1044 1045 /* 1046 * Subtract length of the 2nd header. 1047 */ 1048 mp->b_wptr -= hdr_length; 1049 1050 /* 1051 * Now recurse, and see what I _really_ should be 1052 * doing here. 1053 */ 1054 icmp_inbound_error_fanout_v6(q, first_mp, 1055 (ip6_t *)mp->b_rptr, icmp6, ill, inill, 1056 mctl_present, zoneid); 1057 return; 1058 } 1059 if (icmp_inbound_iptun_fanout_v6(first_mp, &rip6h, ill, ipst)) 1060 return; 1061 /* 1062 * No IP tunnel is associated with this error. Perhaps a raw 1063 * socket will want it. 
*/
		/* FALLTHRU */
	default:
		ip_fanout_proto_v6(q, first_mp, &rip6h, ill, inill, nexthdr, 0,
		    IP6_NO_IPPOLICY, mctl_present, zoneid);
		return;
	}
	/* NOTREACHED */
drop_pkt:
	/* Count the drop in the ill's ICMPv6 MIB and free the whole chain. */
	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
	ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
	freemsg(first_mp);
}

/*
 * Process received IPv6 ICMP Redirect messages.
 *
 * q is unused.  mp holds the packet starting at the IPv6 header and is
 * freed on every path out of this function.  ill supplies the ICMPv6 MIB
 * counters, the ipif used for the route lookup and the neighbor cache that
 * a target link-layer address option is entered into.
 *
 * The redirect is counted as ipv6IfIcmpInBadRedirects and dropped unless:
 *	- the source address is link-local,
 *	- the hop limit is IPV6_MAX_HOPS,
 *	- the ICMP code is 0,
 *	- the message is at least sizeof (nd_redirect_t) long,
 *	- the redirected destination is neither v4-mapped nor multicast,
 *	- the target is link-local or equal to the destination,
 *	- any trailing options pass ndp_verify_optlen(), and
 *	- a route to the destination through the sender exists and is not
 *	  an IRE_LOCAL.
 *
 * On success a dynamic host route (via the target when the target differs
 * from the destination, on-link otherwise) is added, routing sockets are
 * told about it, and an older dynamic IRE_HOST through the sender as well
 * as an IRE_CACHE previous route are deleted.
 */
/* ARGSUSED */
static void
icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill)
{
	ip6_t		*ip6h;
	uint16_t	hdr_length;
	nd_redirect_t	*rd;
	ire_t		*ire;
	ire_t		*prev_ire;
	ire_t		*redir_ire;
	in6_addr_t	*src, *dst, *gateway;
	nd_opt_hdr_t	*opt;
	nce_t		*nce;
	int		nce_flags = 0;
	int		err = 0;
	boolean_t	redirect_to_router = B_FALSE;
	int		len;
	int		optlen;
	iulp_t		ulp_info = { 0 };
	ill_t		*prev_ire_ill;
	ipif_t		*ipif;
	ip_stack_t	*ipst = ill->ill_ipst;

	/*
	 * Locate the ICMPv6 header: directly after the fixed IPv6 header
	 * when there are no extension headers, otherwise after whatever
	 * ip_hdr_length_v6() reports.
	 */
	ip6h = (ip6_t *)mp->b_rptr;
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
		hdr_length = ip_hdr_length_v6(mp, ip6h);
	else
		hdr_length = IPV6_HDR_LEN;

	rd = (nd_redirect_t *)&mp->b_rptr[hdr_length];
	/* len is the ICMPv6 payload length available in this mblk. */
	len = mp->b_wptr - mp->b_rptr - hdr_length;
	src = &ip6h->ip6_src;
	dst = &rd->nd_rd_dst;
	gateway = &rd->nd_rd_target;

	/*
	 * Verify if it is a valid redirect
	 *
	 * NOTE(review): rd->nd_rd_code is read before the length test, and
	 * len (a signed int) is compared against a size_t.  Presumably the
	 * caller has already guaranteed at least a minimal ICMPv6 header in
	 * this mblk - confirm.
	 */
	if (!IN6_IS_ADDR_LINKLOCAL(src) ||
	    (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
	    (rd->nd_rd_code != 0) ||
	    (len < sizeof (nd_redirect_t)) ||
	    (IN6_IS_ADDR_V4MAPPED(dst)) ||
	    (IN6_IS_ADDR_MULTICAST(dst))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		freemsg(mp);
		return;
	}

	/* The target must be link-local, or be the destination itself. */
	if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
	    IN6_ARE_ADDR_EQUAL(gateway, dst))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		freemsg(mp);
		return;
	}

	/* Anything beyond the fixed redirect header must be valid options. */
	if (len > sizeof (nd_redirect_t)) {
		if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1],
		    len - sizeof (nd_redirect_t))) {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
			freemsg(mp);
			return;
		}
	}

	/*
	 * A target different from the destination means we are being
	 * pointed at another router rather than told the destination is
	 * on-link.
	 */
	if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
		redirect_to_router = B_TRUE;
		nce_flags |= NCE_F_ISROUTER;
	}

	/* ipif will be refreleased afterwards */
	ipif = ipif_get_next_ipif(NULL, ill);
	if (ipif == NULL) {
		freemsg(mp);
		return;
	}

	/*
	 * Verify that the IP source address of the redirect is
	 * the same as the current first-hop router for the specified
	 * ICMP destination address.
	 * Also, Make sure we had a route for the dest in question and
	 * that route was pointing to the old gateway (the source of the
	 * redirect packet.)
	 */

	prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, ALL_ZONES,
	    NULL, MATCH_IRE_GW | MATCH_IRE_ILL | MATCH_IRE_DEFAULT, ipst);

	/*
	 * Check that
	 *	the redirect was not from ourselves
	 *	old gateway is still directly reachable
	 */
	if (prev_ire == NULL ||
	    prev_ire->ire_type == IRE_LOCAL) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		ipif_refrele(ipif);
		goto fail_redirect;
	}
	prev_ire_ill = ire_to_ill(prev_ire);
	ASSERT(prev_ire_ill != NULL);
	/* Carry the "no NUD" property of the old route's ill into the NCE. */
	if (prev_ire_ill->ill_flags & ILLF_NONUD)
		nce_flags |= NCE_F_NONUD;

	/*
	 * Should we use the old ULP info to create the new gateway?  From
	 * a user's perspective, we should inherit the info so that it
	 * is a "smooth" transition.  If we do not do that, then new
	 * connections going thru the new gateway will have no route metrics,
	 * which is counter-intuitive to user.  From a network point of
	 * view, this may or may not make sense even though the new gateway
	 * is still directly connected to us so the route metrics should not
	 * change much.
	 *
	 * But if the old ire_uinfo is not initialized, we do another
	 * recursive lookup on the dest using the new gateway.  There may
	 * be a route to that.  If so, use it to initialize the redirect
	 * route.
	 */
	if (prev_ire->ire_uinfo.iulp_set) {
		bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t));
	} else if (redirect_to_router) {
		/*
		 * Only do the following if the redirection is really to
		 * a router.
		 */
		ire_t *tmp_ire;
		ire_t *sire;

		/*
		 * NOTE(review): sire is not initialized here; this relies on
		 * ire_ftable_lookup_v6() always storing through its pire
		 * argument - confirm.
		 */
		tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire,
		    ALL_ZONES, 0, NULL,
		    (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT),
		    ipst);
		if (sire != NULL) {
			/* Prefer the metrics of the parent (sire) route. */
			bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t));
			ASSERT(tmp_ire != NULL);
			ire_refrele(tmp_ire);
			ire_refrele(sire);
		} else if (tmp_ire != NULL) {
			bcopy(&tmp_ire->ire_uinfo, &ulp_info,
			    sizeof (iulp_t));
			ire_refrele(tmp_ire);
		}
	}

	/*
	 * If the redirect carries a target link-layer address option, enter
	 * (or refresh) a neighbor cache entry for the target.
	 *
	 * NOTE(review): &opt[1] is handed on without comparing the option's
	 * length against the ill's link-layer address length in this
	 * function - confirm that ndp_verify_optlen() or the callee covers
	 * this.
	 */
	optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t);
	opt = (nd_opt_hdr_t *)&rd[1];
	opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
	if (opt != NULL) {
		err = ndp_lookup_then_add_v6(ill,
		    B_FALSE,			/* don't match across illgrp */
		    (uchar_t *)&opt[1],		/* Link layer address */
		    gateway,
		    &ipv6_all_ones,		/* prefix mask */
		    &ipv6_all_zeros,		/* Mapping mask */
		    0,
		    nce_flags,
		    ND_STALE,
		    &nce);
		switch (err) {
		case 0:
			NCE_REFRELE(nce);
			break;
		case EEXIST:
			/*
			 * Check to see if link layer address has changed and
			 * process the nce_state accordingly.
			 */
			ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE);
			NCE_REFRELE(nce);
			break;
		default:
			ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
			    err));
			ipif_refrele(ipif);
			goto fail_redirect;
		}
	}
	if (redirect_to_router) {
		/*
		 * The checks above guarantee this: the target is either
		 * link-local or equal to dst, and redirect_to_router is only
		 * set when it is not equal to dst.
		 */
		ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));

		/*
		 * Create a Route Association.  This will allow us to remember
		 * a router told us to use the particular gateway.
		 */
		ire = ire_create_v6(
		    dst,
		    &ipv6_all_ones,		/* mask */
		    &prev_ire->ire_src_addr_v6,	/* source addr */
		    gateway,			/* gateway addr */
		    &prev_ire->ire_max_frag,	/* max frag */
		    NULL,			/* no src nce */
		    NULL,			/* no rfq */
		    NULL,			/* no stq */
		    IRE_HOST,
		    prev_ire->ire_ipif,
		    NULL,
		    0,
		    0,
		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
		    &ulp_info,
		    NULL,
		    NULL,
		    ipst);
	} else {
		queue_t *stq;

		stq = (ipif->ipif_net_type == IRE_IF_RESOLVER)
		    ? ipif->ipif_rq : ipif->ipif_wq;

		/*
		 * Just create an on link entry, i.e. interface route.
		 */
		ire = ire_create_v6(
		    dst,				/* gateway == dst */
		    &ipv6_all_ones,		/* mask */
		    &prev_ire->ire_src_addr_v6,	/* source addr */
		    &ipv6_all_zeros,		/* gateway addr */
		    &prev_ire->ire_max_frag,	/* max frag */
		    NULL,			/* no src nce */
		    NULL,			/* ire rfq */
		    stq,			/* ire stq */
		    ipif->ipif_net_type,	/* IF_[NO]RESOLVER */
		    prev_ire->ire_ipif,
		    &ipv6_all_ones,
		    0,
		    0,
		    (RTF_DYNAMIC | RTF_HOST),
		    &ulp_info,
		    NULL,
		    NULL,
		    ipst);
	}

	/* Release reference from earlier ipif_get_next_ipif() */
	ipif_refrele(ipif);

	if (ire == NULL)
		goto fail_redirect;

	if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) {

		/* tell routing sockets that we received a redirect */
		ip_rts_change_v6(RTM_REDIRECT,
		    &rd->nd_rd_dst,
		    &rd->nd_rd_target,
		    &ipv6_all_ones, 0, &ire->ire_src_addr_v6,
		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
		    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);

		/*
		 * Delete any existing IRE_HOST type ires for this destination.
		 * This together with the added IRE has the effect of
		 * modifying an existing redirect.
		 */
		redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
		    ire->ire_ipif, NULL, ALL_ZONES, 0, NULL,
		    (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), ipst);

		ire_refrele(ire);		/* Held in ire_add_v6 */

		if (redir_ire != NULL) {
			/* Only routes learned dynamically are replaced. */
			if (redir_ire->ire_flags & RTF_DYNAMIC)
				ire_delete(redir_ire);
			ire_refrele(redir_ire);
		}
	}

	/* Drop a cached previous route so the next lookup sees the new IRE. */
	if (prev_ire->ire_type == IRE_CACHE)
		ire_delete(prev_ire);
	ire_refrele(prev_ire);
	/* Cleared so the shared exit path below does not release it twice. */
	prev_ire = NULL;

fail_redirect:
	if (prev_ire != NULL)
		ire_refrele(prev_ire);
	freemsg(mp);
}

/*
 * Map a write-side queue to an ill.
 *
 * When q has a module/driver below it (q_next != NULL) its q_ptr is taken
 * to be the ill, which is returned with a reference held if
 * ILL_CAN_LOOKUP() allows it.  Otherwise the loopback ill is looked up by
 * name.  Returns NULL (after logging) when no ill is available.  Callers
 * in this file drop the reference with ill_refrele().
 */
static ill_t *
ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst)
{
	ill_t *ill;

	ASSERT(WR(q) == q);

	if (q->q_next != NULL) {
		ill = (ill_t *)q->q_ptr;
		if (ILL_CAN_LOOKUP(ill))
			ill_refhold(ill);
		else
			ill = NULL;
	} else {
		ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE,
		    NULL, NULL, NULL, NULL, NULL, ipst);
	}
	if (ill == NULL)
		ip0dbg(("ip_queue_to_ill_v6: no ill\n"));
	return (ill);
}

/*
 * Assigns an appropriate source address to the packet.
 * If origdst is one of our IP addresses then use it as the source.
 * If the queue is an ill queue then select a source from that ill.
 * Otherwise pick a source based on a route lookup back to the origsrc.
 *
 * src is the return parameter. Returns a pointer to src or NULL if failure.
1389 */ 1390 static in6_addr_t * 1391 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1392 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1393 { 1394 ill_t *ill; 1395 ire_t *ire; 1396 ipif_t *ipif; 1397 1398 ASSERT(!(wq->q_flag & QREADR)); 1399 if (wq->q_next != NULL) { 1400 ill = (ill_t *)wq->q_ptr; 1401 } else { 1402 ill = NULL; 1403 } 1404 1405 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1406 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1407 ipst); 1408 if (ire != NULL) { 1409 /* Destined to one of our addresses */ 1410 *src = *origdst; 1411 ire_refrele(ire); 1412 return (src); 1413 } 1414 if (ire != NULL) { 1415 ire_refrele(ire); 1416 ire = NULL; 1417 } 1418 if (ill == NULL) { 1419 /* What is the route back to the original source? */ 1420 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1421 NULL, NULL, zoneid, NULL, 1422 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1423 if (ire == NULL) { 1424 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1425 return (NULL); 1426 } 1427 ASSERT(ire->ire_ipif != NULL); 1428 ill = ire->ire_ipif->ipif_ill; 1429 ire_refrele(ire); 1430 } 1431 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1432 IPV6_PREFER_SRC_DEFAULT, zoneid); 1433 if (ipif != NULL) { 1434 *src = ipif->ipif_v6src_addr; 1435 ipif_refrele(ipif); 1436 return (src); 1437 } 1438 /* 1439 * Unusual case - can't find a usable source address to reach the 1440 * original source. Use what in the route to the source. 
1441 */ 1442 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1443 NULL, NULL, zoneid, NULL, 1444 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1445 if (ire == NULL) { 1446 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1447 return (NULL); 1448 } 1449 ASSERT(ire != NULL); 1450 *src = ire->ire_src_addr_v6; 1451 ire_refrele(ire); 1452 return (src); 1453 } 1454 1455 /* 1456 * Build and ship an IPv6 ICMP message using the packet data in mp, 1457 * and the ICMP header pointed to by "stuff". (May be called as 1458 * writer.) 1459 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1460 * verify that an icmp error packet can be sent. 1461 * 1462 * If q is an ill write side queue (which is the case when packets 1463 * arrive from ip_rput) then ip_wput code will ensure that packets to 1464 * link-local destinations are sent out that ill. 1465 * 1466 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1467 * source address (see above function). 1468 */ 1469 static void 1470 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1471 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1472 ip_stack_t *ipst) 1473 { 1474 ip6_t *ip6h; 1475 in6_addr_t v6dst; 1476 size_t len_needed; 1477 size_t msg_len; 1478 mblk_t *mp1; 1479 icmp6_t *icmp6; 1480 ill_t *ill; 1481 in6_addr_t v6src; 1482 mblk_t *ipsec_mp; 1483 ipsec_out_t *io; 1484 1485 ill = ip_queue_to_ill_v6(q, ipst); 1486 if (ill == NULL) { 1487 freemsg(mp); 1488 return; 1489 } 1490 1491 if (mctl_present) { 1492 /* 1493 * If it is : 1494 * 1495 * 1) a IPSEC_OUT, then this is caused by outbound 1496 * datagram originating on this host. IPSEC processing 1497 * may or may not have been done. Refer to comments above 1498 * icmp_inbound_error_fanout for details. 1499 * 1500 * 2) a IPSEC_IN if we are generating a icmp_message 1501 * for an incoming datagram destined for us i.e called 1502 * from ip_fanout_send_icmp. 
1503 */ 1504 ipsec_info_t *in; 1505 1506 ipsec_mp = mp; 1507 mp = ipsec_mp->b_cont; 1508 1509 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1510 ip6h = (ip6_t *)mp->b_rptr; 1511 1512 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1513 in->ipsec_info_type == IPSEC_IN); 1514 1515 if (in->ipsec_info_type == IPSEC_IN) { 1516 /* 1517 * Convert the IPSEC_IN to IPSEC_OUT. 1518 */ 1519 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h, zoneid)) { 1520 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1521 ill_refrele(ill); 1522 return; 1523 } 1524 } else { 1525 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1526 io = (ipsec_out_t *)in; 1527 /* 1528 * Clear out ipsec_out_proc_begin, so we do a fresh 1529 * ire lookup. 1530 */ 1531 io->ipsec_out_proc_begin = B_FALSE; 1532 } 1533 } else { 1534 /* 1535 * This is in clear. The icmp message we are building 1536 * here should go out in clear. 1537 */ 1538 ipsec_in_t *ii; 1539 ASSERT(mp->b_datap->db_type == M_DATA); 1540 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1541 if (ipsec_mp == NULL) { 1542 freemsg(mp); 1543 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1544 ill_refrele(ill); 1545 return; 1546 } 1547 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1548 1549 /* This is not a secure packet */ 1550 ii->ipsec_in_secure = B_FALSE; 1551 ipsec_mp->b_cont = mp; 1552 ip6h = (ip6_t *)mp->b_rptr; 1553 /* 1554 * Convert the IPSEC_IN to IPSEC_OUT. 
1555 */ 1556 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h, zoneid)) { 1557 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1558 ill_refrele(ill); 1559 return; 1560 } 1561 } 1562 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1563 1564 if (v6src_ptr != NULL) { 1565 v6src = *v6src_ptr; 1566 } else { 1567 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1568 &v6src, zoneid, ipst) == NULL) { 1569 freemsg(ipsec_mp); 1570 ill_refrele(ill); 1571 return; 1572 } 1573 } 1574 v6dst = ip6h->ip6_src; 1575 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1576 msg_len = msgdsize(mp); 1577 if (msg_len > len_needed) { 1578 if (!adjmsg(mp, len_needed - msg_len)) { 1579 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1580 freemsg(ipsec_mp); 1581 ill_refrele(ill); 1582 return; 1583 } 1584 msg_len = len_needed; 1585 } 1586 mp1 = allocb_tmpl(IPV6_HDR_LEN + len, mp); 1587 if (mp1 == NULL) { 1588 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1589 freemsg(ipsec_mp); 1590 ill_refrele(ill); 1591 return; 1592 } 1593 ill_refrele(ill); 1594 mp1->b_cont = mp; 1595 mp = mp1; 1596 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1597 io->ipsec_out_type == IPSEC_OUT); 1598 ipsec_mp->b_cont = mp; 1599 1600 /* 1601 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1602 * node generates be accepted in peace by all on-host destinations. 1603 * If we do NOT assume that all on-host destinations trust 1604 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1605 * (Look for ipsec_out_icmp_loopback). 
1606 */ 1607 io->ipsec_out_icmp_loopback = B_TRUE; 1608 1609 ip6h = (ip6_t *)mp->b_rptr; 1610 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1611 1612 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1613 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1614 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1615 ip6h->ip6_dst = v6dst; 1616 ip6h->ip6_src = v6src; 1617 msg_len += IPV6_HDR_LEN + len; 1618 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1619 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1620 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1621 } 1622 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1623 icmp6 = (icmp6_t *)&ip6h[1]; 1624 bcopy(stuff, (char *)icmp6, len); 1625 /* 1626 * Prepare for checksum by putting icmp length in the icmp 1627 * checksum field. The checksum is calculated in ip_wput_v6. 1628 */ 1629 icmp6->icmp6_cksum = ip6h->ip6_plen; 1630 if (icmp6->icmp6_type == ND_REDIRECT) { 1631 ip6h->ip6_hops = IPV6_MAX_HOPS; 1632 } 1633 /* Send to V6 writeside put routine */ 1634 put(q, ipsec_mp); 1635 } 1636 1637 /* 1638 * Update the output mib when ICMPv6 packets are sent. 
1639 */ 1640 static void 1641 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1642 { 1643 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1644 1645 switch (icmp6->icmp6_type) { 1646 case ICMP6_DST_UNREACH: 1647 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1648 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1649 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1650 break; 1651 1652 case ICMP6_TIME_EXCEEDED: 1653 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1654 break; 1655 1656 case ICMP6_PARAM_PROB: 1657 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1658 break; 1659 1660 case ICMP6_PACKET_TOO_BIG: 1661 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1662 break; 1663 1664 case ICMP6_ECHO_REQUEST: 1665 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1666 break; 1667 1668 case ICMP6_ECHO_REPLY: 1669 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1670 break; 1671 1672 case ND_ROUTER_SOLICIT: 1673 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1674 break; 1675 1676 case ND_ROUTER_ADVERT: 1677 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1678 break; 1679 1680 case ND_NEIGHBOR_SOLICIT: 1681 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1682 break; 1683 1684 case ND_NEIGHBOR_ADVERT: 1685 BUMP_MIB(ill->ill_icmp6_mib, 1686 ipv6IfIcmpOutNeighborAdvertisements); 1687 break; 1688 1689 case ND_REDIRECT: 1690 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1691 break; 1692 1693 case MLD_LISTENER_QUERY: 1694 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1695 break; 1696 1697 case MLD_LISTENER_REPORT: 1698 case MLD_V2_LISTENER_REPORT: 1699 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1700 break; 1701 1702 case MLD_LISTENER_REDUCTION: 1703 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1704 break; 1705 } 1706 } 1707 1708 /* 1709 * Check if it is ok to send an ICMPv6 error packet in 1710 * response to the IP packet in 
mp. 1711 * Free the message and return null if no 1712 * ICMP error packet should be sent. 1713 */ 1714 static mblk_t * 1715 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1716 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1717 { 1718 ip6_t *ip6h; 1719 1720 if (!mp) 1721 return (NULL); 1722 1723 ip6h = (ip6_t *)mp->b_rptr; 1724 1725 /* Check if source address uniquely identifies the host */ 1726 1727 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1728 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1729 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1730 freemsg(mp); 1731 return (NULL); 1732 } 1733 1734 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1735 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1736 icmp6_t *icmp6; 1737 1738 if (mp->b_wptr - mp->b_rptr < len_needed) { 1739 if (!pullupmsg(mp, len_needed)) { 1740 ill_t *ill; 1741 1742 ill = ip_queue_to_ill_v6(q, ipst); 1743 if (ill == NULL) { 1744 BUMP_MIB(&ipst->ips_icmp6_mib, 1745 ipv6IfIcmpInErrors); 1746 } else { 1747 BUMP_MIB(ill->ill_icmp6_mib, 1748 ipv6IfIcmpInErrors); 1749 ill_refrele(ill); 1750 } 1751 freemsg(mp); 1752 return (NULL); 1753 } 1754 ip6h = (ip6_t *)mp->b_rptr; 1755 } 1756 icmp6 = (icmp6_t *)&ip6h[1]; 1757 /* Explicitly do not generate errors in response to redirects */ 1758 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1759 icmp6->icmp6_type == ND_REDIRECT) { 1760 freemsg(mp); 1761 return (NULL); 1762 } 1763 } 1764 /* 1765 * Check that the destination is not multicast and that the packet 1766 * was not sent on link layer broadcast or multicast. (Exception 1767 * is Packet too big message as per the draft - when mcast_ok is set.) 1768 */ 1769 if (!mcast_ok && 1770 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1771 freemsg(mp); 1772 return (NULL); 1773 } 1774 if (icmp_err_rate_limit(ipst)) { 1775 /* 1776 * Only send ICMP error packets every so often. 1777 * This should be done on a per port/source basis, 1778 * but for now this will suffice. 
1779 */ 1780 freemsg(mp); 1781 return (NULL); 1782 } 1783 return (mp); 1784 } 1785 1786 /* 1787 * Generate an ICMPv6 redirect message. 1788 * Include target link layer address option if it exits. 1789 * Always include redirect header. 1790 */ 1791 static void 1792 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1793 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1794 { 1795 nd_redirect_t *rd; 1796 nd_opt_rd_hdr_t *rdh; 1797 uchar_t *buf; 1798 nce_t *nce = NULL; 1799 nd_opt_hdr_t *opt; 1800 int len; 1801 int ll_opt_len = 0; 1802 int max_redir_hdr_data_len; 1803 int pkt_len; 1804 in6_addr_t *srcp; 1805 ip_stack_t *ipst = ill->ill_ipst; 1806 1807 /* 1808 * We are called from ip_rput where we could 1809 * not have attached an IPSEC_IN. 1810 */ 1811 ASSERT(mp->b_datap->db_type == M_DATA); 1812 1813 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1814 if (mp == NULL) 1815 return; 1816 nce = ndp_lookup_v6(ill, B_TRUE, targetp, B_FALSE); 1817 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1818 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1819 ill->ill_phys_addr_length + 7)/8 * 8; 1820 } 1821 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1822 ASSERT(len % 4 == 0); 1823 buf = kmem_alloc(len, KM_NOSLEEP); 1824 if (buf == NULL) { 1825 if (nce != NULL) 1826 NCE_REFRELE(nce); 1827 freemsg(mp); 1828 return; 1829 } 1830 1831 rd = (nd_redirect_t *)buf; 1832 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1833 rd->nd_rd_code = 0; 1834 rd->nd_rd_reserved = 0; 1835 rd->nd_rd_target = *targetp; 1836 rd->nd_rd_dst = *dest; 1837 1838 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1839 if (nce != NULL && ll_opt_len != 0) { 1840 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1841 opt->nd_opt_len = ll_opt_len/8; 1842 bcopy((char *)nce->nce_res_mp->b_rptr + 1843 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1844 ill->ill_phys_addr_length); 1845 } 1846 if (nce != NULL) 1847 NCE_REFRELE(nce); 1848 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + 
ll_opt_len); 1849 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1850 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1851 max_redir_hdr_data_len = 1852 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1853 pkt_len = msgdsize(mp); 1854 /* Make sure mp is 8 byte aligned */ 1855 if (pkt_len > max_redir_hdr_data_len) { 1856 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1857 sizeof (nd_opt_rd_hdr_t))/8; 1858 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1859 } else { 1860 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1861 (void) adjmsg(mp, -(pkt_len % 8)); 1862 } 1863 rdh->nd_opt_rh_reserved1 = 0; 1864 rdh->nd_opt_rh_reserved2 = 0; 1865 /* ipif_v6src_addr contains the link-local source address */ 1866 srcp = &ill->ill_ipif->ipif_v6src_addr; 1867 1868 /* Redirects sent by router, and router is global zone */ 1869 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1870 kmem_free(buf, len); 1871 } 1872 1873 1874 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1875 void 1876 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1877 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1878 ip_stack_t *ipst) 1879 { 1880 icmp6_t icmp6; 1881 boolean_t mctl_present; 1882 mblk_t *first_mp; 1883 1884 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1885 1886 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1887 if (mp == NULL) { 1888 if (mctl_present) 1889 freeb(first_mp); 1890 return; 1891 } 1892 bzero(&icmp6, sizeof (icmp6_t)); 1893 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1894 icmp6.icmp6_code = code; 1895 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1896 zoneid, ipst); 1897 } 1898 1899 /* 1900 * Generate an ICMP unreachable message. 
1901 */ 1902 void 1903 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1904 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1905 ip_stack_t *ipst) 1906 { 1907 icmp6_t icmp6; 1908 boolean_t mctl_present; 1909 mblk_t *first_mp; 1910 1911 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1912 1913 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1914 if (mp == NULL) { 1915 if (mctl_present) 1916 freeb(first_mp); 1917 return; 1918 } 1919 bzero(&icmp6, sizeof (icmp6_t)); 1920 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1921 icmp6.icmp6_code = code; 1922 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1923 zoneid, ipst); 1924 } 1925 1926 /* 1927 * Generate an ICMP pkt too big message. 1928 */ 1929 static void 1930 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1931 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1932 { 1933 icmp6_t icmp6; 1934 mblk_t *first_mp; 1935 boolean_t mctl_present; 1936 1937 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1938 1939 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1940 if (mp == NULL) { 1941 if (mctl_present) 1942 freeb(first_mp); 1943 return; 1944 } 1945 bzero(&icmp6, sizeof (icmp6_t)); 1946 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1947 icmp6.icmp6_code = 0; 1948 icmp6.icmp6_mtu = htonl(mtu); 1949 1950 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1951 zoneid, ipst); 1952 } 1953 1954 /* 1955 * Generate an ICMP parameter problem message. (May be called as writer.) 1956 * 'offset' is the offset from the beginning of the packet in error. 
1957 */ 1958 static void 1959 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 1960 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1961 ip_stack_t *ipst) 1962 { 1963 icmp6_t icmp6; 1964 boolean_t mctl_present; 1965 mblk_t *first_mp; 1966 1967 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1968 1969 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1970 if (mp == NULL) { 1971 if (mctl_present) 1972 freeb(first_mp); 1973 return; 1974 } 1975 bzero((char *)&icmp6, sizeof (icmp6_t)); 1976 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1977 icmp6.icmp6_code = code; 1978 icmp6.icmp6_pptr = htonl(offset); 1979 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1980 zoneid, ipst); 1981 } 1982 1983 /* 1984 * This code will need to take into account the possibility of binding 1985 * to a link local address on a multi-homed host, in which case the 1986 * outgoing interface (from the conn) will need to be used when getting 1987 * an ire for the dst. Going through proper outgoing interface and 1988 * choosing the source address corresponding to the outgoing interface 1989 * is necessary when the destination address is a link-local address and 1990 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 1991 * This can happen when active connection is setup; thus ipp pointer 1992 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 1993 * pointer is passed as ipp pointer. 
1994 */ 1995 mblk_t * 1996 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 1997 { 1998 ssize_t len; 1999 int protocol; 2000 struct T_bind_req *tbr; 2001 sin6_t *sin6; 2002 ipa6_conn_t *ac6; 2003 in6_addr_t *v6srcp; 2004 in6_addr_t *v6dstp; 2005 uint16_t lport; 2006 uint16_t fport; 2007 uchar_t *ucp; 2008 int error = 0; 2009 boolean_t local_bind; 2010 ipa6_conn_x_t *acx6; 2011 boolean_t verify_dst; 2012 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2013 cred_t *cr; 2014 2015 /* 2016 * All Solaris components should pass a db_credp 2017 * for this TPI message, hence we ASSERT. 2018 * But in case there is some other M_PROTO that looks 2019 * like a TPI message sent by some other kernel 2020 * component, we check and return an error. 2021 */ 2022 cr = msg_getcred(mp, NULL); 2023 ASSERT(cr != NULL); 2024 if (cr == NULL) { 2025 error = EINVAL; 2026 goto bad_addr; 2027 } 2028 2029 ASSERT(connp->conn_af_isv6); 2030 len = mp->b_wptr - mp->b_rptr; 2031 if (len < (sizeof (*tbr) + 1)) { 2032 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2033 "ip_bind_v6: bogus msg, len %ld", len); 2034 goto bad_addr; 2035 } 2036 /* Back up and extract the protocol identifier. */ 2037 mp->b_wptr--; 2038 tbr = (struct T_bind_req *)mp->b_rptr; 2039 /* Reset the message type in preparation for shipping it back. */ 2040 mp->b_datap->db_type = M_PCPROTO; 2041 2042 protocol = *mp->b_wptr & 0xFF; 2043 connp->conn_ulp = (uint8_t)protocol; 2044 2045 /* 2046 * Check for a zero length address. This is from a protocol that 2047 * wants to register to receive all packets of its type. 2048 */ 2049 if (tbr->ADDR_length == 0) { 2050 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2051 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2052 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2053 NULL) { 2054 /* 2055 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2056 * Do not allow others to bind to these. 
2057 */ 2058 goto bad_addr; 2059 } 2060 2061 /* 2062 * 2063 * The udp module never sends down a zero-length address, 2064 * and allowing this on a labeled system will break MLP 2065 * functionality. 2066 */ 2067 if (is_system_labeled() && protocol == IPPROTO_UDP) 2068 goto bad_addr; 2069 2070 /* Allow ipsec plumbing */ 2071 if ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 2072 (protocol != IPPROTO_AH) && (protocol != IPPROTO_ESP)) 2073 goto bad_addr; 2074 2075 connp->conn_srcv6 = ipv6_all_zeros; 2076 ipcl_proto_insert_v6(connp, protocol); 2077 2078 tbr->PRIM_type = T_BIND_ACK; 2079 return (mp); 2080 } 2081 2082 /* Extract the address pointer from the message. */ 2083 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2084 tbr->ADDR_length); 2085 if (ucp == NULL) { 2086 ip1dbg(("ip_bind_v6: no address\n")); 2087 goto bad_addr; 2088 } 2089 if (!OK_32PTR(ucp)) { 2090 ip1dbg(("ip_bind_v6: unaligned address\n")); 2091 goto bad_addr; 2092 } 2093 2094 switch (tbr->ADDR_length) { 2095 default: 2096 ip1dbg(("ip_bind_v6: bad address length %d\n", 2097 (int)tbr->ADDR_length)); 2098 goto bad_addr; 2099 2100 case IPV6_ADDR_LEN: 2101 /* Verification of local address only */ 2102 v6srcp = (in6_addr_t *)ucp; 2103 lport = 0; 2104 local_bind = B_TRUE; 2105 break; 2106 2107 case sizeof (sin6_t): 2108 sin6 = (sin6_t *)ucp; 2109 v6srcp = &sin6->sin6_addr; 2110 lport = sin6->sin6_port; 2111 local_bind = B_TRUE; 2112 break; 2113 2114 case sizeof (ipa6_conn_t): 2115 /* 2116 * Verify that both the source and destination addresses 2117 * are valid. 2118 */ 2119 ac6 = (ipa6_conn_t *)ucp; 2120 v6srcp = &ac6->ac6_laddr; 2121 v6dstp = &ac6->ac6_faddr; 2122 fport = ac6->ac6_fport; 2123 /* For raw socket, the local port is not set. */ 2124 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2125 connp->conn_lport; 2126 local_bind = B_FALSE; 2127 /* Always verify destination reachability. 
*/ 2128 verify_dst = B_TRUE; 2129 break; 2130 2131 case sizeof (ipa6_conn_x_t): 2132 /* 2133 * Verify that the source address is valid. 2134 */ 2135 acx6 = (ipa6_conn_x_t *)ucp; 2136 ac6 = &acx6->ac6x_conn; 2137 v6srcp = &ac6->ac6_laddr; 2138 v6dstp = &ac6->ac6_faddr; 2139 fport = ac6->ac6_fport; 2140 lport = ac6->ac6_lport; 2141 local_bind = B_FALSE; 2142 /* 2143 * Client that passed ipa6_conn_x_t to us specifies whether to 2144 * verify destination reachability. 2145 */ 2146 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2147 break; 2148 } 2149 if (local_bind) { 2150 error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol, 2151 v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN); 2152 } else { 2153 error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol, 2154 v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2155 } 2156 2157 if (error == 0) { 2158 /* Send it home. */ 2159 mp->b_datap->db_type = M_PCPROTO; 2160 tbr->PRIM_type = T_BIND_ACK; 2161 return (mp); 2162 } 2163 2164 bad_addr: 2165 ASSERT(error != EINPROGRESS); 2166 if (error > 0) 2167 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2168 else 2169 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2170 return (mp); 2171 } 2172 2173 /* 2174 * Here address is verified to be a valid local address. 2175 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2176 * address is also considered a valid local address. 2177 * In the case of a multicast address, however, the 2178 * upper protocol is expected to reset the src address 2179 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2180 * no packets are emitted with multicast address as 2181 * source address. 2182 * The addresses valid for bind are: 2183 * (1) - in6addr_any 2184 * (2) - IP address of an UP interface 2185 * (3) - IP address of a DOWN interface 2186 * (4) - a multicast address. In this case 2187 * the conn will only receive packets destined to 2188 * the specified multicast address. 
Note: the 2189 * application still has to issue an 2190 * IPV6_JOIN_GROUP socket option. 2191 * 2192 * In all the above cases, the bound address must be valid in the current zone. 2193 * When the address is loopback or multicast, there might be many matching IREs 2194 * so bind has to look up based on the zone. 2195 */ 2196 /* 2197 * Verify the local IP address. Does not change the conn_t except 2198 * conn_fully_bound and conn_policy_cached. 2199 */ 2200 static int 2201 ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2202 const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert) 2203 { 2204 int error = 0; 2205 ire_t *src_ire = NULL; 2206 zoneid_t zoneid; 2207 mblk_t *mp = NULL; 2208 boolean_t ire_requested; 2209 boolean_t ipsec_policy_set; 2210 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2211 2212 if (mpp) 2213 mp = *mpp; 2214 2215 ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2216 ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET); 2217 2218 /* 2219 * If it was previously connected, conn_fully_bound would have 2220 * been set. 2221 */ 2222 connp->conn_fully_bound = B_FALSE; 2223 2224 zoneid = IPCL_ZONEID(connp); 2225 2226 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2227 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2228 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2229 /* 2230 * If an address other than in6addr_any is requested, 2231 * we verify that it is a valid address for bind 2232 * Note: Following code is in if-else-if form for 2233 * readability compared to a condition check. 2234 */ 2235 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2236 /* LINTED - statement has no consequent */ 2237 if (IRE_IS_LOCAL(src_ire)) { 2238 /* 2239 * (2) Bind to address of local UP interface 2240 */ 2241 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2242 ipif_t *multi_ipif = NULL; 2243 ire_t *save_ire; 2244 /* 2245 * (4) bind to multicast address. 
2246 * Fake out the IRE returned to upper 2247 * layer to be a broadcast IRE in 2248 * ip_bind_insert_ire_v6(). 2249 * Pass other information that matches 2250 * the ipif (e.g. the source address). 2251 * conn_multicast_ill is only used for 2252 * IPv6 packets 2253 */ 2254 mutex_enter(&connp->conn_lock); 2255 if (connp->conn_multicast_ill != NULL) { 2256 (void) ipif_lookup_zoneid( 2257 connp->conn_multicast_ill, zoneid, 0, 2258 &multi_ipif); 2259 } else { 2260 /* 2261 * Look for default like 2262 * ip_wput_v6 2263 */ 2264 multi_ipif = ipif_lookup_group_v6( 2265 &ipv6_unspecified_group, zoneid, ipst); 2266 } 2267 mutex_exit(&connp->conn_lock); 2268 save_ire = src_ire; 2269 src_ire = NULL; 2270 if (multi_ipif == NULL || !ire_requested || 2271 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2272 src_ire = save_ire; 2273 error = EADDRNOTAVAIL; 2274 } else { 2275 ASSERT(src_ire != NULL); 2276 if (save_ire != NULL) 2277 ire_refrele(save_ire); 2278 } 2279 if (multi_ipif != NULL) 2280 ipif_refrele(multi_ipif); 2281 } else { 2282 if (!ip_addr_exists_v6(v6src, zoneid, ipst)) { 2283 /* 2284 * Not a valid address for bind 2285 */ 2286 error = EADDRNOTAVAIL; 2287 } 2288 } 2289 2290 if (error != 0) { 2291 /* Red Alert! Attempting to be a bogon! */ 2292 if (ip_debug > 2) { 2293 /* ip1dbg */ 2294 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2295 " address %s\n", AF_INET6, v6src); 2296 } 2297 goto bad_addr; 2298 } 2299 } 2300 2301 /* 2302 * Allow setting new policies. For example, disconnects come 2303 * down as ipa_t bind. As we would have set conn_policy_cached 2304 * to B_TRUE before, we should set it to B_FALSE, so that policy 2305 * can change after the disconnect. 2306 */ 2307 connp->conn_policy_cached = B_FALSE; 2308 2309 /* If not fanout_insert this was just an address verification */ 2310 if (fanout_insert) { 2311 /* 2312 * The addresses have been verified. Time to insert in 2313 * the correct fanout list. 
2314 */ 2315 connp->conn_srcv6 = *v6src; 2316 connp->conn_remv6 = ipv6_all_zeros; 2317 connp->conn_lport = lport; 2318 connp->conn_fport = 0; 2319 error = ipcl_bind_insert_v6(connp, protocol, v6src, lport); 2320 } 2321 if (error == 0) { 2322 if (ire_requested) { 2323 if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL, 2324 ipst)) { 2325 error = -1; 2326 goto bad_addr; 2327 } 2328 mp = *mpp; 2329 } else if (ipsec_policy_set) { 2330 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2331 error = -1; 2332 goto bad_addr; 2333 } 2334 } 2335 } 2336 bad_addr: 2337 if (error != 0) { 2338 if (connp->conn_anon_port) { 2339 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2340 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2341 B_FALSE); 2342 } 2343 connp->conn_mlp_type = mlptSingle; 2344 } 2345 2346 if (src_ire != NULL) 2347 ire_refrele(src_ire); 2348 2349 if (ipsec_policy_set) { 2350 ASSERT(mp != NULL); 2351 freeb(mp); 2352 /* 2353 * As of now assume that nothing else accompanies 2354 * IPSEC_POLICY_SET. 
2355 */ 2356 *mpp = NULL; 2357 } 2358 2359 return (error); 2360 } 2361 int 2362 ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2363 const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert) 2364 { 2365 int error; 2366 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2367 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2368 2369 ASSERT(connp->conn_af_isv6); 2370 connp->conn_ulp = protocol; 2371 2372 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2373 /* Bind to IPv4 address */ 2374 ipaddr_t v4src; 2375 2376 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2377 2378 error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport, 2379 fanout_insert); 2380 if (error != 0) 2381 goto bad_addr; 2382 connp->conn_pkt_isv6 = B_FALSE; 2383 } else { 2384 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2385 error = 0; 2386 goto bad_addr; 2387 } 2388 error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp, 2389 lport, fanout_insert); 2390 if (error != 0) 2391 goto bad_addr; 2392 connp->conn_pkt_isv6 = B_TRUE; 2393 } 2394 2395 if (orig_pkt_isv6 != connp->conn_pkt_isv6) 2396 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2397 return (0); 2398 2399 bad_addr: 2400 if (error < 0) 2401 error = -TBADADDR; 2402 return (error); 2403 } 2404 2405 /* 2406 * Verify that both the source and destination addresses 2407 * are valid. If verify_dst, then destination address must also be reachable, 2408 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2409 * It takes ip6_pkt_t * as one of the arguments to determine correct 2410 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2411 * destination address. Note that parameter ipp is only useful for TCP connect 2412 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2413 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2414 * 2415 */ 2416 int 2417 ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2418 in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst, 2419 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2420 boolean_t verify_dst, cred_t *cr) 2421 { 2422 ire_t *src_ire; 2423 ire_t *dst_ire; 2424 int error = 0; 2425 ire_t *sire = NULL; 2426 ire_t *md_dst_ire = NULL; 2427 ill_t *md_ill = NULL; 2428 ill_t *dst_ill = NULL; 2429 ipif_t *src_ipif = NULL; 2430 zoneid_t zoneid; 2431 boolean_t ill_held = B_FALSE; 2432 mblk_t *mp = NULL; 2433 boolean_t ire_requested = B_FALSE; 2434 boolean_t ipsec_policy_set = B_FALSE; 2435 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2436 ts_label_t *tsl = NULL; 2437 cred_t *effective_cred = NULL; 2438 2439 if (mpp) 2440 mp = *mpp; 2441 2442 if (mp != NULL) { 2443 ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2444 ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET); 2445 } 2446 2447 src_ire = dst_ire = NULL; 2448 /* 2449 * If we never got a disconnect before, clear it now. 2450 */ 2451 connp->conn_fully_bound = B_FALSE; 2452 2453 zoneid = connp->conn_zoneid; 2454 2455 /* 2456 * Check whether Trusted Solaris policy allows communication with this 2457 * host, and pretend that the destination is unreachable if not. 2458 * 2459 * This is never a problem for TCP, since that transport is known to 2460 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2461 * handling. If the remote is unreachable, it will be detected at that 2462 * point, so there's no reason to check it here. 2463 * 2464 * Note that for sendto (and other datagram-oriented friends), this 2465 * check is done as part of the data path label computation instead. 2466 * The check here is just to make non-TCP connect() report the right 2467 * error. 
2468 */ 2469 if (is_system_labeled() && !IPCL_IS_TCP(connp)) { 2470 if ((error = tsol_check_dest(cr, v6dst, IPV6_VERSION, 2471 connp->conn_mac_mode, &effective_cred)) != 0) { 2472 if (ip_debug > 2) { 2473 pr_addr_dbg( 2474 "ip_bind_connected: no label for dst %s\n", 2475 AF_INET6, v6dst); 2476 } 2477 goto bad_addr; 2478 } 2479 2480 /* 2481 * tsol_check_dest() may have created a new cred with 2482 * a modified security label. Use that cred if it exists 2483 * for ire lookups. 2484 */ 2485 if (effective_cred == NULL) { 2486 tsl = crgetlabel(cr); 2487 } else { 2488 tsl = crgetlabel(effective_cred); 2489 } 2490 } 2491 2492 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2493 ipif_t *ipif; 2494 2495 /* 2496 * Use an "emulated" IRE_BROADCAST to tell the transport it 2497 * is a multicast. 2498 * Pass other information that matches 2499 * the ipif (e.g. the source address). 2500 * 2501 * conn_multicast_ill is only used for IPv6 packets 2502 */ 2503 mutex_enter(&connp->conn_lock); 2504 if (connp->conn_multicast_ill != NULL) { 2505 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2506 zoneid, 0, &ipif); 2507 } else { 2508 /* Look for default like ip_wput_v6 */ 2509 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2510 } 2511 mutex_exit(&connp->conn_lock); 2512 if (ipif == NULL || ire_requested || 2513 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2514 if (ipif != NULL) 2515 ipif_refrele(ipif); 2516 if (ip_debug > 2) { 2517 /* ip1dbg */ 2518 pr_addr_dbg("ip_bind_connected_v6: bad " 2519 "connected multicast %s\n", AF_INET6, 2520 v6dst); 2521 } 2522 error = ENETUNREACH; 2523 goto bad_addr; 2524 } 2525 if (ipif != NULL) 2526 ipif_refrele(ipif); 2527 } else { 2528 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2529 NULL, &sire, zoneid, tsl, 2530 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2531 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2532 ipst); 2533 /* 2534 * We also prevent ire's with src address INADDR_ANY to 2535 * be used, which are created temporarily for 2536 
* sending out packets from endpoints that have 2537 * conn_unspec_src set. 2538 */ 2539 if (dst_ire == NULL || 2540 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2541 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2542 /* 2543 * When verifying destination reachability, we always 2544 * complain. 2545 * 2546 * When not verifying destination reachability but we 2547 * found an IRE, i.e. the destination is reachable, 2548 * then the other tests still apply and we complain. 2549 */ 2550 if (verify_dst || (dst_ire != NULL)) { 2551 if (ip_debug > 2) { 2552 /* ip1dbg */ 2553 pr_addr_dbg("ip_bind_connected_v6: bad" 2554 " connected dst %s\n", AF_INET6, 2555 v6dst); 2556 } 2557 if (dst_ire == NULL || 2558 !(dst_ire->ire_type & IRE_HOST)) { 2559 error = ENETUNREACH; 2560 } else { 2561 error = EHOSTUNREACH; 2562 } 2563 goto bad_addr; 2564 } 2565 } 2566 } 2567 2568 /* 2569 * If the app does a connect(), it means that it will most likely 2570 * send more than 1 packet to the destination. It makes sense 2571 * to clear the temporary flag. 2572 */ 2573 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2574 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2575 irb_t *irb = dst_ire->ire_bucket; 2576 2577 rw_enter(&irb->irb_lock, RW_WRITER); 2578 /* 2579 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2580 * the lock in order to guarantee irb_tmp_ire_cnt. 2581 */ 2582 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2583 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2584 irb->irb_tmp_ire_cnt--; 2585 } 2586 rw_exit(&irb->irb_lock); 2587 } 2588 2589 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2590 2591 /* 2592 * See if we should notify ULP about MDT; we do this whether or not 2593 * ire_requested is TRUE, in order to handle active connects; MDT 2594 * eligibility tests for passive connects are handled separately 2595 * through tcp_adapt_ire(). 
We do this before the source address 2596 * selection, because dst_ire may change after a call to 2597 * ipif_select_source_v6(). This is a best-effort check, as the 2598 * packet for this connection may not actually go through 2599 * dst_ire->ire_stq, and the exact IRE can only be known after 2600 * calling ip_newroute_v6(). This is why we further check on the 2601 * IRE during Multidata packet transmission in tcp_multisend(). 2602 */ 2603 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2604 dst_ire != NULL && 2605 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2606 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2607 ILL_MDT_CAPABLE(md_ill)) { 2608 md_dst_ire = dst_ire; 2609 IRE_REFHOLD(md_dst_ire); 2610 } 2611 2612 if (dst_ire != NULL && 2613 dst_ire->ire_type == IRE_LOCAL && 2614 dst_ire->ire_zoneid != zoneid && 2615 dst_ire->ire_zoneid != ALL_ZONES) { 2616 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2617 zoneid, 0, NULL, 2618 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2619 MATCH_IRE_RJ_BHOLE, ipst); 2620 if (src_ire == NULL) { 2621 error = EHOSTUNREACH; 2622 goto bad_addr; 2623 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2624 if (!(src_ire->ire_type & IRE_HOST)) 2625 error = ENETUNREACH; 2626 else 2627 error = EHOSTUNREACH; 2628 goto bad_addr; 2629 } 2630 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2631 src_ipif = src_ire->ire_ipif; 2632 ipif_refhold(src_ipif); 2633 *v6src = src_ipif->ipif_v6lcl_addr; 2634 } 2635 ire_refrele(src_ire); 2636 src_ire = NULL; 2637 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2638 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2639 *v6src = sire->ire_src_addr_v6; 2640 ire_refrele(dst_ire); 2641 dst_ire = sire; 2642 sire = NULL; 2643 } else if (dst_ire->ire_type == IRE_CACHE && 2644 (dst_ire->ire_flags & RTF_SETSRC)) { 2645 ASSERT(dst_ire->ire_zoneid == zoneid || 2646 dst_ire->ire_zoneid == ALL_ZONES); 2647 *v6src = dst_ire->ire_src_addr_v6; 
2648 } else { 2649 /* 2650 * Pick a source address so that a proper inbound load 2651 * spreading would happen. Use dst_ill specified by the 2652 * app. when socket option or scopeid is set. 2653 */ 2654 int err; 2655 2656 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2657 uint_t if_index; 2658 2659 /* 2660 * Scope id or IPV6_PKTINFO 2661 */ 2662 2663 if_index = ipp->ipp_ifindex; 2664 dst_ill = ill_lookup_on_ifindex( 2665 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2666 ipst); 2667 if (dst_ill == NULL) { 2668 ip1dbg(("ip_bind_connected_v6:" 2669 " bad ifindex %d\n", if_index)); 2670 error = EADDRNOTAVAIL; 2671 goto bad_addr; 2672 } 2673 ill_held = B_TRUE; 2674 } else if (connp->conn_outgoing_ill != NULL) { 2675 /* 2676 * For IPV6_BOUND_IF socket option, 2677 * conn_outgoing_ill should be set 2678 * already in TCP or UDP/ICMP. 2679 */ 2680 dst_ill = conn_get_held_ill(connp, 2681 &connp->conn_outgoing_ill, &err); 2682 if (err == ILL_LOOKUP_FAILED) { 2683 ip1dbg(("ip_bind_connected_v6:" 2684 "no ill for bound_if\n")); 2685 error = EADDRNOTAVAIL; 2686 goto bad_addr; 2687 } 2688 ill_held = B_TRUE; 2689 } else if (dst_ire->ire_stq != NULL) { 2690 /* No need to hold ill here */ 2691 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2692 } else { 2693 /* No need to hold ill here */ 2694 dst_ill = dst_ire->ire_ipif->ipif_ill; 2695 } 2696 if (ip6_asp_can_lookup(ipst)) { 2697 src_ipif = ipif_select_source_v6(dst_ill, 2698 v6dst, B_FALSE, connp->conn_src_preferences, 2699 zoneid); 2700 ip6_asp_table_refrele(ipst); 2701 if (src_ipif == NULL) { 2702 pr_addr_dbg("ip_bind_connected_v6: " 2703 "no usable source address for " 2704 "connection to %s\n", 2705 AF_INET6, v6dst); 2706 error = EADDRNOTAVAIL; 2707 goto bad_addr; 2708 } 2709 *v6src = src_ipif->ipif_v6lcl_addr; 2710 } else { 2711 error = EADDRNOTAVAIL; 2712 goto bad_addr; 2713 } 2714 } 2715 } 2716 2717 /* 2718 * We do ire_route_lookup_v6() here (and not an interface lookup) 2719 * as we assert that v6src should only come from an 2720 
* UP interface for hard binding. 2721 */ 2722 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2723 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2724 2725 /* src_ire must be a local|loopback */ 2726 if (!IRE_IS_LOCAL(src_ire)) { 2727 if (ip_debug > 2) { 2728 /* ip1dbg */ 2729 pr_addr_dbg("ip_bind_connected_v6: bad " 2730 "connected src %s\n", AF_INET6, v6src); 2731 } 2732 error = EADDRNOTAVAIL; 2733 goto bad_addr; 2734 } 2735 2736 /* 2737 * If the source address is a loopback address, the 2738 * destination had best be local or multicast. 2739 * The transports that can't handle multicast will reject 2740 * those addresses. 2741 */ 2742 if (src_ire->ire_type == IRE_LOOPBACK && 2743 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2744 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2745 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2746 error = -1; 2747 goto bad_addr; 2748 } 2749 /* 2750 * Allow setting new policies. For example, disconnects come 2751 * down as ipa_t bind. As we would have set conn_policy_cached 2752 * to B_TRUE before, we should set it to B_FALSE, so that policy 2753 * can change after the disconnect. 2754 */ 2755 connp->conn_policy_cached = B_FALSE; 2756 2757 /* 2758 * The addresses have been verified. Initialize the conn 2759 * before calling the policy as they expect the conns 2760 * initialized. 2761 */ 2762 connp->conn_srcv6 = *v6src; 2763 connp->conn_remv6 = *v6dst; 2764 connp->conn_lport = lport; 2765 connp->conn_fport = fport; 2766 2767 ASSERT(!(ipsec_policy_set && ire_requested)); 2768 if (ire_requested) { 2769 iulp_t *ulp_info = NULL; 2770 2771 /* 2772 * Note that sire will not be NULL if this is an off-link 2773 * connection and there is not cache for that dest yet. 2774 * 2775 * XXX Because of an existing bug, if there are multiple 2776 * default routes, the IRE returned now may not be the actual 2777 * default route used (default routes are chosen in a 2778 * round robin fashion). 
So if the metrics for different 2779 * default routes are different, we may return the wrong 2780 * metrics. This will not be a problem if the existing 2781 * bug is fixed. 2782 */ 2783 if (sire != NULL) 2784 ulp_info = &(sire->ire_uinfo); 2785 2786 if (!ip_bind_get_ire_v6(mpp, dst_ire, v6dst, ulp_info, 2787 ipst)) { 2788 error = -1; 2789 goto bad_addr; 2790 } 2791 } else if (ipsec_policy_set) { 2792 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2793 error = -1; 2794 goto bad_addr; 2795 } 2796 } 2797 2798 /* 2799 * Cache IPsec policy in this conn. If we have per-socket policy, 2800 * we'll cache that. If we don't, we'll inherit global policy. 2801 * 2802 * We can't insert until the conn reflects the policy. Note that 2803 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2804 * connections where we don't have a policy. This is to prevent 2805 * global policy lookups in the inbound path. 2806 * 2807 * If we insert before we set conn_policy_cached, 2808 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2809 * because global policy cound be non-empty. We normally call 2810 * ipsec_check_policy() for conn_policy_cached connections only if 2811 * conn_in_enforce_policy is set. But in this case, 2812 * conn_policy_cached can get set anytime since we made the 2813 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2814 * is called, which will make the above assumption false. Thus, we 2815 * need to insert after we set conn_policy_cached. 2816 */ 2817 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2818 goto bad_addr; 2819 2820 /* If not fanout_insert this was just an address verification */ 2821 if (fanout_insert) { 2822 /* 2823 * The addresses have been verified. Time to insert in 2824 * the correct fanout list. 2825 */ 2826 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2827 connp->conn_ports, 2828 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2829 } 2830 if (error == 0) { 2831 connp->conn_fully_bound = B_TRUE; 2832 /* 2833 * Our initial checks for MDT have passed; the IRE is not 2834 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2835 * be supporting MDT. Pass the IRE, IPC and ILL into 2836 * ip_mdinfo_return(), which performs further checks 2837 * against them and upon success, returns the MDT info 2838 * mblk which we will attach to the bind acknowledgment. 2839 */ 2840 if (md_dst_ire != NULL) { 2841 mblk_t *mdinfo_mp; 2842 2843 ASSERT(md_ill != NULL); 2844 ASSERT(md_ill->ill_mdt_capab != NULL); 2845 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2846 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) { 2847 if (mp == NULL) { 2848 *mpp = mdinfo_mp; 2849 } else { 2850 linkb(mp, mdinfo_mp); 2851 } 2852 } 2853 } 2854 } 2855 bad_addr: 2856 if (ipsec_policy_set) { 2857 ASSERT(mp != NULL); 2858 freeb(mp); 2859 /* 2860 * As of now assume that nothing else accompanies 2861 * IPSEC_POLICY_SET. 
2862 */ 2863 *mpp = NULL; 2864 } 2865 refrele_and_quit: 2866 if (src_ire != NULL) 2867 IRE_REFRELE(src_ire); 2868 if (dst_ire != NULL) 2869 IRE_REFRELE(dst_ire); 2870 if (sire != NULL) 2871 IRE_REFRELE(sire); 2872 if (src_ipif != NULL) 2873 ipif_refrele(src_ipif); 2874 if (md_dst_ire != NULL) 2875 IRE_REFRELE(md_dst_ire); 2876 if (ill_held && dst_ill != NULL) 2877 ill_refrele(dst_ill); 2878 if (effective_cred != NULL) 2879 crfree(effective_cred); 2880 return (error); 2881 } 2882 2883 /* ARGSUSED */ 2884 int 2885 ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2886 in6_addr_t *v6srcp, uint16_t lport, const in6_addr_t *v6dstp, 2887 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2888 boolean_t verify_dst, cred_t *cr) 2889 { 2890 int error = 0; 2891 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2892 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2893 2894 ASSERT(connp->conn_af_isv6); 2895 connp->conn_ulp = protocol; 2896 2897 /* For raw socket, the local port is not set. */ 2898 lport = lport != 0 ? lport : connp->conn_lport; 2899 2900 /* 2901 * Bind to local and remote address. Local might be 2902 * unspecified in which case it will be extracted from 2903 * ire_src_addr_v6 2904 */ 2905 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2906 /* Connect to IPv4 address */ 2907 ipaddr_t v4src; 2908 ipaddr_t v4dst; 2909 2910 /* Is the source unspecified or mapped? */ 2911 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2912 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2913 ip1dbg(("ip_proto_bind_connected_v6: " 2914 "dst is mapped, but not the src\n")); 2915 goto bad_addr; 2916 } 2917 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2918 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2919 2920 /* Always verify destination reachability. 
*/ 2921 error = ip_bind_connected_v4(connp, mpp, protocol, &v4src, 2922 lport, v4dst, fport, B_TRUE, B_TRUE, cr); 2923 if (error != 0) 2924 goto bad_addr; 2925 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2926 connp->conn_pkt_isv6 = B_FALSE; 2927 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2928 ip1dbg(("ip_proto_bind_connected_v6: " 2929 "src is mapped, but not the dst\n")); 2930 goto bad_addr; 2931 } else { 2932 error = ip_bind_connected_v6(connp, mpp, protocol, v6srcp, 2933 lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2934 if (error != 0) 2935 goto bad_addr; 2936 connp->conn_pkt_isv6 = B_TRUE; 2937 } 2938 2939 if (orig_pkt_isv6 != connp->conn_pkt_isv6) 2940 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2941 2942 /* Send it home. */ 2943 return (0); 2944 2945 bad_addr: 2946 if (error == 0) 2947 error = -TBADADDR; 2948 return (error); 2949 } 2950 2951 /* 2952 * Get the ire in *mpp. Returns false if it fails (due to lack of space). 2953 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 2954 */ 2955 /* ARGSUSED4 */ 2956 static boolean_t 2957 ip_bind_get_ire_v6(mblk_t **mpp, ire_t *ire, const in6_addr_t *dst, 2958 iulp_t *ulp_info, ip_stack_t *ipst) 2959 { 2960 mblk_t *mp = *mpp; 2961 ire_t *ret_ire; 2962 2963 ASSERT(mp != NULL); 2964 2965 if (ire != NULL) { 2966 /* 2967 * mp initialized above to IRE_DB_REQ_TYPE 2968 * appended mblk. Its <upper protocol>'s 2969 * job to make sure there is room. 
2970 */ 2971 if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t)) 2972 return (B_FALSE); 2973 2974 mp->b_datap->db_type = IRE_DB_TYPE; 2975 mp->b_wptr = mp->b_rptr + sizeof (ire_t); 2976 bcopy(ire, mp->b_rptr, sizeof (ire_t)); 2977 ret_ire = (ire_t *)mp->b_rptr; 2978 if (IN6_IS_ADDR_MULTICAST(dst) || 2979 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 2980 ret_ire->ire_type = IRE_BROADCAST; 2981 ret_ire->ire_addr_v6 = *dst; 2982 } 2983 if (ulp_info != NULL) { 2984 bcopy(ulp_info, &(ret_ire->ire_uinfo), 2985 sizeof (iulp_t)); 2986 } 2987 ret_ire->ire_mp = mp; 2988 } else { 2989 /* 2990 * No IRE was found. Remove IRE mblk. 2991 */ 2992 *mpp = mp->b_cont; 2993 freeb(mp); 2994 } 2995 return (B_TRUE); 2996 } 2997 2998 /* 2999 * Add an ip6i_t header to the front of the mblk. 3000 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3001 * Returns NULL if allocation fails (and frees original message). 3002 * Used in outgoing path when going through ip_newroute_*v6(). 3003 * Used in incoming path to pass ifindex to transports. 3004 */ 3005 mblk_t * 3006 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3007 { 3008 mblk_t *mp1; 3009 ip6i_t *ip6i; 3010 ip6_t *ip6h; 3011 3012 ip6h = (ip6_t *)mp->b_rptr; 3013 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3014 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3015 mp->b_datap->db_ref > 1) { 3016 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3017 if (mp1 == NULL) { 3018 freemsg(mp); 3019 return (NULL); 3020 } 3021 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3022 mp1->b_cont = mp; 3023 mp = mp1; 3024 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3025 } 3026 mp->b_rptr = (uchar_t *)ip6i; 3027 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3028 ip6i->ip6i_nxt = IPPROTO_RAW; 3029 if (ill != NULL) { 3030 ip6i->ip6i_flags = IP6I_IFINDEX; 3031 /* 3032 * If `ill' is in an IPMP group, make sure we use the IPMP 3033 * interface index so that e.g. 
IPV6_RECVPKTINFO will get the 3034 * IPMP interface index and not an underlying interface index. 3035 */ 3036 if (IS_UNDER_IPMP(ill)) 3037 ip6i->ip6i_ifindex = ipmp_ill_get_ipmp_ifindex(ill); 3038 else 3039 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3040 } else { 3041 ip6i->ip6i_flags = 0; 3042 } 3043 ip6i->ip6i_nexthop = *dst; 3044 return (mp); 3045 } 3046 3047 /* 3048 * Handle protocols with which IP is less intimate. There 3049 * can be more than one stream bound to a particular 3050 * protocol. When this is the case, normally each one gets a copy 3051 * of any incoming packets. 3052 * 3053 * Zones notes: 3054 * Packets will be distributed to streams in all zones. This is really only 3055 * useful for ICMPv6 as only applications in the global zone can create raw 3056 * sockets for other protocols. 3057 */ 3058 static void 3059 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3060 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3061 boolean_t mctl_present, zoneid_t zoneid) 3062 { 3063 queue_t *rq; 3064 mblk_t *mp1, *first_mp1; 3065 in6_addr_t dst = ip6h->ip6_dst; 3066 in6_addr_t src = ip6h->ip6_src; 3067 mblk_t *first_mp = mp; 3068 boolean_t secure, shared_addr; 3069 conn_t *connp, *first_connp, *next_connp; 3070 connf_t *connfp; 3071 ip_stack_t *ipst = inill->ill_ipst; 3072 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3073 3074 if (mctl_present) { 3075 mp = first_mp->b_cont; 3076 secure = ipsec_in_is_secure(first_mp); 3077 ASSERT(mp != NULL); 3078 } else { 3079 secure = B_FALSE; 3080 } 3081 3082 shared_addr = (zoneid == ALL_ZONES); 3083 if (shared_addr) { 3084 /* 3085 * We don't allow multilevel ports for raw IP, so no need to 3086 * check for that here. 
3087 */ 3088 zoneid = tsol_packet_to_zoneid(mp); 3089 } 3090 3091 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3092 mutex_enter(&connfp->connf_lock); 3093 connp = connfp->connf_head; 3094 for (connp = connfp->connf_head; connp != NULL; 3095 connp = connp->conn_next) { 3096 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3097 zoneid) && 3098 (!is_system_labeled() || 3099 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3100 connp))) 3101 break; 3102 } 3103 3104 if (connp == NULL) { 3105 /* 3106 * No one bound to this port. Is 3107 * there a client that wants all 3108 * unclaimed datagrams? 3109 */ 3110 mutex_exit(&connfp->connf_lock); 3111 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3112 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3113 nexthdr_offset, mctl_present, zoneid, ipst)) { 3114 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3115 } 3116 3117 return; 3118 } 3119 3120 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL); 3121 3122 CONN_INC_REF(connp); 3123 first_connp = connp; 3124 3125 /* 3126 * XXX: Fix the multiple protocol listeners case. We should not 3127 * be walking the conn->next list here. 3128 */ 3129 connp = connp->conn_next; 3130 for (;;) { 3131 while (connp != NULL) { 3132 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3133 flags, zoneid) && 3134 (!is_system_labeled() || 3135 tsol_receive_local(mp, &dst, IPV6_VERSION, 3136 shared_addr, connp))) 3137 break; 3138 connp = connp->conn_next; 3139 } 3140 3141 /* 3142 * Just copy the data part alone. The mctl part is 3143 * needed just for verifying policy and it is never 3144 * sent up. 3145 */ 3146 if (connp == NULL || 3147 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3148 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3149 /* 3150 * No more intested clients or memory 3151 * allocation failed 3152 */ 3153 connp = first_connp; 3154 break; 3155 } 3156 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 3157 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3158 CONN_INC_REF(connp); 3159 mutex_exit(&connfp->connf_lock); 3160 rq = connp->conn_rq; 3161 /* 3162 * For link-local always add ifindex so that transport can set 3163 * sin6_scope_id. Avoid it for ICMP error fanout. 3164 */ 3165 if ((connp->conn_ip_recvpktinfo || 3166 IN6_IS_ADDR_LINKLOCAL(&src)) && 3167 (flags & IP_FF_IPINFO)) { 3168 /* Add header */ 3169 mp1 = ip_add_info_v6(mp1, inill, &dst); 3170 } 3171 if (mp1 == NULL) { 3172 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3173 } else if ( 3174 (IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3175 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3176 if (flags & IP_FF_RAWIP) { 3177 BUMP_MIB(ill->ill_ip_mib, 3178 rawipIfStatsInOverflows); 3179 } else { 3180 BUMP_MIB(ill->ill_icmp6_mib, 3181 ipv6IfIcmpInOverflows); 3182 } 3183 3184 freemsg(mp1); 3185 } else { 3186 ASSERT(!IPCL_IS_IPTUN(connp)); 3187 3188 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3189 secure) { 3190 first_mp1 = ipsec_check_inbound_policy( 3191 first_mp1, connp, NULL, ip6h, mctl_present); 3192 } 3193 if (first_mp1 != NULL) { 3194 if (mctl_present) 3195 freeb(first_mp1); 3196 BUMP_MIB(ill->ill_ip_mib, 3197 ipIfStatsHCInDelivers); 3198 (connp->conn_recv)(connp, mp1, NULL); 3199 } 3200 } 3201 mutex_enter(&connfp->connf_lock); 3202 /* Follow the next pointer before releasing the conn. */ 3203 next_connp = connp->conn_next; 3204 CONN_DEC_REF(connp); 3205 connp = next_connp; 3206 } 3207 3208 /* Last one. Send it upstream. 
*/ 3209 mutex_exit(&connfp->connf_lock); 3210 3211 /* Initiate IPPF processing */ 3212 if (IP6_IN_IPP(flags, ipst)) { 3213 uint_t ifindex; 3214 3215 mutex_enter(&ill->ill_lock); 3216 ifindex = ill->ill_phyint->phyint_ifindex; 3217 mutex_exit(&ill->ill_lock); 3218 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3219 if (mp == NULL) { 3220 CONN_DEC_REF(connp); 3221 if (mctl_present) 3222 freeb(first_mp); 3223 return; 3224 } 3225 } 3226 3227 /* 3228 * For link-local always add ifindex so that transport can set 3229 * sin6_scope_id. Avoid it for ICMP error fanout. 3230 */ 3231 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3232 (flags & IP_FF_IPINFO)) { 3233 /* Add header */ 3234 mp = ip_add_info_v6(mp, inill, &dst); 3235 if (mp == NULL) { 3236 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3237 CONN_DEC_REF(connp); 3238 if (mctl_present) 3239 freeb(first_mp); 3240 return; 3241 } else if (mctl_present) { 3242 first_mp->b_cont = mp; 3243 } else { 3244 first_mp = mp; 3245 } 3246 } 3247 3248 rq = connp->conn_rq; 3249 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3250 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3251 3252 if (flags & IP_FF_RAWIP) { 3253 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3254 } else { 3255 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3256 } 3257 3258 freemsg(first_mp); 3259 } else { 3260 ASSERT(!IPCL_IS_IPTUN(connp)); 3261 3262 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure) { 3263 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3264 NULL, ip6h, mctl_present); 3265 if (first_mp == NULL) { 3266 CONN_DEC_REF(connp); 3267 return; 3268 } 3269 } 3270 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3271 (connp->conn_recv)(connp, mp, NULL); 3272 if (mctl_present) 3273 freeb(first_mp); 3274 } 3275 CONN_DEC_REF(connp); 3276 } 3277 3278 /* 3279 * Send an ICMP error after patching up the packet appropriately. Returns 3280 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3281 */ 3282 int 3283 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3284 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3285 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3286 { 3287 ip6_t *ip6h; 3288 mblk_t *first_mp; 3289 boolean_t secure; 3290 unsigned char db_type; 3291 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3292 3293 first_mp = mp; 3294 if (mctl_present) { 3295 mp = mp->b_cont; 3296 secure = ipsec_in_is_secure(first_mp); 3297 ASSERT(mp != NULL); 3298 } else { 3299 /* 3300 * If this is an ICMP error being reported - which goes 3301 * up as M_CTLs, we need to convert them to M_DATA till 3302 * we finish checking with global policy because 3303 * ipsec_check_global_policy() assumes M_DATA as clear 3304 * and M_CTL as secure. 3305 */ 3306 db_type = mp->b_datap->db_type; 3307 mp->b_datap->db_type = M_DATA; 3308 secure = B_FALSE; 3309 } 3310 /* 3311 * We are generating an icmp error for some inbound packet. 3312 * Called from all ip_fanout_(udp, tcp, proto) functions. 3313 * Before we generate an error, check with global policy 3314 * to see whether this is allowed to enter the system. As 3315 * there is no "conn", we are checking with global policy. 
3316 */ 3317 ip6h = (ip6_t *)mp->b_rptr; 3318 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3319 first_mp = ipsec_check_global_policy(first_mp, NULL, 3320 NULL, ip6h, mctl_present, ipst->ips_netstack); 3321 if (first_mp == NULL) 3322 return (0); 3323 } 3324 3325 if (!mctl_present) 3326 mp->b_datap->db_type = db_type; 3327 3328 if (flags & IP_FF_SEND_ICMP) { 3329 if (flags & IP_FF_HDR_COMPLETE) { 3330 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3331 freemsg(first_mp); 3332 return (1); 3333 } 3334 } 3335 switch (icmp_type) { 3336 case ICMP6_DST_UNREACH: 3337 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3338 B_FALSE, B_FALSE, zoneid, ipst); 3339 break; 3340 case ICMP6_PARAM_PROB: 3341 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3342 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3343 break; 3344 default: 3345 #ifdef DEBUG 3346 panic("ip_fanout_send_icmp_v6: wrong type"); 3347 /*NOTREACHED*/ 3348 #else 3349 freemsg(first_mp); 3350 break; 3351 #endif 3352 } 3353 } else { 3354 freemsg(first_mp); 3355 return (0); 3356 } 3357 3358 return (1); 3359 } 3360 3361 /* 3362 * Fanout for TCP packets 3363 * The caller puts <fport, lport> in the ports parameter. 3364 */ 3365 static void 3366 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3367 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3368 { 3369 mblk_t *first_mp; 3370 boolean_t secure; 3371 conn_t *connp; 3372 tcph_t *tcph; 3373 boolean_t syn_present = B_FALSE; 3374 ip_stack_t *ipst = inill->ill_ipst; 3375 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3376 3377 first_mp = mp; 3378 if (mctl_present) { 3379 mp = first_mp->b_cont; 3380 secure = ipsec_in_is_secure(first_mp); 3381 ASSERT(mp != NULL); 3382 } else { 3383 secure = B_FALSE; 3384 } 3385 3386 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3387 3388 if (connp == NULL || 3389 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3390 /* 3391 * No hard-bound match. 
Send Reset. 3392 */ 3393 dblk_t *dp = mp->b_datap; 3394 uint32_t ill_index; 3395 3396 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3397 3398 /* Initiate IPPf processing, if needed. */ 3399 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3400 (flags & IP6_NO_IPPOLICY)) { 3401 ill_index = ill->ill_phyint->phyint_ifindex; 3402 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3403 if (first_mp == NULL) { 3404 if (connp != NULL) 3405 CONN_DEC_REF(connp); 3406 return; 3407 } 3408 } 3409 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3410 if (connp != NULL) { 3411 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3412 ipst->ips_netstack->netstack_tcp, connp); 3413 CONN_DEC_REF(connp); 3414 } else { 3415 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3416 ipst->ips_netstack->netstack_tcp, NULL); 3417 } 3418 3419 return; 3420 } 3421 3422 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3423 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3424 if (IPCL_IS_TCP(connp)) { 3425 squeue_t *sqp; 3426 3427 /* 3428 * If the queue belongs to a conn, and fused tcp 3429 * loopback is enabled, assign the eager's squeue 3430 * to be that of the active connect's. 3431 */ 3432 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3433 CONN_Q(q) && IPCL_IS_TCP(Q_TO_CONN(q)) && 3434 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3435 !secure && 3436 !IP6_IN_IPP(flags, ipst)) { 3437 ASSERT(Q_TO_CONN(q)->conn_sqp != NULL); 3438 sqp = Q_TO_CONN(q)->conn_sqp; 3439 } else { 3440 sqp = IP_SQUEUE_GET(lbolt); 3441 } 3442 3443 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3444 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3445 3446 /* 3447 * db_cksumstuff is unused in the incoming 3448 * path; Thus store the ifindex here. It will 3449 * be cleared in tcp_conn_create_v6(). 
3450 */ 3451 DB_CKSUMSTUFF(mp) = 3452 (intptr_t)ill->ill_phyint->phyint_ifindex; 3453 syn_present = B_TRUE; 3454 } 3455 } 3456 3457 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3458 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3459 if ((flags & TH_RST) || (flags & TH_URG)) { 3460 CONN_DEC_REF(connp); 3461 freemsg(first_mp); 3462 return; 3463 } 3464 if (flags & TH_ACK) { 3465 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3466 ipst->ips_netstack->netstack_tcp, connp); 3467 CONN_DEC_REF(connp); 3468 return; 3469 } 3470 3471 CONN_DEC_REF(connp); 3472 freemsg(first_mp); 3473 return; 3474 } 3475 3476 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3477 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3478 NULL, ip6h, mctl_present); 3479 if (first_mp == NULL) { 3480 CONN_DEC_REF(connp); 3481 return; 3482 } 3483 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3484 ASSERT(syn_present); 3485 if (mctl_present) { 3486 ASSERT(first_mp != mp); 3487 first_mp->b_datap->db_struioflag |= 3488 STRUIO_POLICY; 3489 } else { 3490 ASSERT(first_mp == mp); 3491 mp->b_datap->db_struioflag &= 3492 ~STRUIO_EAGER; 3493 mp->b_datap->db_struioflag |= 3494 STRUIO_POLICY; 3495 } 3496 } else { 3497 /* 3498 * Discard first_mp early since we're dealing with a 3499 * fully-connected conn_t and tcp doesn't do policy in 3500 * this case. Also, if someone is bound to IPPROTO_TCP 3501 * over raw IP, they don't expect to see a M_CTL. 
3502 */ 3503 if (mctl_present) { 3504 freeb(first_mp); 3505 mctl_present = B_FALSE; 3506 } 3507 first_mp = mp; 3508 } 3509 } 3510 3511 /* Initiate IPPF processing */ 3512 if (IP6_IN_IPP(flags, ipst)) { 3513 uint_t ifindex; 3514 3515 mutex_enter(&ill->ill_lock); 3516 ifindex = ill->ill_phyint->phyint_ifindex; 3517 mutex_exit(&ill->ill_lock); 3518 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3519 if (mp == NULL) { 3520 CONN_DEC_REF(connp); 3521 if (mctl_present) { 3522 freeb(first_mp); 3523 } 3524 return; 3525 } else if (mctl_present) { 3526 /* 3527 * ip_add_info_v6 might return a new mp. 3528 */ 3529 ASSERT(first_mp != mp); 3530 first_mp->b_cont = mp; 3531 } else { 3532 first_mp = mp; 3533 } 3534 } 3535 3536 /* 3537 * For link-local always add ifindex so that TCP can bind to that 3538 * interface. Avoid it for ICMP error fanout. 3539 */ 3540 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3541 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3542 (flags & IP_FF_IPINFO))) { 3543 /* Add header */ 3544 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3545 if (mp == NULL) { 3546 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3547 CONN_DEC_REF(connp); 3548 if (mctl_present) 3549 freeb(first_mp); 3550 return; 3551 } else if (mctl_present) { 3552 ASSERT(first_mp != mp); 3553 first_mp->b_cont = mp; 3554 } else { 3555 first_mp = mp; 3556 } 3557 } 3558 3559 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3560 if (IPCL_IS_TCP(connp)) { 3561 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, connp->conn_recv, 3562 connp, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 3563 } else { 3564 /* SOCK_RAW, IPPROTO_TCP case */ 3565 (connp->conn_recv)(connp, first_mp, NULL); 3566 CONN_DEC_REF(connp); 3567 } 3568 } 3569 3570 /* 3571 * Fanout for UDP packets. 3572 * The caller puts <fport, lport> in the ports parameter. 3573 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3574 * 3575 * If SO_REUSEADDR is set all multicast and broadcast packets 3576 * will be delivered to all streams bound to the same port. 3577 * 3578 * Zones notes: 3579 * Multicast packets will be distributed to streams in all zones. 3580 */ 3581 static void 3582 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3583 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3584 zoneid_t zoneid) 3585 { 3586 uint32_t dstport, srcport; 3587 in6_addr_t dst; 3588 mblk_t *first_mp; 3589 boolean_t secure; 3590 conn_t *connp; 3591 connf_t *connfp; 3592 conn_t *first_conn; 3593 conn_t *next_conn; 3594 mblk_t *mp1, *first_mp1; 3595 in6_addr_t src; 3596 boolean_t shared_addr; 3597 ip_stack_t *ipst = inill->ill_ipst; 3598 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3599 3600 first_mp = mp; 3601 if (mctl_present) { 3602 mp = first_mp->b_cont; 3603 secure = ipsec_in_is_secure(first_mp); 3604 ASSERT(mp != NULL); 3605 } else { 3606 secure = B_FALSE; 3607 } 3608 3609 /* Extract ports in net byte order */ 3610 dstport = htons(ntohl(ports) & 0xFFFF); 3611 srcport = htons(ntohl(ports) >> 16); 3612 dst = ip6h->ip6_dst; 3613 src = ip6h->ip6_src; 3614 3615 shared_addr = (zoneid == ALL_ZONES); 3616 if (shared_addr) { 3617 /* 3618 * No need to handle exclusive-stack zones since ALL_ZONES 3619 * only applies to the shared stack. 3620 */ 3621 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3622 /* 3623 * If no shared MLP is found, tsol_mlp_findzone returns 3624 * ALL_ZONES. In that case, we assume it's SLP, and 3625 * search for the zone based on the packet label. 3626 * That will also return ALL_ZONES on failure, but 3627 * we never allow conn_zoneid to be set to ALL_ZONES. 3628 */ 3629 if (zoneid == ALL_ZONES) 3630 zoneid = tsol_packet_to_zoneid(mp); 3631 } 3632 3633 /* Attempt to find a client stream based on destination port. 
*/ 3634 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3635 mutex_enter(&connfp->connf_lock); 3636 connp = connfp->connf_head; 3637 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3638 /* 3639 * Not multicast. Send to the one (first) client we find. 3640 */ 3641 while (connp != NULL) { 3642 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3643 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3644 conn_wantpacket_v6(connp, ill, ip6h, 3645 flags, zoneid)) { 3646 break; 3647 } 3648 connp = connp->conn_next; 3649 } 3650 if (connp == NULL || connp->conn_upq == NULL) 3651 goto notfound; 3652 3653 if (is_system_labeled() && 3654 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3655 connp)) 3656 goto notfound; 3657 3658 /* Found a client */ 3659 CONN_INC_REF(connp); 3660 mutex_exit(&connfp->connf_lock); 3661 3662 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3663 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3664 freemsg(first_mp); 3665 CONN_DEC_REF(connp); 3666 return; 3667 } 3668 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3669 first_mp = ipsec_check_inbound_policy(first_mp, 3670 connp, NULL, ip6h, mctl_present); 3671 if (first_mp == NULL) { 3672 CONN_DEC_REF(connp); 3673 return; 3674 } 3675 } 3676 /* Initiate IPPF processing */ 3677 if (IP6_IN_IPP(flags, ipst)) { 3678 uint_t ifindex; 3679 3680 mutex_enter(&ill->ill_lock); 3681 ifindex = ill->ill_phyint->phyint_ifindex; 3682 mutex_exit(&ill->ill_lock); 3683 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3684 if (mp == NULL) { 3685 CONN_DEC_REF(connp); 3686 if (mctl_present) 3687 freeb(first_mp); 3688 return; 3689 } 3690 } 3691 /* 3692 * For link-local always add ifindex so that 3693 * transport can set sin6_scope_id. Avoid it for 3694 * ICMP error fanout. 
3695 */ 3696 if ((connp->conn_ip_recvpktinfo || 3697 IN6_IS_ADDR_LINKLOCAL(&src)) && 3698 (flags & IP_FF_IPINFO)) { 3699 /* Add header */ 3700 mp = ip_add_info_v6(mp, inill, &dst); 3701 if (mp == NULL) { 3702 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3703 CONN_DEC_REF(connp); 3704 if (mctl_present) 3705 freeb(first_mp); 3706 return; 3707 } else if (mctl_present) { 3708 first_mp->b_cont = mp; 3709 } else { 3710 first_mp = mp; 3711 } 3712 } 3713 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3714 3715 /* Send it upstream */ 3716 (connp->conn_recv)(connp, mp, NULL); 3717 3718 IP6_STAT(ipst, ip6_udp_fannorm); 3719 CONN_DEC_REF(connp); 3720 if (mctl_present) 3721 freeb(first_mp); 3722 return; 3723 } 3724 3725 while (connp != NULL) { 3726 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3727 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3728 (!is_system_labeled() || 3729 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3730 connp))) 3731 break; 3732 connp = connp->conn_next; 3733 } 3734 3735 if (connp == NULL || connp->conn_upq == NULL) 3736 goto notfound; 3737 3738 first_conn = connp; 3739 3740 CONN_INC_REF(connp); 3741 connp = connp->conn_next; 3742 for (;;) { 3743 while (connp != NULL) { 3744 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3745 src) && conn_wantpacket_v6(connp, ill, ip6h, 3746 flags, zoneid) && 3747 (!is_system_labeled() || 3748 tsol_receive_local(mp, &dst, IPV6_VERSION, 3749 shared_addr, connp))) 3750 break; 3751 connp = connp->conn_next; 3752 } 3753 /* 3754 * Just copy the data part alone. The mctl part is 3755 * needed just for verifying policy and it is never 3756 * sent up. 3757 */ 3758 if (connp == NULL || 3759 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3760 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3761 /* 3762 * No more interested clients or memory 3763 * allocation failed 3764 */ 3765 connp = first_conn; 3766 break; 3767 } 3768 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3769 CONN_INC_REF(connp); 3770 mutex_exit(&connfp->connf_lock); 3771 /* 3772 * For link-local always add ifindex so that transport 3773 * can set sin6_scope_id. Avoid it for ICMP error 3774 * fanout. 3775 */ 3776 if ((connp->conn_ip_recvpktinfo || 3777 IN6_IS_ADDR_LINKLOCAL(&src)) && 3778 (flags & IP_FF_IPINFO)) { 3779 /* Add header */ 3780 mp1 = ip_add_info_v6(mp1, inill, &dst); 3781 } 3782 /* mp1 could have changed */ 3783 if (mctl_present) 3784 first_mp1->b_cont = mp1; 3785 else 3786 first_mp1 = mp1; 3787 if (mp1 == NULL) { 3788 if (mctl_present) 3789 freeb(first_mp1); 3790 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3791 goto next_one; 3792 } 3793 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3794 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3795 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3796 freemsg(first_mp1); 3797 goto next_one; 3798 } 3799 3800 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3801 first_mp1 = ipsec_check_inbound_policy 3802 (first_mp1, connp, NULL, ip6h, 3803 mctl_present); 3804 } 3805 if (first_mp1 != NULL) { 3806 if (mctl_present) 3807 freeb(first_mp1); 3808 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3809 3810 /* Send it upstream */ 3811 (connp->conn_recv)(connp, mp1, NULL); 3812 } 3813 next_one: 3814 mutex_enter(&connfp->connf_lock); 3815 /* Follow the next pointer before releasing the conn. */ 3816 next_conn = connp->conn_next; 3817 IP6_STAT(ipst, ip6_udp_fanmb); 3818 CONN_DEC_REF(connp); 3819 connp = next_conn; 3820 } 3821 3822 /* Last one. Send it upstream. 
*/ 3823 mutex_exit(&connfp->connf_lock); 3824 3825 /* Initiate IPPF processing */ 3826 if (IP6_IN_IPP(flags, ipst)) { 3827 uint_t ifindex; 3828 3829 mutex_enter(&ill->ill_lock); 3830 ifindex = ill->ill_phyint->phyint_ifindex; 3831 mutex_exit(&ill->ill_lock); 3832 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3833 if (mp == NULL) { 3834 CONN_DEC_REF(connp); 3835 if (mctl_present) { 3836 freeb(first_mp); 3837 } 3838 return; 3839 } 3840 } 3841 3842 /* 3843 * For link-local always add ifindex so that transport can set 3844 * sin6_scope_id. Avoid it for ICMP error fanout. 3845 */ 3846 if ((connp->conn_ip_recvpktinfo || 3847 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3848 /* Add header */ 3849 mp = ip_add_info_v6(mp, inill, &dst); 3850 if (mp == NULL) { 3851 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3852 CONN_DEC_REF(connp); 3853 if (mctl_present) 3854 freeb(first_mp); 3855 return; 3856 } else if (mctl_present) { 3857 first_mp->b_cont = mp; 3858 } else { 3859 first_mp = mp; 3860 } 3861 } 3862 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3863 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3864 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3865 freemsg(mp); 3866 } else { 3867 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3868 first_mp = ipsec_check_inbound_policy(first_mp, 3869 connp, NULL, ip6h, mctl_present); 3870 if (first_mp == NULL) { 3871 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3872 CONN_DEC_REF(connp); 3873 return; 3874 } 3875 } 3876 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3877 3878 /* Send it upstream */ 3879 (connp->conn_recv)(connp, mp, NULL); 3880 } 3881 IP6_STAT(ipst, ip6_udp_fanmb); 3882 CONN_DEC_REF(connp); 3883 if (mctl_present) 3884 freeb(first_mp); 3885 return; 3886 3887 notfound: 3888 mutex_exit(&connfp->connf_lock); 3889 /* 3890 * No one bound to this port. Is 3891 * there a client that wants all 3892 * unclaimed datagrams? 
3893 */ 3894 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3895 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3896 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3897 zoneid); 3898 } else { 3899 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3900 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3901 mctl_present, zoneid, ipst)) { 3902 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3903 } 3904 } 3905 } 3906 3907 /* 3908 * int ip_find_hdr_v6() 3909 * 3910 * This routine is used by the upper layer protocols and the IP tunnel 3911 * module to: 3912 * - Set extension header pointers to appropriate locations 3913 * - Determine IPv6 header length and return it 3914 * - Return a pointer to the last nexthdr value 3915 * 3916 * The caller must initialize ipp_fields. 3917 * 3918 * NOTE: If multiple extension headers of the same type are present, 3919 * ip_find_hdr_v6() will set the respective extension header pointers 3920 * to the first one that it encounters in the IPv6 header. It also 3921 * skips fragment headers. This routine deals with malformed packets 3922 * of various sorts in which case the returned length is up to the 3923 * malformed part. 3924 */ 3925 int 3926 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3927 { 3928 uint_t length, ehdrlen; 3929 uint8_t nexthdr; 3930 uint8_t *whereptr, *endptr; 3931 ip6_dest_t *tmpdstopts; 3932 ip6_rthdr_t *tmprthdr; 3933 ip6_hbh_t *tmphopopts; 3934 ip6_frag_t *tmpfraghdr; 3935 3936 length = IPV6_HDR_LEN; 3937 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3938 endptr = mp->b_wptr; 3939 3940 nexthdr = ip6h->ip6_nxt; 3941 while (whereptr < endptr) { 3942 /* Is there enough left for len + nexthdr? 
*/ 3943 if (whereptr + MIN_EHDR_LEN > endptr) 3944 goto done; 3945 3946 switch (nexthdr) { 3947 case IPPROTO_HOPOPTS: 3948 tmphopopts = (ip6_hbh_t *)whereptr; 3949 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 3950 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 3951 goto done; 3952 nexthdr = tmphopopts->ip6h_nxt; 3953 /* return only 1st hbh */ 3954 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 3955 ipp->ipp_fields |= IPPF_HOPOPTS; 3956 ipp->ipp_hopopts = tmphopopts; 3957 ipp->ipp_hopoptslen = ehdrlen; 3958 } 3959 break; 3960 case IPPROTO_DSTOPTS: 3961 tmpdstopts = (ip6_dest_t *)whereptr; 3962 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 3963 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 3964 goto done; 3965 nexthdr = tmpdstopts->ip6d_nxt; 3966 /* 3967 * ipp_dstopts is set to the destination header after a 3968 * routing header. 3969 * Assume it is a post-rthdr destination header 3970 * and adjust when we find an rthdr. 3971 */ 3972 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 3973 ipp->ipp_fields |= IPPF_DSTOPTS; 3974 ipp->ipp_dstopts = tmpdstopts; 3975 ipp->ipp_dstoptslen = ehdrlen; 3976 } 3977 break; 3978 case IPPROTO_ROUTING: 3979 tmprthdr = (ip6_rthdr_t *)whereptr; 3980 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 3981 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 3982 goto done; 3983 nexthdr = tmprthdr->ip6r_nxt; 3984 /* return only 1st rthdr */ 3985 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 3986 ipp->ipp_fields |= IPPF_RTHDR; 3987 ipp->ipp_rthdr = tmprthdr; 3988 ipp->ipp_rthdrlen = ehdrlen; 3989 } 3990 /* 3991 * Make any destination header we've seen be a 3992 * pre-rthdr destination header. 
3993 */ 3994 if (ipp->ipp_fields & IPPF_DSTOPTS) { 3995 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3996 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3997 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 3998 ipp->ipp_dstopts = NULL; 3999 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4000 ipp->ipp_dstoptslen = 0; 4001 } 4002 break; 4003 case IPPROTO_FRAGMENT: 4004 tmpfraghdr = (ip6_frag_t *)whereptr; 4005 ehdrlen = sizeof (ip6_frag_t); 4006 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4007 goto done; 4008 nexthdr = tmpfraghdr->ip6f_nxt; 4009 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4010 ipp->ipp_fields |= IPPF_FRAGHDR; 4011 ipp->ipp_fraghdr = tmpfraghdr; 4012 ipp->ipp_fraghdrlen = ehdrlen; 4013 } 4014 break; 4015 case IPPROTO_NONE: 4016 default: 4017 goto done; 4018 } 4019 length += ehdrlen; 4020 whereptr += ehdrlen; 4021 } 4022 done: 4023 if (nexthdrp != NULL) 4024 *nexthdrp = nexthdr; 4025 return (length); 4026 } 4027 4028 int 4029 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4030 { 4031 ire_t *ire; 4032 4033 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4034 ire = ire_lookup_local_v6(zoneid, ipst); 4035 if (ire == NULL) { 4036 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4037 return (1); 4038 } 4039 ip6h->ip6_src = ire->ire_addr_v6; 4040 ire_refrele(ire); 4041 } 4042 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4043 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4044 return (0); 4045 } 4046 4047 /* 4048 * Try to determine where and what are the IPv6 header length and 4049 * pointer to nexthdr value for the upper layer protocol (or an 4050 * unknown next hdr). 4051 * 4052 * Parameters returns a pointer to the nexthdr value; 4053 * Must handle malformed packets of various sorts. 4054 * Function returns failure for malformed cases. 
4055 */ 4056 boolean_t 4057 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4058 uint8_t **nexthdrpp) 4059 { 4060 uint16_t length; 4061 uint_t ehdrlen; 4062 uint8_t *nexthdrp; 4063 uint8_t *whereptr; 4064 uint8_t *endptr; 4065 ip6_dest_t *desthdr; 4066 ip6_rthdr_t *rthdr; 4067 ip6_frag_t *fraghdr; 4068 4069 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4070 length = IPV6_HDR_LEN; 4071 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4072 endptr = mp->b_wptr; 4073 4074 nexthdrp = &ip6h->ip6_nxt; 4075 while (whereptr < endptr) { 4076 /* Is there enough left for len + nexthdr? */ 4077 if (whereptr + MIN_EHDR_LEN > endptr) 4078 break; 4079 4080 switch (*nexthdrp) { 4081 case IPPROTO_HOPOPTS: 4082 case IPPROTO_DSTOPTS: 4083 /* Assumes the headers are identical for hbh and dst */ 4084 desthdr = (ip6_dest_t *)whereptr; 4085 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4086 if ((uchar_t *)desthdr + ehdrlen > endptr) 4087 return (B_FALSE); 4088 nexthdrp = &desthdr->ip6d_nxt; 4089 break; 4090 case IPPROTO_ROUTING: 4091 rthdr = (ip6_rthdr_t *)whereptr; 4092 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4093 if ((uchar_t *)rthdr + ehdrlen > endptr) 4094 return (B_FALSE); 4095 nexthdrp = &rthdr->ip6r_nxt; 4096 break; 4097 case IPPROTO_FRAGMENT: 4098 fraghdr = (ip6_frag_t *)whereptr; 4099 ehdrlen = sizeof (ip6_frag_t); 4100 if ((uchar_t *)&fraghdr[1] > endptr) 4101 return (B_FALSE); 4102 nexthdrp = &fraghdr->ip6f_nxt; 4103 break; 4104 case IPPROTO_NONE: 4105 /* No next header means we're finished */ 4106 default: 4107 *hdr_length_ptr = length; 4108 *nexthdrpp = nexthdrp; 4109 return (B_TRUE); 4110 } 4111 length += ehdrlen; 4112 whereptr += ehdrlen; 4113 *hdr_length_ptr = length; 4114 *nexthdrpp = nexthdrp; 4115 } 4116 switch (*nexthdrp) { 4117 case IPPROTO_HOPOPTS: 4118 case IPPROTO_DSTOPTS: 4119 case IPPROTO_ROUTING: 4120 case IPPROTO_FRAGMENT: 4121 /* 4122 * If any know extension headers are still to be processed, 4123 * the 
packet's malformed (or at least all the IP header(s) are 4124 * not in the same mblk - and that should never happen. 4125 */ 4126 return (B_FALSE); 4127 4128 default: 4129 /* 4130 * If we get here, we know that all of the IP headers were in 4131 * the same mblk, even if the ULP header is in the next mblk. 4132 */ 4133 *hdr_length_ptr = length; 4134 *nexthdrpp = nexthdrp; 4135 return (B_TRUE); 4136 } 4137 } 4138 4139 /* 4140 * Return the length of the IPv6 related headers (including extension headers) 4141 * Returns a length even if the packet is malformed. 4142 */ 4143 int 4144 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4145 { 4146 uint16_t hdr_len; 4147 uint8_t *nexthdrp; 4148 4149 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4150 return (hdr_len); 4151 } 4152 4153 /* 4154 * IPv6 - 4155 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4156 * to send out a packet to a destination address for which we do not have 4157 * specific routing information. 4158 * 4159 * Handle non-multicast packets. If ill is non-NULL the match is done 4160 * for that ill. 4161 * 4162 * When a specific ill is specified (using IPV6_PKTINFO, 4163 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4164 * on routing entries (ftable and ctable) that have a matching 4165 * ire->ire_ipif->ipif_ill. Thus this can only be used 4166 * for destinations that are on-link for the specific ill 4167 * and that can appear on multiple links. Thus it is useful 4168 * for multicast destinations, link-local destinations, and 4169 * at some point perhaps for site-local destinations (if the 4170 * node sits at a site boundary). 4171 * We create the cache entries in the regular ctable since 4172 * it can not "confuse" things for other destinations. 4173 * 4174 * NOTE : These are the scopes of some of the variables that point at IRE, 4175 * which needs to be followed while making any future modifications 4176 * to avoid memory leaks. 
4177 * 4178 * - ire and sire are the entries looked up initially by 4179 * ire_ftable_lookup_v6. 4180 * - ipif_ire is used to hold the interface ire associated with 4181 * the new cache ire. But it's scope is limited, so we always REFRELE 4182 * it before branching out to error paths. 4183 * - save_ire is initialized before ire_create, so that ire returned 4184 * by ire_create will not over-write the ire. We REFRELE save_ire 4185 * before breaking out of the switch. 4186 * 4187 * Thus on failures, we have to REFRELE only ire and sire, if they 4188 * are not NULL. 4189 */ 4190 /* ARGSUSED */ 4191 void 4192 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4193 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4194 { 4195 in6_addr_t v6gw; 4196 in6_addr_t dst; 4197 ire_t *ire = NULL; 4198 ipif_t *src_ipif = NULL; 4199 ill_t *dst_ill = NULL; 4200 ire_t *sire = NULL; 4201 ire_t *save_ire; 4202 ip6_t *ip6h; 4203 int err = 0; 4204 mblk_t *first_mp; 4205 ipsec_out_t *io; 4206 ushort_t ire_marks = 0; 4207 int match_flags; 4208 ire_t *first_sire = NULL; 4209 mblk_t *copy_mp = NULL; 4210 mblk_t *xmit_mp = NULL; 4211 in6_addr_t save_dst; 4212 uint32_t multirt_flags = 4213 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4214 boolean_t multirt_is_resolvable; 4215 boolean_t multirt_resolve_next; 4216 boolean_t need_rele = B_FALSE; 4217 boolean_t ip6_asp_table_held = B_FALSE; 4218 tsol_ire_gw_secattr_t *attrp = NULL; 4219 tsol_gcgrp_t *gcgrp = NULL; 4220 tsol_gcgrp_addr_t ga; 4221 4222 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4223 4224 first_mp = mp; 4225 if (mp->b_datap->db_type == M_CTL) { 4226 mp = mp->b_cont; 4227 io = (ipsec_out_t *)first_mp->b_rptr; 4228 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4229 } else { 4230 io = NULL; 4231 } 4232 4233 ip6h = (ip6_t *)mp->b_rptr; 4234 4235 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4236 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4237 goto icmp_err_ret; 4238 } else if 
(IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4239 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4240 goto icmp_err_ret; 4241 } 4242 4243 /* 4244 * If this IRE is created for forwarding or it is not for 4245 * TCP traffic, mark it as temporary. 4246 * 4247 * Is it sufficient just to check the next header?? 4248 */ 4249 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4250 ire_marks |= IRE_MARK_TEMPORARY; 4251 4252 /* 4253 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4254 * chain until it gets the most specific information available. 4255 * For example, we know that there is no IRE_CACHE for this dest, 4256 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4257 * ire_ftable_lookup_v6 will look up the gateway, etc. 4258 */ 4259 4260 if (ill == NULL) { 4261 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4262 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4263 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4264 NULL, &sire, zoneid, 0, msg_getlabel(mp), 4265 match_flags, ipst); 4266 } else { 4267 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4268 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4269 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4270 4271 /* 4272 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4273 * tied to an underlying interface, IS_UNDER_IPMP() may be 4274 * true even when building IREs that will be used for data 4275 * traffic. As such, use the packet's source address to 4276 * determine whether the traffic is test traffic, and set 4277 * MATCH_IRE_MARK_TESTHIDDEN if so. 
4278 */ 4279 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 4280 if (ipif_lookup_testaddr_v6(ill, v6srcp, NULL)) 4281 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 4282 } 4283 4284 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4285 &sire, zoneid, 0, msg_getlabel(mp), match_flags, ipst); 4286 } 4287 4288 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4289 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4290 4291 /* 4292 * We enter a loop that will be run only once in most cases. 4293 * The loop is re-entered in the case where the destination 4294 * can be reached through multiple RTF_MULTIRT-flagged routes. 4295 * The intention is to compute multiple routes to a single 4296 * destination in a single ip_newroute_v6 call. 4297 * The information is contained in sire->ire_flags. 4298 */ 4299 do { 4300 multirt_resolve_next = B_FALSE; 4301 4302 if (dst_ill != NULL) { 4303 ill_refrele(dst_ill); 4304 dst_ill = NULL; 4305 } 4306 if (src_ipif != NULL) { 4307 ipif_refrele(src_ipif); 4308 src_ipif = NULL; 4309 } 4310 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4311 ip3dbg(("ip_newroute_v6: starting new resolution " 4312 "with first_mp %p, tag %d\n", 4313 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4314 4315 /* 4316 * We check if there are trailing unresolved routes for 4317 * the destination contained in sire. 4318 */ 4319 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4320 &sire, multirt_flags, msg_getlabel(mp), ipst); 4321 4322 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4323 "ire %p, sire %p\n", 4324 multirt_is_resolvable, (void *)ire, (void *)sire)); 4325 4326 if (!multirt_is_resolvable) { 4327 /* 4328 * No more multirt routes to resolve; give up 4329 * (all routes resolved or no more resolvable 4330 * routes). 
4331 */ 4332 if (ire != NULL) { 4333 ire_refrele(ire); 4334 ire = NULL; 4335 } 4336 } else { 4337 ASSERT(sire != NULL); 4338 ASSERT(ire != NULL); 4339 /* 4340 * We simply use first_sire as a flag that 4341 * indicates if a resolvable multirt route has 4342 * already been found during the preceding 4343 * loops. If it is not the case, we may have 4344 * to send an ICMP error to report that the 4345 * destination is unreachable. We do not 4346 * IRE_REFHOLD first_sire. 4347 */ 4348 if (first_sire == NULL) { 4349 first_sire = sire; 4350 } 4351 } 4352 } 4353 if ((ire == NULL) || (ire == sire)) { 4354 /* 4355 * either ire == NULL (the destination cannot be 4356 * resolved) or ire == sire (the gateway cannot be 4357 * resolved). At this point, there are no more routes 4358 * to resolve for the destination, thus we exit. 4359 */ 4360 if (ip_debug > 3) { 4361 /* ip2dbg */ 4362 pr_addr_dbg("ip_newroute_v6: " 4363 "can't resolve %s\n", AF_INET6, v6dstp); 4364 } 4365 ip3dbg(("ip_newroute_v6: " 4366 "ire %p, sire %p, first_sire %p\n", 4367 (void *)ire, (void *)sire, (void *)first_sire)); 4368 4369 if (sire != NULL) { 4370 ire_refrele(sire); 4371 sire = NULL; 4372 } 4373 4374 if (first_sire != NULL) { 4375 /* 4376 * At least one multirt route has been found 4377 * in the same ip_newroute() call; there is no 4378 * need to report an ICMP error. 4379 * first_sire was not IRE_REFHOLDed. 4380 */ 4381 MULTIRT_DEBUG_UNTAG(first_mp); 4382 freemsg(first_mp); 4383 return; 4384 } 4385 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4386 RTA_DST, ipst); 4387 goto icmp_err_ret; 4388 } 4389 4390 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4391 4392 /* 4393 * Verify that the returned IRE does not have either the 4394 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4395 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 
4396 */ 4397 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4398 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4399 goto icmp_err_ret; 4400 4401 /* 4402 * Increment the ire_ob_pkt_count field for ire if it is an 4403 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4404 * increment the same for the parent IRE, sire, if it is some 4405 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4406 */ 4407 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4408 UPDATE_OB_PKT_COUNT(ire); 4409 ire->ire_last_used_time = lbolt; 4410 } 4411 4412 if (sire != NULL) { 4413 mutex_enter(&sire->ire_lock); 4414 v6gw = sire->ire_gateway_addr_v6; 4415 mutex_exit(&sire->ire_lock); 4416 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4417 IRE_INTERFACE)) == 0); 4418 UPDATE_OB_PKT_COUNT(sire); 4419 sire->ire_last_used_time = lbolt; 4420 } else { 4421 v6gw = ipv6_all_zeros; 4422 } 4423 4424 /* 4425 * We have a route to reach the destination. Find the 4426 * appropriate ill, then get a source address that matches the 4427 * right scope via ipif_select_source_v6(). 4428 * 4429 * If we are here trying to create an IRE_CACHE for an offlink 4430 * destination and have an IRE_CACHE entry for VNI, then use 4431 * ire_stq instead since VNI's queue is a black hole. 4432 * 4433 * Note: While we pick a dst_ill we are really only interested 4434 * in the ill for load spreading. The source ipif is 4435 * determined by source address selection below. 
4436 */ 4437 if ((ire->ire_type == IRE_CACHE) && 4438 IS_VNI(ire->ire_ipif->ipif_ill)) { 4439 dst_ill = ire->ire_stq->q_ptr; 4440 ill_refhold(dst_ill); 4441 } else { 4442 ill_t *ill = ire->ire_ipif->ipif_ill; 4443 4444 if (IS_IPMP(ill)) { 4445 dst_ill = 4446 ipmp_illgrp_hold_next_ill(ill->ill_grp); 4447 } else { 4448 dst_ill = ill; 4449 ill_refhold(dst_ill); 4450 } 4451 } 4452 4453 if (dst_ill == NULL) { 4454 if (ip_debug > 2) { 4455 pr_addr_dbg("ip_newroute_v6 : no dst " 4456 "ill for dst %s\n", AF_INET6, v6dstp); 4457 } 4458 goto icmp_err_ret; 4459 } 4460 4461 if (ill != NULL && dst_ill != ill && 4462 !IS_IN_SAME_ILLGRP(dst_ill, ill)) { 4463 /* 4464 * We should have found a route matching "ill" 4465 * as we called ire_ftable_lookup_v6 with 4466 * MATCH_IRE_ILL. Rather than asserting when 4467 * there is a mismatch, we just drop the packet. 4468 */ 4469 ip0dbg(("ip_newroute_v6: BOUND_IF failed: " 4470 "dst_ill %s ill %s\n", dst_ill->ill_name, 4471 ill->ill_name)); 4472 goto icmp_err_ret; 4473 } 4474 4475 /* 4476 * Pick a source address which matches the scope of the 4477 * destination address. 4478 * For RTF_SETSRC routes, the source address is imposed by the 4479 * parent ire (sire). 4480 */ 4481 ASSERT(src_ipif == NULL); 4482 4483 /* 4484 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4485 * tied to the underlying interface, IS_UNDER_IPMP() may be 4486 * true even when building IREs that will be used for data 4487 * traffic. As such, see if the packet's source address is a 4488 * test address, and if so use that test address's ipif for 4489 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 4490 * ire_add_v6() can work properly. 
4491 */ 4492 if (ill != NULL && IS_UNDER_IPMP(ill)) 4493 (void) ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 4494 4495 if (src_ipif == NULL && ire->ire_type == IRE_IF_RESOLVER && 4496 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4497 ip6_asp_can_lookup(ipst)) { 4498 /* 4499 * The ire cache entry we're adding is for the 4500 * gateway itself. The source address in this case 4501 * is relative to the gateway's address. 4502 */ 4503 ip6_asp_table_held = B_TRUE; 4504 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4505 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4506 if (src_ipif != NULL) 4507 ire_marks |= IRE_MARK_USESRC_CHECK; 4508 } else if (src_ipif == NULL) { 4509 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4510 /* 4511 * Check that the ipif matching the requested 4512 * source address still exists. 4513 */ 4514 src_ipif = ipif_lookup_addr_v6( 4515 &sire->ire_src_addr_v6, NULL, zoneid, 4516 NULL, NULL, NULL, NULL, ipst); 4517 } 4518 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4519 ip6_asp_table_held = B_TRUE; 4520 src_ipif = ipif_select_source_v6(dst_ill, 4521 v6dstp, B_FALSE, 4522 IPV6_PREFER_SRC_DEFAULT, zoneid); 4523 if (src_ipif != NULL) 4524 ire_marks |= IRE_MARK_USESRC_CHECK; 4525 } 4526 } 4527 4528 if (src_ipif == NULL) { 4529 if (ip_debug > 2) { 4530 /* ip1dbg */ 4531 pr_addr_dbg("ip_newroute_v6: no src for " 4532 "dst %s\n", AF_INET6, v6dstp); 4533 printf("ip_newroute_v6: interface name %s\n", 4534 dst_ill->ill_name); 4535 } 4536 goto icmp_err_ret; 4537 } 4538 4539 if (ip_debug > 3) { 4540 /* ip2dbg */ 4541 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4542 AF_INET6, &v6gw); 4543 } 4544 ip2dbg(("\tire type %s (%d)\n", 4545 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4546 4547 /* 4548 * At this point in ip_newroute_v6(), ire is either the 4549 * IRE_CACHE of the next-hop gateway for an off-subnet 4550 * destination or an IRE_INTERFACE type that should be used 4551 * to resolve an on-subnet destination or an on-subnet 4552 * 
next-hop gateway. 4553 * 4554 * In the IRE_CACHE case, we have the following : 4555 * 4556 * 1) src_ipif - used for getting a source address. 4557 * 4558 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4559 * means packets using this IRE_CACHE will go out on dst_ill. 4560 * 4561 * 3) The IRE sire will point to the prefix that is the longest 4562 * matching route for the destination. These prefix types 4563 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4564 * 4565 * The newly created IRE_CACHE entry for the off-subnet 4566 * destination is tied to both the prefix route and the 4567 * interface route used to resolve the next-hop gateway 4568 * via the ire_phandle and ire_ihandle fields, respectively. 4569 * 4570 * In the IRE_INTERFACE case, we have the following : 4571 * 4572 * 1) src_ipif - used for getting a source address. 4573 * 4574 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4575 * means packets using the IRE_CACHE that we will build 4576 * here will go out on dst_ill. 4577 * 4578 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4579 * to be created will only be tied to the IRE_INTERFACE that 4580 * was derived from the ire_ihandle field. 4581 * 4582 * If sire is non-NULL, it means the destination is off-link 4583 * and we will first create the IRE_CACHE for the gateway. 4584 * Next time through ip_newroute_v6, we will create the 4585 * IRE_CACHE for the final destination as described above. 4586 */ 4587 save_ire = ire; 4588 switch (ire->ire_type) { 4589 case IRE_CACHE: { 4590 ire_t *ipif_ire; 4591 4592 ASSERT(sire != NULL); 4593 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4594 mutex_enter(&ire->ire_lock); 4595 v6gw = ire->ire_gateway_addr_v6; 4596 mutex_exit(&ire->ire_lock); 4597 } 4598 /* 4599 * We need 3 ire's to create a new cache ire for an 4600 * off-link destination from the cache ire of the 4601 * gateway. 4602 * 4603 * 1. The prefix ire 'sire' 4604 * 2. The cache ire of the gateway 'ire' 4605 * 3. 
The interface ire 'ipif_ire' 4606 * 4607 * We have (1) and (2). We lookup (3) below. 4608 * 4609 * If there is no interface route to the gateway, 4610 * it is a race condition, where we found the cache 4611 * but the inteface route has been deleted. 4612 */ 4613 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4614 if (ipif_ire == NULL) { 4615 ip1dbg(("ip_newroute_v6:" 4616 "ire_ihandle_lookup_offlink_v6 failed\n")); 4617 goto icmp_err_ret; 4618 } 4619 4620 /* 4621 * Note: the new ire inherits RTF_SETSRC 4622 * and RTF_MULTIRT to propagate these flags from prefix 4623 * to cache. 4624 */ 4625 4626 /* 4627 * Check cached gateway IRE for any security 4628 * attributes; if found, associate the gateway 4629 * credentials group to the destination IRE. 4630 */ 4631 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4632 mutex_enter(&attrp->igsa_lock); 4633 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4634 GCGRP_REFHOLD(gcgrp); 4635 mutex_exit(&attrp->igsa_lock); 4636 } 4637 4638 ire = ire_create_v6( 4639 v6dstp, /* dest address */ 4640 &ipv6_all_ones, /* mask */ 4641 &src_ipif->ipif_v6src_addr, /* source address */ 4642 &v6gw, /* gateway address */ 4643 &save_ire->ire_max_frag, 4644 NULL, /* src nce */ 4645 dst_ill->ill_rq, /* recv-from queue */ 4646 dst_ill->ill_wq, /* send-to queue */ 4647 IRE_CACHE, 4648 src_ipif, 4649 &sire->ire_mask_v6, /* Parent mask */ 4650 sire->ire_phandle, /* Parent handle */ 4651 ipif_ire->ire_ihandle, /* Interface handle */ 4652 sire->ire_flags & /* flags if any */ 4653 (RTF_SETSRC | RTF_MULTIRT), 4654 &(sire->ire_uinfo), 4655 NULL, 4656 gcgrp, 4657 ipst); 4658 4659 if (ire == NULL) { 4660 if (gcgrp != NULL) { 4661 GCGRP_REFRELE(gcgrp); 4662 gcgrp = NULL; 4663 } 4664 ire_refrele(save_ire); 4665 ire_refrele(ipif_ire); 4666 break; 4667 } 4668 4669 /* reference now held by IRE */ 4670 gcgrp = NULL; 4671 4672 ire->ire_marks |= ire_marks; 4673 4674 /* 4675 * Prevent sire and ipif_ire from getting deleted. 
The 4676 * newly created ire is tied to both of them via the 4677 * phandle and ihandle respectively. 4678 */ 4679 IRB_REFHOLD(sire->ire_bucket); 4680 /* Has it been removed already ? */ 4681 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4682 IRB_REFRELE(sire->ire_bucket); 4683 ire_refrele(ipif_ire); 4684 ire_refrele(save_ire); 4685 break; 4686 } 4687 4688 IRB_REFHOLD(ipif_ire->ire_bucket); 4689 /* Has it been removed already ? */ 4690 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4691 IRB_REFRELE(ipif_ire->ire_bucket); 4692 IRB_REFRELE(sire->ire_bucket); 4693 ire_refrele(ipif_ire); 4694 ire_refrele(save_ire); 4695 break; 4696 } 4697 4698 xmit_mp = first_mp; 4699 if (ire->ire_flags & RTF_MULTIRT) { 4700 copy_mp = copymsg(first_mp); 4701 if (copy_mp != NULL) { 4702 xmit_mp = copy_mp; 4703 MULTIRT_DEBUG_TAG(first_mp); 4704 } 4705 } 4706 ire_add_then_send(q, ire, xmit_mp); 4707 if (ip6_asp_table_held) { 4708 ip6_asp_table_refrele(ipst); 4709 ip6_asp_table_held = B_FALSE; 4710 } 4711 ire_refrele(save_ire); 4712 4713 /* Assert that sire is not deleted yet. */ 4714 ASSERT(sire->ire_ptpn != NULL); 4715 IRB_REFRELE(sire->ire_bucket); 4716 4717 /* Assert that ipif_ire is not deleted yet. */ 4718 ASSERT(ipif_ire->ire_ptpn != NULL); 4719 IRB_REFRELE(ipif_ire->ire_bucket); 4720 ire_refrele(ipif_ire); 4721 4722 if (copy_mp != NULL) { 4723 /* 4724 * Search for the next unresolved 4725 * multirt route. 4726 */ 4727 copy_mp = NULL; 4728 ipif_ire = NULL; 4729 ire = NULL; 4730 /* re-enter the loop */ 4731 multirt_resolve_next = B_TRUE; 4732 continue; 4733 } 4734 ire_refrele(sire); 4735 ill_refrele(dst_ill); 4736 ipif_refrele(src_ipif); 4737 return; 4738 } 4739 case IRE_IF_NORESOLVER: 4740 /* 4741 * We have what we need to build an IRE_CACHE. 4742 * 4743 * handle the Gated case, where we create 4744 * a NORESOLVER route for loopback. 
4745 */ 4746 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4747 break; 4748 /* 4749 * TSol note: We are creating the ire cache for the 4750 * destination 'dst'. If 'dst' is offlink, going 4751 * through the first hop 'gw', the security attributes 4752 * of 'dst' must be set to point to the gateway 4753 * credentials of gateway 'gw'. If 'dst' is onlink, it 4754 * is possible that 'dst' is a potential gateway that is 4755 * referenced by some route that has some security 4756 * attributes. Thus in the former case, we need to do a 4757 * gcgrp_lookup of 'gw' while in the latter case we 4758 * need to do gcgrp_lookup of 'dst' itself. 4759 */ 4760 ga.ga_af = AF_INET6; 4761 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4762 ga.ga_addr = v6gw; 4763 else 4764 ga.ga_addr = *v6dstp; 4765 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4766 4767 /* 4768 * Note: the new ire inherits sire flags RTF_SETSRC 4769 * and RTF_MULTIRT to propagate those rules from prefix 4770 * to cache. 4771 */ 4772 ire = ire_create_v6( 4773 v6dstp, /* dest address */ 4774 &ipv6_all_ones, /* mask */ 4775 &src_ipif->ipif_v6src_addr, /* source address */ 4776 &v6gw, /* gateway address */ 4777 &save_ire->ire_max_frag, 4778 NULL, /* no src nce */ 4779 dst_ill->ill_rq, /* recv-from queue */ 4780 dst_ill->ill_wq, /* send-to queue */ 4781 IRE_CACHE, 4782 src_ipif, 4783 &save_ire->ire_mask_v6, /* Parent mask */ 4784 (sire != NULL) ? /* Parent handle */ 4785 sire->ire_phandle : 0, 4786 save_ire->ire_ihandle, /* Interface handle */ 4787 (sire != NULL) ? 
/* flags if any */ 4788 sire->ire_flags & 4789 (RTF_SETSRC | RTF_MULTIRT) : 0, 4790 &(save_ire->ire_uinfo), 4791 NULL, 4792 gcgrp, 4793 ipst); 4794 4795 if (ire == NULL) { 4796 if (gcgrp != NULL) { 4797 GCGRP_REFRELE(gcgrp); 4798 gcgrp = NULL; 4799 } 4800 ire_refrele(save_ire); 4801 break; 4802 } 4803 4804 /* reference now held by IRE */ 4805 gcgrp = NULL; 4806 4807 ire->ire_marks |= ire_marks; 4808 4809 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4810 dst = v6gw; 4811 else 4812 dst = *v6dstp; 4813 err = ndp_noresolver(dst_ill, &dst); 4814 if (err != 0) { 4815 ire_refrele(save_ire); 4816 break; 4817 } 4818 4819 /* Prevent save_ire from getting deleted */ 4820 IRB_REFHOLD(save_ire->ire_bucket); 4821 /* Has it been removed already ? */ 4822 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4823 IRB_REFRELE(save_ire->ire_bucket); 4824 ire_refrele(save_ire); 4825 break; 4826 } 4827 4828 xmit_mp = first_mp; 4829 /* 4830 * In case of MULTIRT, a copy of the current packet 4831 * to send is made to further re-enter the 4832 * loop and attempt another route resolution 4833 */ 4834 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4835 copy_mp = copymsg(first_mp); 4836 if (copy_mp != NULL) { 4837 xmit_mp = copy_mp; 4838 MULTIRT_DEBUG_TAG(first_mp); 4839 } 4840 } 4841 ire_add_then_send(q, ire, xmit_mp); 4842 if (ip6_asp_table_held) { 4843 ip6_asp_table_refrele(ipst); 4844 ip6_asp_table_held = B_FALSE; 4845 } 4846 4847 /* Assert that it is not deleted yet. */ 4848 ASSERT(save_ire->ire_ptpn != NULL); 4849 IRB_REFRELE(save_ire->ire_bucket); 4850 ire_refrele(save_ire); 4851 4852 if (copy_mp != NULL) { 4853 /* 4854 * If we found a (no)resolver, we ignore any 4855 * trailing top priority IRE_CACHE in 4856 * further loops. This ensures that we do not 4857 * omit any (no)resolver despite the priority 4858 * in this call. 4859 * IRE_CACHE, if any, will be processed 4860 * by another thread entering ip_newroute(), 4861 * (on resolver response, for example). 
4862 * We use this to force multiple parallel 4863 * resolution as soon as a packet needs to be 4864 * sent. The result is, after one packet 4865 * emission all reachable routes are generally 4866 * resolved. 4867 * Otherwise, complete resolution of MULTIRT 4868 * routes would require several emissions as 4869 * side effect. 4870 */ 4871 multirt_flags &= ~MULTIRT_CACHEGW; 4872 4873 /* 4874 * Search for the next unresolved multirt 4875 * route. 4876 */ 4877 copy_mp = NULL; 4878 save_ire = NULL; 4879 ire = NULL; 4880 /* re-enter the loop */ 4881 multirt_resolve_next = B_TRUE; 4882 continue; 4883 } 4884 4885 /* Don't need sire anymore */ 4886 if (sire != NULL) 4887 ire_refrele(sire); 4888 ill_refrele(dst_ill); 4889 ipif_refrele(src_ipif); 4890 return; 4891 4892 case IRE_IF_RESOLVER: 4893 /* 4894 * We can't build an IRE_CACHE yet, but at least we 4895 * found a resolver that can help. 4896 */ 4897 dst = *v6dstp; 4898 4899 /* 4900 * To be at this point in the code with a non-zero gw 4901 * means that dst is reachable through a gateway that 4902 * we have never resolved. By changing dst to the gw 4903 * addr we resolve the gateway first. When 4904 * ire_add_then_send() tries to put the IP dg to dst, 4905 * it will reenter ip_newroute() at which time we will 4906 * find the IRE_CACHE for the gw and create another 4907 * IRE_CACHE above (for dst itself). 4908 */ 4909 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4910 save_dst = dst; 4911 dst = v6gw; 4912 v6gw = ipv6_all_zeros; 4913 } 4914 if (dst_ill->ill_flags & ILLF_XRESOLV) { 4915 /* 4916 * Ask the external resolver to do its thing. 
4917 * Make an mblk chain in the following form: 4918 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 4919 */ 4920 mblk_t *ire_mp; 4921 mblk_t *areq_mp; 4922 areq_t *areq; 4923 in6_addr_t *addrp; 4924 4925 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 4926 if (ip6_asp_table_held) { 4927 ip6_asp_table_refrele(ipst); 4928 ip6_asp_table_held = B_FALSE; 4929 } 4930 ire = ire_create_mp_v6( 4931 &dst, /* dest address */ 4932 &ipv6_all_ones, /* mask */ 4933 &src_ipif->ipif_v6src_addr, 4934 /* source address */ 4935 &v6gw, /* gateway address */ 4936 NULL, /* no src nce */ 4937 dst_ill->ill_rq, /* recv-from queue */ 4938 dst_ill->ill_wq, /* send-to queue */ 4939 IRE_CACHE, 4940 src_ipif, 4941 &save_ire->ire_mask_v6, /* Parent mask */ 4942 0, 4943 save_ire->ire_ihandle, 4944 /* Interface handle */ 4945 0, /* flags if any */ 4946 &(save_ire->ire_uinfo), 4947 NULL, 4948 NULL, 4949 ipst); 4950 4951 ire_refrele(save_ire); 4952 if (ire == NULL) { 4953 ip1dbg(("ip_newroute_v6:" 4954 "ire is NULL\n")); 4955 break; 4956 } 4957 4958 if ((sire != NULL) && 4959 (sire->ire_flags & RTF_MULTIRT)) { 4960 /* 4961 * processing a copy of the packet to 4962 * send for further resolution loops 4963 */ 4964 copy_mp = copymsg(first_mp); 4965 if (copy_mp != NULL) 4966 MULTIRT_DEBUG_TAG(copy_mp); 4967 } 4968 ire->ire_marks |= ire_marks; 4969 ire_mp = ire->ire_mp; 4970 /* 4971 * Now create or find an nce for this interface. 4972 * The hw addr will need to to be set from 4973 * the reply to the AR_ENTRY_QUERY that 4974 * we're about to send. This will be done in 4975 * ire_add_v6(). 4976 */ 4977 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 4978 switch (err) { 4979 case 0: 4980 /* 4981 * New cache entry created. 4982 * Break, then ask the external 4983 * resolver. 4984 */ 4985 break; 4986 case EINPROGRESS: 4987 /* 4988 * Resolution in progress; 4989 * packet has been queued by 4990 * ndp_resolver(). 4991 */ 4992 ire_delete(ire); 4993 ire = NULL; 4994 /* 4995 * Check if another multirt 4996 * route must be resolved. 
4997 */ 4998 if (copy_mp != NULL) { 4999 /* 5000 * If we found a resolver, we 5001 * ignore any trailing top 5002 * priority IRE_CACHE in 5003 * further loops. The reason is 5004 * the same as for noresolver. 5005 */ 5006 multirt_flags &= 5007 ~MULTIRT_CACHEGW; 5008 /* 5009 * Search for the next 5010 * unresolved multirt route. 5011 */ 5012 first_mp = copy_mp; 5013 copy_mp = NULL; 5014 mp = first_mp; 5015 if (mp->b_datap->db_type == 5016 M_CTL) { 5017 mp = mp->b_cont; 5018 } 5019 ASSERT(sire != NULL); 5020 dst = save_dst; 5021 /* 5022 * re-enter the loop 5023 */ 5024 multirt_resolve_next = 5025 B_TRUE; 5026 continue; 5027 } 5028 5029 if (sire != NULL) 5030 ire_refrele(sire); 5031 ill_refrele(dst_ill); 5032 ipif_refrele(src_ipif); 5033 return; 5034 default: 5035 /* 5036 * Transient error; packet will be 5037 * freed. 5038 */ 5039 ire_delete(ire); 5040 ire = NULL; 5041 break; 5042 } 5043 if (err != 0) 5044 break; 5045 /* 5046 * Now set up the AR_ENTRY_QUERY and send it. 5047 */ 5048 areq_mp = ill_arp_alloc(dst_ill, 5049 (uchar_t *)&ipv6_areq_template, 5050 (caddr_t)&dst); 5051 if (areq_mp == NULL) { 5052 ip1dbg(("ip_newroute_v6:" 5053 "areq_mp is NULL\n")); 5054 freemsg(ire_mp); 5055 break; 5056 } 5057 areq = (areq_t *)areq_mp->b_rptr; 5058 addrp = (in6_addr_t *)((char *)areq + 5059 areq->areq_target_addr_offset); 5060 *addrp = dst; 5061 addrp = (in6_addr_t *)((char *)areq + 5062 areq->areq_sender_addr_offset); 5063 *addrp = src_ipif->ipif_v6src_addr; 5064 /* 5065 * link the chain, then send up to the resolver. 5066 */ 5067 linkb(areq_mp, ire_mp); 5068 linkb(areq_mp, mp); 5069 ip1dbg(("ip_newroute_v6:" 5070 "putnext to resolver\n")); 5071 putnext(dst_ill->ill_rq, areq_mp); 5072 /* 5073 * Check if another multirt route 5074 * must be resolved. 5075 */ 5076 ire = NULL; 5077 if (copy_mp != NULL) { 5078 /* 5079 * If we find a resolver, we ignore any 5080 * trailing top priority IRE_CACHE in 5081 * further loops. The reason is the 5082 * same as for noresolver. 
5083 */ 5084 multirt_flags &= ~MULTIRT_CACHEGW; 5085 /* 5086 * Search for the next unresolved 5087 * multirt route. 5088 */ 5089 first_mp = copy_mp; 5090 copy_mp = NULL; 5091 mp = first_mp; 5092 if (mp->b_datap->db_type == M_CTL) { 5093 mp = mp->b_cont; 5094 } 5095 ASSERT(sire != NULL); 5096 dst = save_dst; 5097 /* 5098 * re-enter the loop 5099 */ 5100 multirt_resolve_next = B_TRUE; 5101 continue; 5102 } 5103 5104 if (sire != NULL) 5105 ire_refrele(sire); 5106 ill_refrele(dst_ill); 5107 ipif_refrele(src_ipif); 5108 return; 5109 } 5110 /* 5111 * Non-external resolver case. 5112 * 5113 * TSol note: Please see the note above the 5114 * IRE_IF_NORESOLVER case. 5115 */ 5116 ga.ga_af = AF_INET6; 5117 ga.ga_addr = dst; 5118 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5119 5120 ire = ire_create_v6( 5121 &dst, /* dest address */ 5122 &ipv6_all_ones, /* mask */ 5123 &src_ipif->ipif_v6src_addr, /* source address */ 5124 &v6gw, /* gateway address */ 5125 &save_ire->ire_max_frag, 5126 NULL, /* no src nce */ 5127 dst_ill->ill_rq, /* recv-from queue */ 5128 dst_ill->ill_wq, /* send-to queue */ 5129 IRE_CACHE, 5130 src_ipif, 5131 &save_ire->ire_mask_v6, /* Parent mask */ 5132 0, 5133 save_ire->ire_ihandle, /* Interface handle */ 5134 0, /* flags if any */ 5135 &(save_ire->ire_uinfo), 5136 NULL, 5137 gcgrp, 5138 ipst); 5139 5140 if (ire == NULL) { 5141 if (gcgrp != NULL) { 5142 GCGRP_REFRELE(gcgrp); 5143 gcgrp = NULL; 5144 } 5145 ire_refrele(save_ire); 5146 break; 5147 } 5148 5149 /* reference now held by IRE */ 5150 gcgrp = NULL; 5151 5152 if ((sire != NULL) && 5153 (sire->ire_flags & RTF_MULTIRT)) { 5154 copy_mp = copymsg(first_mp); 5155 if (copy_mp != NULL) 5156 MULTIRT_DEBUG_TAG(copy_mp); 5157 } 5158 5159 ire->ire_marks |= ire_marks; 5160 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5161 switch (err) { 5162 case 0: 5163 /* Prevent save_ire from getting deleted */ 5164 IRB_REFHOLD(save_ire->ire_bucket); 5165 /* Has it been removed already ? 
*/ 5166 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5167 IRB_REFRELE(save_ire->ire_bucket); 5168 ire_refrele(save_ire); 5169 break; 5170 } 5171 5172 /* 5173 * We have a resolved cache entry, 5174 * add in the IRE. 5175 */ 5176 ire_add_then_send(q, ire, first_mp); 5177 if (ip6_asp_table_held) { 5178 ip6_asp_table_refrele(ipst); 5179 ip6_asp_table_held = B_FALSE; 5180 } 5181 5182 /* Assert that it is not deleted yet. */ 5183 ASSERT(save_ire->ire_ptpn != NULL); 5184 IRB_REFRELE(save_ire->ire_bucket); 5185 ire_refrele(save_ire); 5186 /* 5187 * Check if another multirt route 5188 * must be resolved. 5189 */ 5190 ire = NULL; 5191 if (copy_mp != NULL) { 5192 /* 5193 * If we find a resolver, we ignore any 5194 * trailing top priority IRE_CACHE in 5195 * further loops. The reason is the 5196 * same as for noresolver. 5197 */ 5198 multirt_flags &= ~MULTIRT_CACHEGW; 5199 /* 5200 * Search for the next unresolved 5201 * multirt route. 5202 */ 5203 first_mp = copy_mp; 5204 copy_mp = NULL; 5205 mp = first_mp; 5206 if (mp->b_datap->db_type == M_CTL) { 5207 mp = mp->b_cont; 5208 } 5209 ASSERT(sire != NULL); 5210 dst = save_dst; 5211 /* 5212 * re-enter the loop 5213 */ 5214 multirt_resolve_next = B_TRUE; 5215 continue; 5216 } 5217 5218 if (sire != NULL) 5219 ire_refrele(sire); 5220 ill_refrele(dst_ill); 5221 ipif_refrele(src_ipif); 5222 return; 5223 5224 case EINPROGRESS: 5225 /* 5226 * mp was consumed - presumably queued. 5227 * No need for ire, presumably resolution is 5228 * in progress, and ire will be added when the 5229 * address is resolved. 5230 */ 5231 if (ip6_asp_table_held) { 5232 ip6_asp_table_refrele(ipst); 5233 ip6_asp_table_held = B_FALSE; 5234 } 5235 ASSERT(ire->ire_nce == NULL); 5236 ire_delete(ire); 5237 ire_refrele(save_ire); 5238 /* 5239 * Check if another multirt route 5240 * must be resolved. 
5241 */ 5242 ire = NULL; 5243 if (copy_mp != NULL) { 5244 /* 5245 * If we find a resolver, we ignore any 5246 * trailing top priority IRE_CACHE in 5247 * further loops. The reason is the 5248 * same as for noresolver. 5249 */ 5250 multirt_flags &= ~MULTIRT_CACHEGW; 5251 /* 5252 * Search for the next unresolved 5253 * multirt route. 5254 */ 5255 first_mp = copy_mp; 5256 copy_mp = NULL; 5257 mp = first_mp; 5258 if (mp->b_datap->db_type == M_CTL) { 5259 mp = mp->b_cont; 5260 } 5261 ASSERT(sire != NULL); 5262 dst = save_dst; 5263 /* 5264 * re-enter the loop 5265 */ 5266 multirt_resolve_next = B_TRUE; 5267 continue; 5268 } 5269 if (sire != NULL) 5270 ire_refrele(sire); 5271 ill_refrele(dst_ill); 5272 ipif_refrele(src_ipif); 5273 return; 5274 default: 5275 /* Some transient error */ 5276 ASSERT(ire->ire_nce == NULL); 5277 ire_refrele(save_ire); 5278 break; 5279 } 5280 break; 5281 default: 5282 break; 5283 } 5284 if (ip6_asp_table_held) { 5285 ip6_asp_table_refrele(ipst); 5286 ip6_asp_table_held = B_FALSE; 5287 } 5288 } while (multirt_resolve_next); 5289 5290 err_ret: 5291 ip1dbg(("ip_newroute_v6: dropped\n")); 5292 if (src_ipif != NULL) 5293 ipif_refrele(src_ipif); 5294 if (dst_ill != NULL) { 5295 need_rele = B_TRUE; 5296 ill = dst_ill; 5297 } 5298 if (ill != NULL) { 5299 if (mp->b_prev != NULL) { 5300 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5301 } else { 5302 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5303 } 5304 5305 if (need_rele) 5306 ill_refrele(ill); 5307 } else { 5308 if (mp->b_prev != NULL) { 5309 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5310 } else { 5311 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5312 } 5313 } 5314 /* Did this packet originate externally? 
*/ 5315 if (mp->b_prev) { 5316 mp->b_next = NULL; 5317 mp->b_prev = NULL; 5318 } 5319 if (copy_mp != NULL) { 5320 MULTIRT_DEBUG_UNTAG(copy_mp); 5321 freemsg(copy_mp); 5322 } 5323 MULTIRT_DEBUG_UNTAG(first_mp); 5324 freemsg(first_mp); 5325 if (ire != NULL) 5326 ire_refrele(ire); 5327 if (sire != NULL) 5328 ire_refrele(sire); 5329 return; 5330 5331 icmp_err_ret: 5332 if (ip6_asp_table_held) 5333 ip6_asp_table_refrele(ipst); 5334 if (src_ipif != NULL) 5335 ipif_refrele(src_ipif); 5336 if (dst_ill != NULL) { 5337 need_rele = B_TRUE; 5338 ill = dst_ill; 5339 } 5340 ip1dbg(("ip_newroute_v6: no route\n")); 5341 if (sire != NULL) 5342 ire_refrele(sire); 5343 /* 5344 * We need to set sire to NULL to avoid double freeing if we 5345 * ever goto err_ret from below. 5346 */ 5347 sire = NULL; 5348 ip6h = (ip6_t *)mp->b_rptr; 5349 /* Skip ip6i_t header if present */ 5350 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5351 /* Make sure the IPv6 header is present */ 5352 if ((mp->b_wptr - (uchar_t *)ip6h) < 5353 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5354 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5355 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5356 goto err_ret; 5357 } 5358 } 5359 mp->b_rptr += sizeof (ip6i_t); 5360 ip6h = (ip6_t *)mp->b_rptr; 5361 } 5362 /* Did this packet originate externally? 
*/ 5363 if (mp->b_prev) { 5364 if (ill != NULL) { 5365 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5366 } else { 5367 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5368 } 5369 mp->b_next = NULL; 5370 mp->b_prev = NULL; 5371 q = WR(q); 5372 } else { 5373 if (ill != NULL) { 5374 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5375 } else { 5376 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5377 } 5378 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5379 /* Failed */ 5380 if (copy_mp != NULL) { 5381 MULTIRT_DEBUG_UNTAG(copy_mp); 5382 freemsg(copy_mp); 5383 } 5384 MULTIRT_DEBUG_UNTAG(first_mp); 5385 freemsg(first_mp); 5386 if (ire != NULL) 5387 ire_refrele(ire); 5388 if (need_rele) 5389 ill_refrele(ill); 5390 return; 5391 } 5392 } 5393 5394 if (need_rele) 5395 ill_refrele(ill); 5396 5397 /* 5398 * At this point we will have ire only if RTF_BLACKHOLE 5399 * or RTF_REJECT flags are set on the IRE. It will not 5400 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5401 */ 5402 if (ire != NULL) { 5403 if (ire->ire_flags & RTF_BLACKHOLE) { 5404 ire_refrele(ire); 5405 if (copy_mp != NULL) { 5406 MULTIRT_DEBUG_UNTAG(copy_mp); 5407 freemsg(copy_mp); 5408 } 5409 MULTIRT_DEBUG_UNTAG(first_mp); 5410 freemsg(first_mp); 5411 return; 5412 } 5413 ire_refrele(ire); 5414 } 5415 if (ip_debug > 3) { 5416 /* ip2dbg */ 5417 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5418 AF_INET6, v6dstp); 5419 } 5420 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5421 B_FALSE, B_FALSE, zoneid, ipst); 5422 } 5423 5424 /* 5425 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5426 * we need to send out a packet to a destination address for which we do not 5427 * have specific routing information. It is only used for multicast packets. 5428 * 5429 * If unspec_src we allow creating an IRE with source address zero. 5430 * ire_send_v6() will delete it after the packet is sent. 
5431 */ 5432 void 5433 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5434 const in6_addr_t *v6dstp, const in6_addr_t *v6srcp, int unspec_src, 5435 zoneid_t zoneid) 5436 { 5437 ire_t *ire = NULL; 5438 ipif_t *src_ipif = NULL; 5439 int err = 0; 5440 ill_t *dst_ill = NULL; 5441 ire_t *save_ire; 5442 ipsec_out_t *io; 5443 ill_t *ill; 5444 mblk_t *first_mp; 5445 ire_t *fire = NULL; 5446 mblk_t *copy_mp = NULL; 5447 const in6_addr_t *ire_v6srcp; 5448 boolean_t probe = B_FALSE; 5449 boolean_t multirt_resolve_next; 5450 boolean_t ipif_held = B_FALSE; 5451 boolean_t ill_held = B_FALSE; 5452 boolean_t ip6_asp_table_held = B_FALSE; 5453 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5454 5455 /* 5456 * This loop is run only once in most cases. 5457 * We loop to resolve further routes only when the destination 5458 * can be reached through multiple RTF_MULTIRT-flagged ires. 5459 */ 5460 do { 5461 multirt_resolve_next = B_FALSE; 5462 if (dst_ill != NULL) { 5463 ill_refrele(dst_ill); 5464 dst_ill = NULL; 5465 } 5466 5467 if (src_ipif != NULL) { 5468 ipif_refrele(src_ipif); 5469 src_ipif = NULL; 5470 } 5471 ASSERT(ipif != NULL); 5472 ill = ipif->ipif_ill; 5473 5474 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5475 if (ip_debug > 2) { 5476 /* ip1dbg */ 5477 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5478 AF_INET6, v6dstp); 5479 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5480 ill->ill_name, ipif->ipif_isv6); 5481 } 5482 5483 first_mp = mp; 5484 if (mp->b_datap->db_type == M_CTL) { 5485 mp = mp->b_cont; 5486 io = (ipsec_out_t *)first_mp->b_rptr; 5487 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5488 } else { 5489 io = NULL; 5490 } 5491 5492 /* 5493 * If the interface is a pt-pt interface we look for an 5494 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5495 * local_address and the pt-pt destination address. 5496 * Otherwise we just match the local address. 
5497 */ 5498 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5499 goto err_ret; 5500 } 5501 5502 /* 5503 * We check if an IRE_OFFSUBNET for the addr that goes through 5504 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5505 * RTF_MULTIRT flags must be honored. 5506 */ 5507 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5508 ip2dbg(("ip_newroute_ipif_v6: " 5509 "ipif_lookup_multi_ire_v6(" 5510 "ipif %p, dst %08x) = fire %p\n", 5511 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5512 (void *)fire)); 5513 5514 ASSERT(src_ipif == NULL); 5515 5516 /* 5517 * Because nce_xmit() calls ip_output_v6() and NCEs are always 5518 * tied to the underlying interface, IS_UNDER_IPMP() may be 5519 * true even when building IREs that will be used for data 5520 * traffic. As such, see if the packet's source address is a 5521 * test address, and if so use that test address's ipif for 5522 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 5523 * ire_add_v6() can work properly. 5524 */ 5525 if (IS_UNDER_IPMP(ill)) 5526 probe = ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 5527 5528 /* 5529 * Determine the outbound (destination) ill for this route. 5530 * If IPMP is not in use, that's the same as our ill. If IPMP 5531 * is in-use and we're on the IPMP interface, or we're on an 5532 * underlying ill but sending data traffic, use a suitable 5533 * destination ill from the group. The latter case covers a 5534 * subtle edge condition with multicast: when we bring up an 5535 * IPv6 data address, we will create an NCE on an underlying 5536 * interface, and send solitications to ff02::1, which would 5537 * take us through here, and cause us to create an IRE for 5538 * ff02::1. To meet our defined semantics for multicast (and 5539 * ensure there aren't unexpected echoes), that IRE needs to 5540 * use the IPMP group's nominated multicast interface. 5541 * 5542 * Note: the source ipif is determined by source address 5543 * selection later. 
5544 */ 5545 if (IS_IPMP(ill) || (IS_UNDER_IPMP(ill) && !probe)) { 5546 ill_t *ipmp_ill; 5547 ipmp_illgrp_t *illg; 5548 5549 if (IS_UNDER_IPMP(ill)) { 5550 ipmp_ill = ipmp_ill_hold_ipmp_ill(ill); 5551 } else { 5552 ipmp_ill = ill; 5553 ill_refhold(ipmp_ill); /* for symmetry */ 5554 } 5555 5556 if (ipmp_ill == NULL) 5557 goto err_ret; 5558 5559 illg = ipmp_ill->ill_grp; 5560 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 5561 dst_ill = ipmp_illgrp_hold_cast_ill(illg); 5562 else 5563 dst_ill = ipmp_illgrp_hold_next_ill(illg); 5564 5565 ill_refrele(ipmp_ill); 5566 } else { 5567 dst_ill = ill; 5568 ill_refhold(dst_ill); /* for symmetry */ 5569 } 5570 5571 if (dst_ill == NULL) { 5572 if (ip_debug > 2) { 5573 pr_addr_dbg("ip_newroute_ipif_v6: " 5574 "no dst ill for dst %s\n", 5575 AF_INET6, v6dstp); 5576 } 5577 goto err_ret; 5578 } 5579 5580 /* 5581 * Pick a source address which matches the scope of the 5582 * destination address. 5583 * For RTF_SETSRC routes, the source address is imposed by the 5584 * parent ire (fire). 5585 */ 5586 5587 if (src_ipif == NULL && fire != NULL && 5588 (fire->ire_flags & RTF_SETSRC)) { 5589 /* 5590 * Check that the ipif matching the requested source 5591 * address still exists. 
5592 */ 5593 src_ipif = ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5594 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5595 } 5596 5597 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5598 ip6_asp_table_held = B_TRUE; 5599 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5600 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5601 } 5602 5603 if (src_ipif == NULL) { 5604 if (!unspec_src) { 5605 if (ip_debug > 2) { 5606 /* ip1dbg */ 5607 pr_addr_dbg("ip_newroute_ipif_v6: " 5608 "no src for dst %s\n", 5609 AF_INET6, v6dstp); 5610 printf(" through interface %s\n", 5611 dst_ill->ill_name); 5612 } 5613 goto err_ret; 5614 } 5615 ire_v6srcp = &ipv6_all_zeros; 5616 src_ipif = ipif; 5617 ipif_refhold(src_ipif); 5618 } else { 5619 ire_v6srcp = &src_ipif->ipif_v6src_addr; 5620 } 5621 5622 ire = ipif_to_ire_v6(ipif); 5623 if (ire == NULL) { 5624 if (ip_debug > 2) { 5625 /* ip1dbg */ 5626 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5627 AF_INET6, &ipif->ipif_v6lcl_addr); 5628 printf("ip_newroute_ipif_v6: " 5629 "if %s\n", dst_ill->ill_name); 5630 } 5631 goto err_ret; 5632 } 5633 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5634 goto err_ret; 5635 5636 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5637 5638 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5639 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5640 if (ip_debug > 2) { 5641 /* ip1dbg */ 5642 pr_addr_dbg(" address %s\n", 5643 AF_INET6, &ire->ire_src_addr_v6); 5644 } 5645 save_ire = ire; 5646 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5647 (void *)ire, (void *)ipif)); 5648 5649 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5650 /* 5651 * an IRE_OFFSUBET was looked up 5652 * on that interface. 5653 * this ire has RTF_MULTIRT flag, 5654 * so the resolution loop 5655 * will be re-entered to resolve 5656 * additional routes on other 5657 * interfaces. For that purpose, 5658 * a copy of the packet is 5659 * made at this point. 
5660 */ 5661 fire->ire_last_used_time = lbolt; 5662 copy_mp = copymsg(first_mp); 5663 if (copy_mp) { 5664 MULTIRT_DEBUG_TAG(copy_mp); 5665 } 5666 } 5667 5668 switch (ire->ire_type) { 5669 case IRE_IF_NORESOLVER: { 5670 /* 5671 * We have what we need to build an IRE_CACHE. 5672 * 5673 * handle the Gated case, where we create 5674 * a NORESOLVER route for loopback. 5675 */ 5676 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5677 break; 5678 /* 5679 * The newly created ire will inherit the flags of the 5680 * parent ire, if any. 5681 */ 5682 ire = ire_create_v6( 5683 v6dstp, /* dest address */ 5684 &ipv6_all_ones, /* mask */ 5685 ire_v6srcp, /* source address */ 5686 NULL, /* gateway address */ 5687 &save_ire->ire_max_frag, 5688 NULL, /* no src nce */ 5689 dst_ill->ill_rq, /* recv-from queue */ 5690 dst_ill->ill_wq, /* send-to queue */ 5691 IRE_CACHE, 5692 src_ipif, 5693 NULL, 5694 (fire != NULL) ? /* Parent handle */ 5695 fire->ire_phandle : 0, 5696 save_ire->ire_ihandle, /* Interface handle */ 5697 (fire != NULL) ? 5698 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5699 0, 5700 &ire_uinfo_null, 5701 NULL, 5702 NULL, 5703 ipst); 5704 5705 if (ire == NULL) { 5706 ire_refrele(save_ire); 5707 break; 5708 } 5709 5710 err = ndp_noresolver(dst_ill, v6dstp); 5711 if (err != 0) { 5712 ire_refrele(save_ire); 5713 break; 5714 } 5715 5716 /* Prevent save_ire from getting deleted */ 5717 IRB_REFHOLD(save_ire->ire_bucket); 5718 /* Has it been removed already ? */ 5719 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5720 IRB_REFRELE(save_ire->ire_bucket); 5721 ire_refrele(save_ire); 5722 break; 5723 } 5724 5725 ire_add_then_send(q, ire, first_mp); 5726 if (ip6_asp_table_held) { 5727 ip6_asp_table_refrele(ipst); 5728 ip6_asp_table_held = B_FALSE; 5729 } 5730 5731 /* Assert that it is not deleted yet. 
*/ 5732 ASSERT(save_ire->ire_ptpn != NULL); 5733 IRB_REFRELE(save_ire->ire_bucket); 5734 ire_refrele(save_ire); 5735 if (fire != NULL) { 5736 ire_refrele(fire); 5737 fire = NULL; 5738 } 5739 5740 /* 5741 * The resolution loop is re-entered if we 5742 * actually are in a multirouting case. 5743 */ 5744 if (copy_mp != NULL) { 5745 boolean_t need_resolve = 5746 ire_multirt_need_resolve_v6(v6dstp, 5747 msg_getlabel(copy_mp), ipst); 5748 if (!need_resolve) { 5749 MULTIRT_DEBUG_UNTAG(copy_mp); 5750 freemsg(copy_mp); 5751 copy_mp = NULL; 5752 } else { 5753 /* 5754 * ipif_lookup_group_v6() calls 5755 * ire_lookup_multi_v6() that uses 5756 * ire_ftable_lookup_v6() to find 5757 * an IRE_INTERFACE for the group. 5758 * In the multirt case, 5759 * ire_lookup_multi_v6() then invokes 5760 * ire_multirt_lookup_v6() to find 5761 * the next resolvable ire. 5762 * As a result, we obtain a new 5763 * interface, derived from the 5764 * next ire. 5765 */ 5766 if (ipif_held) { 5767 ipif_refrele(ipif); 5768 ipif_held = B_FALSE; 5769 } 5770 ipif = ipif_lookup_group_v6(v6dstp, 5771 zoneid, ipst); 5772 ip2dbg(("ip_newroute_ipif: " 5773 "multirt dst %08x, ipif %p\n", 5774 ntohl(V4_PART_OF_V6((*v6dstp))), 5775 (void *)ipif)); 5776 if (ipif != NULL) { 5777 ipif_held = B_TRUE; 5778 mp = copy_mp; 5779 copy_mp = NULL; 5780 multirt_resolve_next = 5781 B_TRUE; 5782 continue; 5783 } else { 5784 freemsg(copy_mp); 5785 } 5786 } 5787 } 5788 ill_refrele(dst_ill); 5789 if (ipif_held) { 5790 ipif_refrele(ipif); 5791 ipif_held = B_FALSE; 5792 } 5793 if (src_ipif != NULL) 5794 ipif_refrele(src_ipif); 5795 return; 5796 } 5797 case IRE_IF_RESOLVER: { 5798 5799 ASSERT(dst_ill->ill_isv6); 5800 5801 /* 5802 * We obtain a partial IRE_CACHE which we will pass 5803 * along with the resolver query. When the response 5804 * comes back it will be there ready for us to add. 5805 */ 5806 /* 5807 * the newly created ire will inherit the flags of the 5808 * parent ire, if any. 
5809 */ 5810 ire = ire_create_v6( 5811 v6dstp, /* dest address */ 5812 &ipv6_all_ones, /* mask */ 5813 ire_v6srcp, /* source address */ 5814 NULL, /* gateway address */ 5815 &save_ire->ire_max_frag, 5816 NULL, /* src nce */ 5817 dst_ill->ill_rq, /* recv-from queue */ 5818 dst_ill->ill_wq, /* send-to queue */ 5819 IRE_CACHE, 5820 src_ipif, 5821 NULL, 5822 (fire != NULL) ? /* Parent handle */ 5823 fire->ire_phandle : 0, 5824 save_ire->ire_ihandle, /* Interface handle */ 5825 (fire != NULL) ? 5826 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5827 0, 5828 &ire_uinfo_null, 5829 NULL, 5830 NULL, 5831 ipst); 5832 5833 if (ire == NULL) { 5834 ire_refrele(save_ire); 5835 break; 5836 } 5837 5838 /* Resolve and add ire to the ctable */ 5839 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5840 switch (err) { 5841 case 0: 5842 /* Prevent save_ire from getting deleted */ 5843 IRB_REFHOLD(save_ire->ire_bucket); 5844 /* Has it been removed already ? */ 5845 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5846 IRB_REFRELE(save_ire->ire_bucket); 5847 ire_refrele(save_ire); 5848 break; 5849 } 5850 /* 5851 * We have a resolved cache entry, 5852 * add in the IRE. 5853 */ 5854 ire_add_then_send(q, ire, first_mp); 5855 if (ip6_asp_table_held) { 5856 ip6_asp_table_refrele(ipst); 5857 ip6_asp_table_held = B_FALSE; 5858 } 5859 5860 /* Assert that it is not deleted yet. */ 5861 ASSERT(save_ire->ire_ptpn != NULL); 5862 IRB_REFRELE(save_ire->ire_bucket); 5863 ire_refrele(save_ire); 5864 if (fire != NULL) { 5865 ire_refrele(fire); 5866 fire = NULL; 5867 } 5868 5869 /* 5870 * The resolution loop is re-entered if we 5871 * actually are in a multirouting case. 
5872 */ 5873 if (copy_mp != NULL) { 5874 boolean_t need_resolve = 5875 ire_multirt_need_resolve_v6(v6dstp, 5876 msg_getlabel(copy_mp), ipst); 5877 if (!need_resolve) { 5878 MULTIRT_DEBUG_UNTAG(copy_mp); 5879 freemsg(copy_mp); 5880 copy_mp = NULL; 5881 } else { 5882 /* 5883 * ipif_lookup_group_v6() calls 5884 * ire_lookup_multi_v6() that 5885 * uses ire_ftable_lookup_v6() 5886 * to find an IRE_INTERFACE for 5887 * the group. In the multirt 5888 * case, ire_lookup_multi_v6() 5889 * then invokes 5890 * ire_multirt_lookup_v6() to 5891 * find the next resolvable ire. 5892 * As a result, we obtain a new 5893 * interface, derived from the 5894 * next ire. 5895 */ 5896 if (ipif_held) { 5897 ipif_refrele(ipif); 5898 ipif_held = B_FALSE; 5899 } 5900 ipif = ipif_lookup_group_v6( 5901 v6dstp, zoneid, ipst); 5902 ip2dbg(("ip_newroute_ipif: " 5903 "multirt dst %08x, " 5904 "ipif %p\n", 5905 ntohl(V4_PART_OF_V6( 5906 (*v6dstp))), 5907 (void *)ipif)); 5908 if (ipif != NULL) { 5909 ipif_held = B_TRUE; 5910 mp = copy_mp; 5911 copy_mp = NULL; 5912 multirt_resolve_next = 5913 B_TRUE; 5914 continue; 5915 } else { 5916 freemsg(copy_mp); 5917 } 5918 } 5919 } 5920 ill_refrele(dst_ill); 5921 if (ipif_held) { 5922 ipif_refrele(ipif); 5923 ipif_held = B_FALSE; 5924 } 5925 if (src_ipif != NULL) 5926 ipif_refrele(src_ipif); 5927 return; 5928 5929 case EINPROGRESS: 5930 /* 5931 * mp was consumed - presumably queued. 5932 * No need for ire, presumably resolution is 5933 * in progress, and ire will be added when the 5934 * address is resolved. 5935 */ 5936 if (ip6_asp_table_held) { 5937 ip6_asp_table_refrele(ipst); 5938 ip6_asp_table_held = B_FALSE; 5939 } 5940 ire_delete(ire); 5941 ire_refrele(save_ire); 5942 if (fire != NULL) { 5943 ire_refrele(fire); 5944 fire = NULL; 5945 } 5946 5947 /* 5948 * The resolution loop is re-entered if we 5949 * actually are in a multirouting case. 
5950 */ 5951 if (copy_mp != NULL) { 5952 boolean_t need_resolve = 5953 ire_multirt_need_resolve_v6(v6dstp, 5954 msg_getlabel(copy_mp), ipst); 5955 if (!need_resolve) { 5956 MULTIRT_DEBUG_UNTAG(copy_mp); 5957 freemsg(copy_mp); 5958 copy_mp = NULL; 5959 } else { 5960 /* 5961 * ipif_lookup_group_v6() calls 5962 * ire_lookup_multi_v6() that 5963 * uses ire_ftable_lookup_v6() 5964 * to find an IRE_INTERFACE for 5965 * the group. In the multirt 5966 * case, ire_lookup_multi_v6() 5967 * then invokes 5968 * ire_multirt_lookup_v6() to 5969 * find the next resolvable ire. 5970 * As a result, we obtain a new 5971 * interface, derived from the 5972 * next ire. 5973 */ 5974 if (ipif_held) { 5975 ipif_refrele(ipif); 5976 ipif_held = B_FALSE; 5977 } 5978 ipif = ipif_lookup_group_v6( 5979 v6dstp, zoneid, ipst); 5980 ip2dbg(("ip_newroute_ipif: " 5981 "multirt dst %08x, " 5982 "ipif %p\n", 5983 ntohl(V4_PART_OF_V6( 5984 (*v6dstp))), 5985 (void *)ipif)); 5986 if (ipif != NULL) { 5987 ipif_held = B_TRUE; 5988 mp = copy_mp; 5989 copy_mp = NULL; 5990 multirt_resolve_next = 5991 B_TRUE; 5992 continue; 5993 } else { 5994 freemsg(copy_mp); 5995 } 5996 } 5997 } 5998 ill_refrele(dst_ill); 5999 if (ipif_held) { 6000 ipif_refrele(ipif); 6001 ipif_held = B_FALSE; 6002 } 6003 if (src_ipif != NULL) 6004 ipif_refrele(src_ipif); 6005 return; 6006 default: 6007 /* Some transient error */ 6008 ire_refrele(save_ire); 6009 break; 6010 } 6011 break; 6012 } 6013 default: 6014 break; 6015 } 6016 if (ip6_asp_table_held) { 6017 ip6_asp_table_refrele(ipst); 6018 ip6_asp_table_held = B_FALSE; 6019 } 6020 } while (multirt_resolve_next); 6021 6022 err_ret: 6023 if (ip6_asp_table_held) 6024 ip6_asp_table_refrele(ipst); 6025 if (ire != NULL) 6026 ire_refrele(ire); 6027 if (fire != NULL) 6028 ire_refrele(fire); 6029 if (ipif != NULL && ipif_held) 6030 ipif_refrele(ipif); 6031 if (src_ipif != NULL) 6032 ipif_refrele(src_ipif); 6033 6034 /* Multicast - no point in trying to generate ICMP error */ 6035 if (dst_ill != 
NULL) { 6036 ill = dst_ill; 6037 ill_held = B_TRUE; 6038 } 6039 if (mp->b_prev || mp->b_next) { 6040 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6041 } else { 6042 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6043 } 6044 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6045 mp->b_next = NULL; 6046 mp->b_prev = NULL; 6047 freemsg(first_mp); 6048 if (ill_held) 6049 ill_refrele(ill); 6050 } 6051 6052 /* 6053 * Parse and process any hop-by-hop or destination options. 6054 * 6055 * Assumes that q is an ill read queue so that ICMP errors for link-local 6056 * destinations are sent out the correct interface. 6057 * 6058 * Returns -1 if there was an error and mp has been consumed. 6059 * Returns 0 if no special action is needed. 6060 * Returns 1 if the packet contained a router alert option for this node 6061 * which is verified to be "interesting/known" for our implementation. 6062 * 6063 * XXX Note: In future as more hbh or dest options are defined, 6064 * it may be better to have different routines for hbh and dest 6065 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6066 * may have same value in different namespaces. Or is it same namespace ?? 
6067 * Current code checks for each opt_type (other than pads) if it is in 6068 * the expected nexthdr (hbh or dest) 6069 */ 6070 static int 6071 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6072 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6073 { 6074 uint8_t opt_type; 6075 uint_t optused; 6076 int ret = 0; 6077 mblk_t *first_mp; 6078 const char *errtype; 6079 zoneid_t zoneid; 6080 ill_t *ill = q->q_ptr; 6081 ipif_t *ipif; 6082 6083 first_mp = mp; 6084 if (mp->b_datap->db_type == M_CTL) { 6085 mp = mp->b_cont; 6086 } 6087 6088 while (optlen != 0) { 6089 opt_type = *optptr; 6090 if (opt_type == IP6OPT_PAD1) { 6091 optused = 1; 6092 } else { 6093 if (optlen < 2) 6094 goto bad_opt; 6095 errtype = "malformed"; 6096 if (opt_type == ip6opt_ls) { 6097 optused = 2 + optptr[1]; 6098 if (optused > optlen) 6099 goto bad_opt; 6100 } else switch (opt_type) { 6101 case IP6OPT_PADN: 6102 /* 6103 * Note:We don't verify that (N-2) pad octets 6104 * are zero as required by spec. Adhere to 6105 * "be liberal in what you accept..." part of 6106 * implementation philosophy (RFC791,RFC1122) 6107 */ 6108 optused = 2 + optptr[1]; 6109 if (optused > optlen) 6110 goto bad_opt; 6111 break; 6112 6113 case IP6OPT_JUMBO: 6114 if (hdr_type != IPPROTO_HOPOPTS) 6115 goto opt_error; 6116 goto opt_error; /* XXX Not implemented! 
*/ 6117 6118 case IP6OPT_ROUTER_ALERT: { 6119 struct ip6_opt_router *or; 6120 6121 if (hdr_type != IPPROTO_HOPOPTS) 6122 goto opt_error; 6123 optused = 2 + optptr[1]; 6124 if (optused > optlen) 6125 goto bad_opt; 6126 or = (struct ip6_opt_router *)optptr; 6127 /* Check total length and alignment */ 6128 if (optused != sizeof (*or) || 6129 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6130 goto opt_error; 6131 /* Check value */ 6132 switch (*((uint16_t *)or->ip6or_value)) { 6133 case IP6_ALERT_MLD: 6134 case IP6_ALERT_RSVP: 6135 ret = 1; 6136 } 6137 break; 6138 } 6139 case IP6OPT_HOME_ADDRESS: { 6140 /* 6141 * Minimal support for the home address option 6142 * (which is required by all IPv6 nodes). 6143 * Implement by just swapping the home address 6144 * and source address. 6145 * XXX Note: this has IPsec implications since 6146 * AH needs to take this into account. 6147 * Also, when IPsec is used we need to ensure 6148 * that this is only processed once 6149 * in the received packet (to avoid swapping 6150 * back and forth). 6151 * NOTE:This option processing is considered 6152 * to be unsafe and prone to a denial of 6153 * service attack. 6154 * The current processing is not safe even with 6155 * IPsec secured IP packets. Since the home 6156 * address option processing requirement still 6157 * is in the IETF draft and in the process of 6158 * being redefined for its usage, it has been 6159 * decided to turn off the option by default. 6160 * If this section of code needs to be executed, 6161 * ndd variable ip6_ignore_home_address_opt 6162 * should be set to 0 at the user's own risk. 6163 */ 6164 struct ip6_opt_home_address *oh; 6165 in6_addr_t tmp; 6166 6167 if (ipst->ips_ipv6_ignore_home_address_opt) 6168 goto opt_error; 6169 6170 if (hdr_type != IPPROTO_DSTOPTS) 6171 goto opt_error; 6172 optused = 2 + optptr[1]; 6173 if (optused > optlen) 6174 goto bad_opt; 6175 6176 /* 6177 * We did this dest. opt the first time 6178 * around (i.e. before AH processing). 
6179 * If we've done AH... stop now. 6180 */ 6181 if (first_mp != mp) { 6182 ipsec_in_t *ii; 6183 6184 ii = (ipsec_in_t *)first_mp->b_rptr; 6185 if (ii->ipsec_in_ah_sa != NULL) 6186 break; 6187 } 6188 6189 oh = (struct ip6_opt_home_address *)optptr; 6190 /* Check total length and alignment */ 6191 if (optused < sizeof (*oh) || 6192 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6193 goto opt_error; 6194 /* Swap ip6_src and the home address */ 6195 tmp = ip6h->ip6_src; 6196 /* XXX Note: only 8 byte alignment option */ 6197 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6198 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6199 break; 6200 } 6201 6202 case IP6OPT_TUNNEL_LIMIT: 6203 if (hdr_type != IPPROTO_DSTOPTS) { 6204 goto opt_error; 6205 } 6206 optused = 2 + optptr[1]; 6207 if (optused > optlen) { 6208 goto bad_opt; 6209 } 6210 if (optused != 3) { 6211 goto opt_error; 6212 } 6213 break; 6214 6215 default: 6216 errtype = "unknown"; 6217 /* FALLTHROUGH */ 6218 opt_error: 6219 /* Determine which zone should send error */ 6220 zoneid = ipif_lookup_addr_zoneid_v6( 6221 &ip6h->ip6_dst, ill, ipst); 6222 switch (IP6OPT_TYPE(opt_type)) { 6223 case IP6OPT_TYPE_SKIP: 6224 optused = 2 + optptr[1]; 6225 if (optused > optlen) 6226 goto bad_opt; 6227 ip1dbg(("ip_process_options_v6: %s " 6228 "opt 0x%x skipped\n", 6229 errtype, opt_type)); 6230 break; 6231 case IP6OPT_TYPE_DISCARD: 6232 ip1dbg(("ip_process_options_v6: %s " 6233 "opt 0x%x; packet dropped\n", 6234 errtype, opt_type)); 6235 freemsg(first_mp); 6236 return (-1); 6237 case IP6OPT_TYPE_ICMP: 6238 if (zoneid == ALL_ZONES) { 6239 freemsg(first_mp); 6240 return (-1); 6241 } 6242 icmp_param_problem_v6(WR(q), first_mp, 6243 ICMP6_PARAMPROB_OPTION, 6244 (uint32_t)(optptr - 6245 (uint8_t *)ip6h), 6246 B_FALSE, B_FALSE, zoneid, ipst); 6247 return (-1); 6248 case IP6OPT_TYPE_FORCEICMP: 6249 /* 6250 * If we don't have a zone and the dst 6251 * addr is multicast, then pick a zone 6252 * based on the inbound interface. 
6253 */ 6254 if (zoneid == ALL_ZONES && 6255 IN6_IS_ADDR_MULTICAST( 6256 &ip6h->ip6_dst)) { 6257 ipif = ipif_select_source_v6( 6258 ill, &ip6h->ip6_src, 6259 B_TRUE, 6260 IPV6_PREFER_SRC_DEFAULT, 6261 ALL_ZONES); 6262 if (ipif != NULL) { 6263 zoneid = 6264 ipif->ipif_zoneid; 6265 ipif_refrele(ipif); 6266 } 6267 } 6268 if (zoneid == ALL_ZONES) { 6269 freemsg(first_mp); 6270 return (-1); 6271 } 6272 icmp_param_problem_v6(WR(q), first_mp, 6273 ICMP6_PARAMPROB_OPTION, 6274 (uint32_t)(optptr - 6275 (uint8_t *)ip6h), 6276 B_FALSE, B_TRUE, zoneid, ipst); 6277 return (-1); 6278 default: 6279 ASSERT(0); 6280 } 6281 } 6282 } 6283 optlen -= optused; 6284 optptr += optused; 6285 } 6286 return (ret); 6287 6288 bad_opt: 6289 /* Determine which zone should send error */ 6290 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6291 if (zoneid == ALL_ZONES) { 6292 freemsg(first_mp); 6293 } else { 6294 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6295 (uint32_t)(optptr - (uint8_t *)ip6h), 6296 B_FALSE, B_FALSE, zoneid, ipst); 6297 } 6298 return (-1); 6299 } 6300 6301 /* 6302 * Process a routing header that is not yet empty. 6303 * Because of RFC 5095, we now reject all route headers. 6304 */ 6305 static void 6306 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6307 ill_t *ill, mblk_t *hada_mp) 6308 { 6309 ip_stack_t *ipst = ill->ill_ipst; 6310 6311 ASSERT(rth->ip6r_segleft != 0); 6312 6313 if (!ipst->ips_ipv6_forward_src_routed) { 6314 /* XXX Check for source routed out same interface? 
*/ 6315 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6316 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6317 freemsg(hada_mp); 6318 freemsg(mp); 6319 return; 6320 } 6321 if (hada_mp != NULL) { 6322 freemsg(hada_mp); 6323 freemsg(mp); 6324 return; 6325 } 6326 /* Sent by forwarding path, and router is global zone */ 6327 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 6328 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), B_FALSE, 6329 B_FALSE, GLOBAL_ZONEID, ipst); 6330 } 6331 6332 /* 6333 * Read side put procedure for IPv6 module. 6334 */ 6335 void 6336 ip_rput_v6(queue_t *q, mblk_t *mp) 6337 { 6338 mblk_t *first_mp; 6339 mblk_t *hada_mp = NULL; 6340 ip6_t *ip6h; 6341 boolean_t ll_multicast = B_FALSE; 6342 boolean_t mctl_present = B_FALSE; 6343 ill_t *ill; 6344 struct iocblk *iocp; 6345 uint_t flags = 0; 6346 mblk_t *dl_mp; 6347 ip_stack_t *ipst; 6348 int check; 6349 6350 ill = (ill_t *)q->q_ptr; 6351 ipst = ill->ill_ipst; 6352 if (ill->ill_state_flags & ILL_CONDEMNED) { 6353 union DL_primitives *dl; 6354 6355 dl = (union DL_primitives *)mp->b_rptr; 6356 /* 6357 * Things are opening or closing - only accept DLPI 6358 * ack messages. If the stream is closing and ip_wsrv 6359 * has completed, ip_close is out of the qwait, but has 6360 * not yet completed qprocsoff. Don't proceed any further 6361 * because the ill has been cleaned up and things hanging 6362 * off the ill have been freed. 6363 */ 6364 if ((mp->b_datap->db_type != M_PCPROTO) || 6365 (dl->dl_primitive == DL_UNITDATA_IND)) { 6366 inet_freemsg(mp); 6367 return; 6368 } 6369 } 6370 6371 dl_mp = NULL; 6372 switch (mp->b_datap->db_type) { 6373 case M_DATA: { 6374 int hlen; 6375 uchar_t *ucp; 6376 struct ether_header *eh; 6377 dl_unitdata_ind_t *dui; 6378 6379 /* 6380 * This is a work-around for CR 6451644, a bug in Nemo. It 6381 * should be removed when that problem is fixed. 
6382 */ 6383 if (ill->ill_mactype == DL_ETHER && 6384 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6385 (ucp = mp->b_rptr)[-1] == (ETHERTYPE_IPV6 & 0xFF) && 6386 ucp[-2] == (ETHERTYPE_IPV6 >> 8)) { 6387 if (hlen >= sizeof (struct ether_vlan_header) && 6388 ucp[-5] == 0 && ucp[-6] == 0x81) 6389 ucp -= sizeof (struct ether_vlan_header); 6390 else 6391 ucp -= sizeof (struct ether_header); 6392 /* 6393 * If it's a group address, then fabricate a 6394 * DL_UNITDATA_IND message. 6395 */ 6396 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6397 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6398 BPRI_HI)) != NULL) { 6399 eh = (struct ether_header *)ucp; 6400 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6401 DB_TYPE(dl_mp) = M_PROTO; 6402 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6403 dui->dl_primitive = DL_UNITDATA_IND; 6404 dui->dl_dest_addr_length = 8; 6405 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6406 dui->dl_src_addr_length = 8; 6407 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6408 8; 6409 dui->dl_group_address = 1; 6410 ucp = (uchar_t *)(dui + 1); 6411 if (ill->ill_sap_length > 0) 6412 ucp += ill->ill_sap_length; 6413 bcopy(&eh->ether_dhost, ucp, 6); 6414 bcopy(&eh->ether_shost, ucp + 8, 6); 6415 ucp = (uchar_t *)(dui + 1); 6416 if (ill->ill_sap_length < 0) 6417 ucp += 8 + ill->ill_sap_length; 6418 bcopy(&eh->ether_type, ucp, 2); 6419 bcopy(&eh->ether_type, ucp + 8, 2); 6420 } 6421 } 6422 break; 6423 } 6424 6425 case M_PROTO: 6426 case M_PCPROTO: 6427 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6428 DL_UNITDATA_IND) { 6429 /* Go handle anything other than data elsewhere. */ 6430 ip_rput_dlpi(q, mp); 6431 return; 6432 } 6433 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6434 6435 /* Save the DLPI header. 
*/ 6436 dl_mp = mp; 6437 mp = mp->b_cont; 6438 dl_mp->b_cont = NULL; 6439 break; 6440 case M_BREAK: 6441 panic("ip_rput_v6: got an M_BREAK"); 6442 /*NOTREACHED*/ 6443 case M_IOCACK: 6444 iocp = (struct iocblk *)mp->b_rptr; 6445 switch (iocp->ioc_cmd) { 6446 case DL_IOC_HDR_INFO: 6447 ill = (ill_t *)q->q_ptr; 6448 ill_fastpath_ack(ill, mp); 6449 return; 6450 default: 6451 putnext(q, mp); 6452 return; 6453 } 6454 /* FALLTHRU */ 6455 case M_ERROR: 6456 case M_HANGUP: 6457 mutex_enter(&ill->ill_lock); 6458 if (ill->ill_state_flags & ILL_CONDEMNED) { 6459 mutex_exit(&ill->ill_lock); 6460 freemsg(mp); 6461 return; 6462 } 6463 ill_refhold_locked(ill); 6464 mutex_exit(&ill->ill_lock); 6465 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6466 return; 6467 case M_CTL: 6468 if ((MBLKL(mp) > sizeof (int)) && 6469 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6470 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6471 mctl_present = B_TRUE; 6472 break; 6473 } 6474 putnext(q, mp); 6475 return; 6476 case M_IOCNAK: 6477 iocp = (struct iocblk *)mp->b_rptr; 6478 switch (iocp->ioc_cmd) { 6479 case DL_IOC_HDR_INFO: 6480 ip_rput_other(NULL, q, mp, NULL); 6481 return; 6482 default: 6483 break; 6484 } 6485 /* FALLTHRU */ 6486 default: 6487 putnext(q, mp); 6488 return; 6489 } 6490 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6491 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6492 (mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6493 /* 6494 * if db_ref > 1 then copymsg and free original. Packet may be 6495 * changed and do not want other entity who has a reference to this 6496 * message to trip over the changes. This is a blind change because 6497 * trying to catch all places that might change packet is too 6498 * difficult (since it may be a module above this one). 
6499 */ 6500 if (mp->b_datap->db_ref > 1) { 6501 mblk_t *mp1; 6502 6503 mp1 = copymsg(mp); 6504 freemsg(mp); 6505 if (mp1 == NULL) { 6506 first_mp = NULL; 6507 goto discard; 6508 } 6509 mp = mp1; 6510 } 6511 first_mp = mp; 6512 if (mctl_present) { 6513 hada_mp = first_mp; 6514 mp = first_mp->b_cont; 6515 } 6516 6517 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6518 freemsg(mp); 6519 return; 6520 } 6521 6522 ip6h = (ip6_t *)mp->b_rptr; 6523 6524 /* 6525 * ip:::receive must see ipv6 packets with a full header, 6526 * and so is placed after the IP6_MBLK_HDR_ERR check. 6527 */ 6528 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6529 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6530 int, 0); 6531 6532 if (check != IP6_MBLK_OK) { 6533 freemsg(mp); 6534 return; 6535 } 6536 6537 DTRACE_PROBE4(ip6__physical__in__start, 6538 ill_t *, ill, ill_t *, NULL, 6539 ip6_t *, ip6h, mblk_t *, first_mp); 6540 6541 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6542 ipst->ips_ipv6firewall_physical_in, 6543 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6544 6545 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6546 6547 if (first_mp == NULL) 6548 return; 6549 6550 /* 6551 * Attach any necessary label information to this packet. 6552 */ 6553 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 6554 if (ip6opt_ls != 0) 6555 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 6556 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 6557 goto discard; 6558 } 6559 6560 /* IP observability hook. 
*/ 6561 if (ipst->ips_ip6_observe.he_interested) { 6562 zoneid_t dzone; 6563 6564 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 6565 ALL_ZONES); 6566 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, 6567 ill, ipst); 6568 } 6569 6570 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6571 IPV6_DEFAULT_VERS_AND_FLOW) { 6572 /* 6573 * It may be a bit too expensive to do this mapped address 6574 * check here, but in the interest of robustness, it seems 6575 * like the correct place. 6576 * TODO: Avoid this check for e.g. connected TCP sockets 6577 */ 6578 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6579 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6580 goto discard; 6581 } 6582 6583 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6584 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6585 goto discard; 6586 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6587 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6588 goto discard; 6589 } 6590 6591 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6592 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6593 } else { 6594 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6595 goto discard; 6596 } 6597 freemsg(dl_mp); 6598 return; 6599 6600 discard: 6601 if (dl_mp != NULL) 6602 freeb(dl_mp); 6603 freemsg(first_mp); 6604 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6605 } 6606 6607 /* 6608 * Walk through the IPv6 packet in mp and see if there's an AH header 6609 * in it. See if the AH header needs to get done before other headers in 6610 * the packet. (Worker function for ipsec_early_ah_v6().) 
6611 */ 6612 #define IPSEC_HDR_DONT_PROCESS 0 6613 #define IPSEC_HDR_PROCESS 1 6614 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 6615 static int 6616 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6617 { 6618 uint_t length; 6619 uint_t ehdrlen; 6620 uint8_t *whereptr; 6621 uint8_t *endptr; 6622 uint8_t *nexthdrp; 6623 ip6_dest_t *desthdr; 6624 ip6_rthdr_t *rthdr; 6625 ip6_t *ip6h; 6626 6627 /* 6628 * For now just pullup everything. In general, the less pullups, 6629 * the better, but there's so much squirrelling through anyway, 6630 * it's just easier this way. 6631 */ 6632 if (!pullupmsg(mp, -1)) { 6633 return (IPSEC_MEMORY_ERROR); 6634 } 6635 6636 ip6h = (ip6_t *)mp->b_rptr; 6637 length = IPV6_HDR_LEN; 6638 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6639 endptr = mp->b_wptr; 6640 6641 /* 6642 * We can't just use the argument nexthdr in the place 6643 * of nexthdrp becaue we don't dereference nexthdrp 6644 * till we confirm whether it is a valid address. 6645 */ 6646 nexthdrp = &ip6h->ip6_nxt; 6647 while (whereptr < endptr) { 6648 /* Is there enough left for len + nexthdr? */ 6649 if (whereptr + MIN_EHDR_LEN > endptr) 6650 return (IPSEC_MEMORY_ERROR); 6651 6652 switch (*nexthdrp) { 6653 case IPPROTO_HOPOPTS: 6654 case IPPROTO_DSTOPTS: 6655 /* Assumes the headers are identical for hbh and dst */ 6656 desthdr = (ip6_dest_t *)whereptr; 6657 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6658 if ((uchar_t *)desthdr + ehdrlen > endptr) 6659 return (IPSEC_MEMORY_ERROR); 6660 /* 6661 * Return DONT_PROCESS because the destination 6662 * options header may be for each hop in a 6663 * routing-header, and we only want AH if we're 6664 * finished with routing headers. 
6665 */ 6666 if (*nexthdrp == IPPROTO_DSTOPTS) 6667 return (IPSEC_HDR_DONT_PROCESS); 6668 nexthdrp = &desthdr->ip6d_nxt; 6669 break; 6670 case IPPROTO_ROUTING: 6671 rthdr = (ip6_rthdr_t *)whereptr; 6672 6673 /* 6674 * If there's more hops left on the routing header, 6675 * return now with DON'T PROCESS. 6676 */ 6677 if (rthdr->ip6r_segleft > 0) 6678 return (IPSEC_HDR_DONT_PROCESS); 6679 6680 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6681 if ((uchar_t *)rthdr + ehdrlen > endptr) 6682 return (IPSEC_MEMORY_ERROR); 6683 nexthdrp = &rthdr->ip6r_nxt; 6684 break; 6685 case IPPROTO_FRAGMENT: 6686 /* Wait for reassembly */ 6687 return (IPSEC_HDR_DONT_PROCESS); 6688 case IPPROTO_AH: 6689 *nexthdr = IPPROTO_AH; 6690 return (IPSEC_HDR_PROCESS); 6691 case IPPROTO_NONE: 6692 /* No next header means we're finished */ 6693 default: 6694 return (IPSEC_HDR_DONT_PROCESS); 6695 } 6696 length += ehdrlen; 6697 whereptr += ehdrlen; 6698 } 6699 /* 6700 * Malformed/truncated packet. 6701 */ 6702 return (IPSEC_MEMORY_ERROR); 6703 } 6704 6705 /* 6706 * Path for AH if options are present. If this is the first time we are 6707 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6708 * Otherwise, just fanout. Return value answers the boolean question: 6709 * "Did I consume the mblk you sent me?" 6710 * 6711 * Sometimes AH needs to be done before other IPv6 headers for security 6712 * reasons. This function (and its ipsec_needs_processing_v6() above) 6713 * indicates if that is so, and fans out to the appropriate IPsec protocol 6714 * for the datagram passed in. 
6715 */ 6716 static boolean_t 6717 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6718 ill_t *ill, ill_t *inill, mblk_t *hada_mp, zoneid_t zoneid) 6719 { 6720 mblk_t *mp; 6721 uint8_t nexthdr; 6722 ipsec_in_t *ii = NULL; 6723 ah_t *ah; 6724 ipsec_status_t ipsec_rc; 6725 ip_stack_t *ipst = ill->ill_ipst; 6726 netstack_t *ns = ipst->ips_netstack; 6727 ipsec_stack_t *ipss = ns->netstack_ipsec; 6728 6729 ASSERT((hada_mp == NULL) || (!mctl_present)); 6730 6731 switch (ipsec_needs_processing_v6( 6732 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6733 case IPSEC_MEMORY_ERROR: 6734 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6735 freemsg(hada_mp); 6736 freemsg(first_mp); 6737 return (B_TRUE); 6738 case IPSEC_HDR_DONT_PROCESS: 6739 return (B_FALSE); 6740 } 6741 6742 /* Default means send it to AH! */ 6743 ASSERT(nexthdr == IPPROTO_AH); 6744 if (!mctl_present) { 6745 mp = first_mp; 6746 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 6747 if (first_mp == NULL) { 6748 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6749 "allocation failure.\n")); 6750 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6751 freemsg(hada_mp); 6752 freemsg(mp); 6753 return (B_TRUE); 6754 } 6755 /* 6756 * Store the ill_index so that when we come back 6757 * from IPSEC we ride on the same queue. 6758 */ 6759 ii = (ipsec_in_t *)first_mp->b_rptr; 6760 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6761 ii->ipsec_in_rill_index = inill->ill_phyint->phyint_ifindex; 6762 first_mp->b_cont = mp; 6763 } 6764 /* 6765 * Cache hardware acceleration info. 
6766 */ 6767 if (hada_mp != NULL) { 6768 ASSERT(ii != NULL); 6769 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6770 "caching data attr.\n")); 6771 ii->ipsec_in_accelerated = B_TRUE; 6772 ii->ipsec_in_da = hada_mp; 6773 } 6774 6775 if (!ipsec_loaded(ipss)) { 6776 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 6777 return (B_TRUE); 6778 } 6779 6780 ah = ipsec_inbound_ah_sa(first_mp, ns); 6781 if (ah == NULL) 6782 return (B_TRUE); 6783 ASSERT(ii->ipsec_in_ah_sa != NULL); 6784 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6785 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6786 6787 switch (ipsec_rc) { 6788 case IPSEC_STATUS_SUCCESS: 6789 /* we're done with IPsec processing, send it up */ 6790 ip_fanout_proto_again(first_mp, ill, inill, NULL); 6791 break; 6792 case IPSEC_STATUS_FAILED: 6793 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 6794 break; 6795 case IPSEC_STATUS_PENDING: 6796 /* no action needed */ 6797 break; 6798 } 6799 return (B_TRUE); 6800 } 6801 6802 static boolean_t 6803 ip_iptun_input_v6(mblk_t *ipsec_mp, mblk_t *data_mp, 6804 size_t hdr_len, uint8_t nexthdr, zoneid_t zoneid, ill_t *ill, 6805 ip_stack_t *ipst) 6806 { 6807 conn_t *connp; 6808 6809 ASSERT(ipsec_mp == NULL || ipsec_mp->b_cont == data_mp); 6810 6811 connp = ipcl_classify_v6(data_mp, nexthdr, hdr_len, zoneid, ipst); 6812 if (connp != NULL) { 6813 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 6814 connp->conn_recv(connp, ipsec_mp != NULL ? ipsec_mp : data_mp, 6815 NULL); 6816 CONN_DEC_REF(connp); 6817 return (B_TRUE); 6818 } 6819 return (B_FALSE); 6820 } 6821 6822 /* 6823 * Validate the IPv6 mblk for alignment. 
6824 */ 6825 int 6826 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 6827 { 6828 int pkt_len, ip6_len; 6829 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 6830 6831 /* check for alignment and full IPv6 header */ 6832 if (!OK_32PTR((uchar_t *)ip6h) || 6833 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6834 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6835 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6836 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6837 return (IP6_MBLK_HDR_ERR); 6838 } 6839 ip6h = (ip6_t *)mp->b_rptr; 6840 } 6841 6842 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6843 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6844 6845 if (mp->b_cont == NULL) 6846 pkt_len = mp->b_wptr - mp->b_rptr; 6847 else 6848 pkt_len = msgdsize(mp); 6849 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6850 6851 /* 6852 * Check for bogus (too short packet) and packet which 6853 * was padded by the link layer. 6854 */ 6855 if (ip6_len != pkt_len) { 6856 ssize_t diff; 6857 6858 if (ip6_len > pkt_len) { 6859 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 6860 ip6_len, pkt_len)); 6861 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 6862 return (IP6_MBLK_LEN_ERR); 6863 } 6864 diff = (ssize_t)(pkt_len - ip6_len); 6865 6866 if (!adjmsg(mp, -diff)) { 6867 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 6868 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6869 return (IP6_MBLK_LEN_ERR); 6870 } 6871 6872 /* 6873 * adjmsg may have freed an mblk from the chain, hence 6874 * invalidate any hw checksum here. This will force IP to 6875 * calculate the checksum in sw, but only for this packet. 6876 */ 6877 DB_CKSUMFLAGS(mp) = 0; 6878 } 6879 return (IP6_MBLK_OK); 6880 } 6881 6882 /* 6883 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 6884 * ip_rput_v6 has already verified alignment, the min length, the version, 6885 * and db_ref = 1. 6886 * 6887 * The ill passed in (the arg named inill) is the ill that the packet 6888 * actually arrived on. 
We need to remember this when saving the 6889 * input interface index into potential IPV6_PKTINFO data in 6890 * ip_add_info_v6(). 6891 * 6892 * This routine doesn't free dl_mp; that's the caller's responsibility on 6893 * return. (Note that the callers are complex enough that there's no tail 6894 * recursion here anyway.) 6895 */ 6896 void 6897 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 6898 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6899 { 6900 ire_t *ire = NULL; 6901 ill_t *ill = inill; 6902 ill_t *outill; 6903 uint8_t *whereptr; 6904 uint8_t nexthdr; 6905 uint16_t remlen; 6906 uint_t prev_nexthdr_offset; 6907 uint_t used; 6908 size_t old_pkt_len; 6909 size_t pkt_len; 6910 uint16_t ip6_len; 6911 uint_t hdr_len; 6912 boolean_t mctl_present; 6913 mblk_t *first_mp; 6914 mblk_t *first_mp1; 6915 boolean_t no_forward; 6916 ip6_hbh_t *hbhhdr; 6917 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 6918 conn_t *connp; 6919 uint32_t ports; 6920 zoneid_t zoneid = GLOBAL_ZONEID; 6921 uint16_t hck_flags, reass_hck_flags; 6922 uint32_t reass_sum; 6923 boolean_t cksum_err; 6924 mblk_t *mp1; 6925 ip_stack_t *ipst = inill->ill_ipst; 6926 ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb; 6927 in6_addr_t lb_dst; 6928 int lb_ret = ILB_PASSED; 6929 6930 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 6931 6932 if (hada_mp != NULL) { 6933 /* 6934 * It's an IPsec accelerated packet. 6935 * Keep a pointer to the data attributes around until 6936 * we allocate the ipsecinfo structure. 6937 */ 6938 IPSECHW_DEBUG(IPSECHW_PKT, 6939 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 6940 hada_mp->b_cont = NULL; 6941 /* 6942 * Since it is accelerated, it came directly from 6943 * the ill. 
6944 */ 6945 ASSERT(mctl_present == B_FALSE); 6946 ASSERT(mp->b_datap->db_type != M_CTL); 6947 } 6948 6949 ip6h = (ip6_t *)mp->b_rptr; 6950 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6951 old_pkt_len = pkt_len = ip6_len; 6952 6953 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 6954 hck_flags = DB_CKSUMFLAGS(mp); 6955 else 6956 hck_flags = 0; 6957 6958 /* Clear checksum flags in case we need to forward */ 6959 DB_CKSUMFLAGS(mp) = 0; 6960 reass_sum = reass_hck_flags = 0; 6961 6962 nexthdr = ip6h->ip6_nxt; 6963 6964 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 6965 (uchar_t *)ip6h); 6966 whereptr = (uint8_t *)&ip6h[1]; 6967 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 6968 6969 /* Process hop by hop header options */ 6970 if (nexthdr == IPPROTO_HOPOPTS) { 6971 uint_t ehdrlen; 6972 uint8_t *optptr; 6973 6974 if (remlen < MIN_EHDR_LEN) 6975 goto pkt_too_short; 6976 if (mp->b_cont != NULL && 6977 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 6978 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 6979 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6980 freemsg(hada_mp); 6981 freemsg(first_mp); 6982 return; 6983 } 6984 ip6h = (ip6_t *)mp->b_rptr; 6985 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6986 } 6987 hbhhdr = (ip6_hbh_t *)whereptr; 6988 nexthdr = hbhhdr->ip6h_nxt; 6989 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 6990 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 6991 6992 if (remlen < ehdrlen) 6993 goto pkt_too_short; 6994 if (mp->b_cont != NULL && 6995 whereptr + ehdrlen > mp->b_wptr) { 6996 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 6997 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6998 freemsg(hada_mp); 6999 freemsg(first_mp); 7000 return; 7001 } 7002 ip6h = (ip6_t *)mp->b_rptr; 7003 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7004 hbhhdr = (ip6_hbh_t *)whereptr; 7005 } 7006 7007 optptr = whereptr + 2; 7008 whereptr += ehdrlen; 7009 remlen -= ehdrlen; 7010 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7011 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7012 case -1: 7013 /* 7014 * Packet has been consumed and any 7015 * needed ICMP messages sent. 7016 */ 7017 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7018 freemsg(hada_mp); 7019 return; 7020 case 0: 7021 /* no action needed */ 7022 break; 7023 case 1: 7024 /* Known router alert */ 7025 goto ipv6forus; 7026 } 7027 } 7028 7029 /* 7030 * On incoming v6 multicast packets we will bypass the ire table, 7031 * and assume that the read queue corresponds to the targetted 7032 * interface. 7033 * 7034 * The effect of this is the same as the IPv4 original code, but is 7035 * much cleaner I think. See ip_rput for how that was done. 7036 */ 7037 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7038 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7039 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7040 7041 /* 7042 * So that we don't end up with dups, only one ill in an IPMP 7043 * group is nominated to receive multicast data traffic. 7044 * However, link-locals on any underlying interfaces will have 7045 * joined their solicited-node multicast addresses and we must 7046 * accept those packets. (We don't attempt to precisely 7047 * filter out duplicate solicited-node multicast packets since 7048 * e.g. an IPMP interface and underlying interface may have 7049 * the same solicited-node multicast address.) Note that we 7050 * won't generally have duplicates because we only issue a 7051 * DL_ENABMULTI_REQ on one interface in a group; the exception 7052 * is when PHYI_MULTI_BCAST is set. 7053 */ 7054 if (IS_UNDER_IPMP(ill) && !ill->ill_nom_cast && 7055 !IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 7056 goto drop_pkt; 7057 } 7058 7059 /* 7060 * XXX TODO Give to mrouted to for multicast forwarding. 
7061 */ 7062 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 7063 ALL_ZONES) == NULL) { 7064 if (ip_debug > 3) { 7065 /* ip2dbg */ 7066 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7067 " which is not for us: %s\n", AF_INET6, 7068 &ip6h->ip6_dst); 7069 } 7070 drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7071 freemsg(hada_mp); 7072 freemsg(first_mp); 7073 return; 7074 } 7075 if (ip_debug > 3) { 7076 /* ip2dbg */ 7077 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7078 AF_INET6, &ip6h->ip6_dst); 7079 } 7080 zoneid = GLOBAL_ZONEID; 7081 goto ipv6forus; 7082 } 7083 7084 /* 7085 * Find an ire that matches destination. For link-local addresses 7086 * we have to match the ill. 7087 * TBD for site local addresses. 7088 */ 7089 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7090 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7091 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7092 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 7093 } else { 7094 if (ilb_has_rules(ilbs) && ILB_SUPP_L4(nexthdr)) { 7095 /* For convenience, we just pull up the mblk. */ 7096 if (mp->b_cont != NULL) { 7097 if (pullupmsg(mp, -1) == 0) { 7098 BUMP_MIB(ill->ill_ip_mib, 7099 ipIfStatsInDiscards); 7100 freemsg(hada_mp); 7101 freemsg(first_mp); 7102 return; 7103 } 7104 hdr_len = pkt_len - remlen; 7105 ip6h = (ip6_t *)mp->b_rptr; 7106 whereptr = (uint8_t *)ip6h + hdr_len; 7107 } 7108 lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, nexthdr, 7109 whereptr, &lb_dst); 7110 if (lb_ret == ILB_DROPPED) { 7111 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7112 freemsg(hada_mp); 7113 freemsg(first_mp); 7114 return; 7115 } 7116 } 7117 7118 ire = ire_cache_lookup_v6((lb_ret == ILB_BALANCED) ? &lb_dst : 7119 &ip6h->ip6_dst, ALL_ZONES, msg_getlabel(mp), ipst); 7120 7121 if (ire != NULL && ire->ire_stq != NULL && 7122 ire->ire_zoneid != GLOBAL_ZONEID && 7123 ire->ire_zoneid != ALL_ZONES) { 7124 /* 7125 * Should only use IREs that are visible from the 7126 * global zone for forwarding. 
7127 */ 7128 ire_refrele(ire); 7129 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7130 GLOBAL_ZONEID, msg_getlabel(mp), ipst); 7131 } 7132 } 7133 7134 if (ire == NULL) { 7135 /* 7136 * No matching IRE found. Mark this packet as having 7137 * originated externally. 7138 */ 7139 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7140 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7141 if (!(ill->ill_flags & ILLF_ROUTER)) { 7142 BUMP_MIB(ill->ill_ip_mib, 7143 ipIfStatsInAddrErrors); 7144 } 7145 freemsg(hada_mp); 7146 freemsg(first_mp); 7147 return; 7148 } 7149 if (ip6h->ip6_hops <= 1) { 7150 if (hada_mp != NULL) 7151 goto hada_drop; 7152 /* Sent by forwarding path, and router is global zone */ 7153 icmp_time_exceeded_v6(WR(q), first_mp, 7154 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7155 GLOBAL_ZONEID, ipst); 7156 return; 7157 } 7158 /* 7159 * Per RFC 3513 section 2.5.2, we must not forward packets with 7160 * an unspecified source address. 7161 */ 7162 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7163 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7164 freemsg(hada_mp); 7165 freemsg(first_mp); 7166 return; 7167 } 7168 mp->b_prev = (mblk_t *)(uintptr_t) 7169 ill->ill_phyint->phyint_ifindex; 7170 ip_newroute_v6(q, mp, (lb_ret == ILB_BALANCED) ? &lb_dst : 7171 &ip6h->ip6_dst, &ip6h->ip6_src, 7172 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7173 GLOBAL_ZONEID, ipst); 7174 return; 7175 } 7176 /* we have a matching IRE */ 7177 if (ire->ire_stq != NULL) { 7178 /* 7179 * To be quicker, we may wish not to chase pointers 7180 * (ire->ire_ipif->ipif_ill...) and instead store the 7181 * forwarding policy in the ire. An unfortunate side- 7182 * effect of this would be requiring an ire flush whenever 7183 * the ILLF_ROUTER flag changes. For now, chase pointers 7184 * once and store in the boolean no_forward. 7185 * 7186 * This appears twice to keep it out of the non-forwarding, 7187 * yes-it's-for-us-on-the-right-interface case. 
7188 */ 7189 no_forward = ((ill->ill_flags & 7190 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7191 7192 ASSERT(first_mp == mp); 7193 /* 7194 * This ire has a send-to queue - forward the packet. 7195 */ 7196 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7197 freemsg(hada_mp); 7198 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7199 if (no_forward) { 7200 BUMP_MIB(ill->ill_ip_mib, 7201 ipIfStatsInAddrErrors); 7202 } 7203 freemsg(mp); 7204 ire_refrele(ire); 7205 return; 7206 } 7207 /* 7208 * ipIfStatsHCInForwDatagrams should only be increment if there 7209 * will be an attempt to forward the packet, which is why we 7210 * increment after the above condition has been checked. 7211 */ 7212 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7213 if (ip6h->ip6_hops <= 1) { 7214 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7215 /* Sent by forwarding path, and router is global zone */ 7216 icmp_time_exceeded_v6(WR(q), mp, 7217 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7218 GLOBAL_ZONEID, ipst); 7219 ire_refrele(ire); 7220 return; 7221 } 7222 /* 7223 * Per RFC 3513 section 2.5.2, we must not forward packets with 7224 * an unspecified source address. 7225 */ 7226 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7227 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7228 freemsg(mp); 7229 ire_refrele(ire); 7230 return; 7231 } 7232 7233 if (is_system_labeled()) { 7234 mblk_t *mp1; 7235 7236 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7237 BUMP_MIB(ill->ill_ip_mib, 7238 ipIfStatsForwProhibits); 7239 freemsg(mp); 7240 ire_refrele(ire); 7241 return; 7242 } 7243 /* Size may have changed */ 7244 mp = mp1; 7245 ip6h = (ip6_t *)mp->b_rptr; 7246 pkt_len = msgdsize(mp); 7247 } 7248 7249 if (pkt_len > ire->ire_max_frag) { 7250 int max_frag = ire->ire_max_frag; 7251 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7252 /* 7253 * Handle labeled packet resizing. 
7254 */ 7255 if (is_system_labeled()) { 7256 max_frag = tsol_pmtu_adjust(mp, max_frag, 7257 pkt_len - old_pkt_len, AF_INET6); 7258 } 7259 7260 /* Sent by forwarding path, and router is global zone */ 7261 icmp_pkt2big_v6(WR(q), mp, max_frag, 7262 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7263 ire_refrele(ire); 7264 return; 7265 } 7266 7267 /* 7268 * Check to see if we're forwarding the packet to a 7269 * different link from which it came. If so, check the 7270 * source and destination addresses since routers must not 7271 * forward any packets with link-local source or 7272 * destination addresses to other links. Otherwise (if 7273 * we're forwarding onto the same link), conditionally send 7274 * a redirect message. 7275 */ 7276 if (ire->ire_rfq != q && 7277 !IS_IN_SAME_ILLGRP(ill, (ill_t *)ire->ire_rfq->q_ptr)) { 7278 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7279 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7280 BUMP_MIB(ill->ill_ip_mib, 7281 ipIfStatsInAddrErrors); 7282 freemsg(mp); 7283 ire_refrele(ire); 7284 return; 7285 } 7286 /* TBD add site-local check at site boundary? */ 7287 } else if (ipst->ips_ipv6_send_redirects) { 7288 in6_addr_t *v6targ; 7289 in6_addr_t gw_addr_v6; 7290 ire_t *src_ire_v6 = NULL; 7291 7292 /* 7293 * Don't send a redirect when forwarding a source 7294 * routed packet. 7295 */ 7296 if (ip_source_routed_v6(ip6h, mp, ipst)) 7297 goto forward; 7298 7299 mutex_enter(&ire->ire_lock); 7300 gw_addr_v6 = ire->ire_gateway_addr_v6; 7301 mutex_exit(&ire->ire_lock); 7302 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7303 v6targ = &gw_addr_v6; 7304 /* 7305 * We won't send redirects to a router 7306 * that doesn't have a link local 7307 * address, but will forward. 
7308 */ 7309 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7310 BUMP_MIB(ill->ill_ip_mib, 7311 ipIfStatsInAddrErrors); 7312 goto forward; 7313 } 7314 } else { 7315 v6targ = &ip6h->ip6_dst; 7316 } 7317 7318 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7319 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7320 GLOBAL_ZONEID, 0, NULL, 7321 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7322 ipst); 7323 7324 if (src_ire_v6 != NULL) { 7325 /* 7326 * The source is directly connected. 7327 */ 7328 mp1 = copymsg(mp); 7329 if (mp1 != NULL) { 7330 icmp_send_redirect_v6(WR(q), 7331 mp1, v6targ, &ip6h->ip6_dst, 7332 ill, B_FALSE); 7333 } 7334 ire_refrele(src_ire_v6); 7335 } 7336 } 7337 7338 forward: 7339 /* Hoplimit verified above */ 7340 ip6h->ip6_hops--; 7341 7342 outill = ire->ire_ipif->ipif_ill; 7343 7344 DTRACE_PROBE4(ip6__forwarding__start, 7345 ill_t *, inill, ill_t *, outill, 7346 ip6_t *, ip6h, mblk_t *, mp); 7347 7348 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7349 ipst->ips_ipv6firewall_forwarding, 7350 inill, outill, ip6h, mp, mp, 0, ipst); 7351 7352 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7353 7354 if (mp != NULL) { 7355 UPDATE_IB_PKT_COUNT(ire); 7356 ire->ire_last_used_time = lbolt; 7357 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7358 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7359 } 7360 IRE_REFRELE(ire); 7361 return; 7362 } 7363 7364 /* 7365 * Need to put on correct queue for reassembly to find it. 7366 * No need to use put() since reassembly has its own locks. 7367 * Note: multicast packets and packets destined to addresses 7368 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7369 * the arriving ill. Unlike the IPv4 case, enabling strict 7370 * destination multihoming will prevent accepting packets 7371 * addressed to an IRE_LOCAL on lo0. 
7372 */ 7373 if (ire->ire_rfq != q) { 7374 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7375 == NULL) { 7376 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7377 freemsg(hada_mp); 7378 freemsg(first_mp); 7379 return; 7380 } 7381 if (ire->ire_rfq != NULL) { 7382 q = ire->ire_rfq; 7383 ill = (ill_t *)q->q_ptr; 7384 ASSERT(ill != NULL); 7385 } 7386 } 7387 7388 zoneid = ire->ire_zoneid; 7389 UPDATE_IB_PKT_COUNT(ire); 7390 ire->ire_last_used_time = lbolt; 7391 /* Don't use the ire after this point, we'll NULL it out to be sure. */ 7392 ire_refrele(ire); 7393 ire = NULL; 7394 ipv6forus: 7395 /* 7396 * Looks like this packet is for us one way or another. 7397 * This is where we'll process destination headers etc. 7398 */ 7399 for (; ; ) { 7400 switch (nexthdr) { 7401 case IPPROTO_TCP: { 7402 uint16_t *up; 7403 uint32_t sum; 7404 int offset; 7405 7406 hdr_len = pkt_len - remlen; 7407 7408 if (hada_mp != NULL) { 7409 ip0dbg(("tcp hada drop\n")); 7410 goto hada_drop; 7411 } 7412 7413 7414 /* TCP needs all of the TCP header */ 7415 if (remlen < TCP_MIN_HEADER_LENGTH) 7416 goto pkt_too_short; 7417 if (mp->b_cont != NULL && 7418 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7419 if (!pullupmsg(mp, 7420 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7421 BUMP_MIB(ill->ill_ip_mib, 7422 ipIfStatsInDiscards); 7423 freemsg(first_mp); 7424 return; 7425 } 7426 hck_flags = 0; 7427 ip6h = (ip6_t *)mp->b_rptr; 7428 whereptr = (uint8_t *)ip6h + hdr_len; 7429 } 7430 /* 7431 * Extract the offset field from the TCP header. 7432 */ 7433 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7434 if (offset != 5) { 7435 if (offset < 5) { 7436 ip1dbg(("ip_rput_data_v6: short " 7437 "TCP data offset")); 7438 BUMP_MIB(ill->ill_ip_mib, 7439 ipIfStatsInDiscards); 7440 freemsg(first_mp); 7441 return; 7442 } 7443 /* 7444 * There must be TCP options. 7445 * Make sure we can grab them. 
7446 */ 7447 offset <<= 2; 7448 if (remlen < offset) 7449 goto pkt_too_short; 7450 if (mp->b_cont != NULL && 7451 whereptr + offset > mp->b_wptr) { 7452 if (!pullupmsg(mp, 7453 hdr_len + offset)) { 7454 BUMP_MIB(ill->ill_ip_mib, 7455 ipIfStatsInDiscards); 7456 freemsg(first_mp); 7457 return; 7458 } 7459 hck_flags = 0; 7460 ip6h = (ip6_t *)mp->b_rptr; 7461 whereptr = (uint8_t *)ip6h + hdr_len; 7462 } 7463 } 7464 7465 up = (uint16_t *)&ip6h->ip6_src; 7466 /* 7467 * TCP checksum calculation. First sum up the 7468 * pseudo-header fields: 7469 * - Source IPv6 address 7470 * - Destination IPv6 address 7471 * - TCP payload length 7472 * - TCP protocol ID 7473 */ 7474 sum = htons(IPPROTO_TCP + remlen) + 7475 up[0] + up[1] + up[2] + up[3] + 7476 up[4] + up[5] + up[6] + up[7] + 7477 up[8] + up[9] + up[10] + up[11] + 7478 up[12] + up[13] + up[14] + up[15]; 7479 7480 /* Fold initial sum */ 7481 sum = (sum & 0xffff) + (sum >> 16); 7482 7483 mp1 = mp->b_cont; 7484 7485 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7486 IP6_STAT(ipst, ip6_in_sw_cksum); 7487 7488 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7489 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7490 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7491 mp, mp1, cksum_err); 7492 7493 if (cksum_err) { 7494 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7495 7496 if (hck_flags & HCK_FULLCKSUM) { 7497 IP6_STAT(ipst, 7498 ip6_tcp_in_full_hw_cksum_err); 7499 } else if (hck_flags & HCK_PARTIALCKSUM) { 7500 IP6_STAT(ipst, 7501 ip6_tcp_in_part_hw_cksum_err); 7502 } else { 7503 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7504 } 7505 freemsg(first_mp); 7506 return; 7507 } 7508 tcp_fanout: 7509 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7510 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7511 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7512 return; 7513 } 7514 case IPPROTO_SCTP: 7515 { 7516 sctp_hdr_t *sctph; 7517 uint32_t calcsum, pktsum; 7518 uint_t hdr_len = pkt_len - remlen; 7519 sctp_stack_t *sctps; 7520 7521 sctps = 
inill->ill_ipst->ips_netstack->netstack_sctp; 7522 7523 /* SCTP needs all of the SCTP header */ 7524 if (remlen < sizeof (*sctph)) { 7525 goto pkt_too_short; 7526 } 7527 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7528 ASSERT(mp->b_cont != NULL); 7529 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7530 BUMP_MIB(ill->ill_ip_mib, 7531 ipIfStatsInDiscards); 7532 freemsg(mp); 7533 return; 7534 } 7535 ip6h = (ip6_t *)mp->b_rptr; 7536 whereptr = (uint8_t *)ip6h + hdr_len; 7537 } 7538 7539 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7540 /* checksum */ 7541 pktsum = sctph->sh_chksum; 7542 sctph->sh_chksum = 0; 7543 calcsum = sctp_cksum(mp, hdr_len); 7544 if (calcsum != pktsum) { 7545 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7546 freemsg(mp); 7547 return; 7548 } 7549 sctph->sh_chksum = pktsum; 7550 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7551 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7552 ports, zoneid, mp, sctps)) == NULL) { 7553 ip_fanout_sctp_raw(first_mp, ill, 7554 (ipha_t *)ip6h, B_FALSE, ports, 7555 mctl_present, 7556 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7557 B_TRUE, zoneid); 7558 return; 7559 } 7560 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7561 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7562 B_FALSE, mctl_present); 7563 return; 7564 } 7565 case IPPROTO_UDP: { 7566 uint16_t *up; 7567 uint32_t sum; 7568 7569 hdr_len = pkt_len - remlen; 7570 7571 if (hada_mp != NULL) { 7572 ip0dbg(("udp hada drop\n")); 7573 goto hada_drop; 7574 } 7575 7576 /* Verify that at least the ports are present */ 7577 if (remlen < UDPH_SIZE) 7578 goto pkt_too_short; 7579 if (mp->b_cont != NULL && 7580 whereptr + UDPH_SIZE > mp->b_wptr) { 7581 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7582 BUMP_MIB(ill->ill_ip_mib, 7583 ipIfStatsInDiscards); 7584 freemsg(first_mp); 7585 return; 7586 } 7587 hck_flags = 0; 7588 ip6h = (ip6_t *)mp->b_rptr; 7589 whereptr = (uint8_t *)ip6h + hdr_len; 7590 } 7591 7592 /* 7593 * Before going through the regular 
checksum 7594 * calculation, make sure the received checksum 7595 * is non-zero. RFC 2460 says, a 0x0000 checksum 7596 * in a UDP packet (within IPv6 packet) is invalid 7597 * and should be replaced by 0xffff. This makes 7598 * sense as regular checksum calculation will 7599 * pass for both the cases i.e. 0x0000 and 0xffff. 7600 * Removing one of the case makes error detection 7601 * stronger. 7602 */ 7603 7604 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7605 /* 0x0000 checksum is invalid */ 7606 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7607 "checksum value 0x0000\n")); 7608 BUMP_MIB(ill->ill_ip_mib, 7609 udpIfStatsInCksumErrs); 7610 freemsg(first_mp); 7611 return; 7612 } 7613 7614 up = (uint16_t *)&ip6h->ip6_src; 7615 7616 /* 7617 * UDP checksum calculation. First sum up the 7618 * pseudo-header fields: 7619 * - Source IPv6 address 7620 * - Destination IPv6 address 7621 * - UDP payload length 7622 * - UDP protocol ID 7623 */ 7624 7625 sum = htons(IPPROTO_UDP + remlen) + 7626 up[0] + up[1] + up[2] + up[3] + 7627 up[4] + up[5] + up[6] + up[7] + 7628 up[8] + up[9] + up[10] + up[11] + 7629 up[12] + up[13] + up[14] + up[15]; 7630 7631 /* Fold initial sum */ 7632 sum = (sum & 0xffff) + (sum >> 16); 7633 7634 if (reass_hck_flags != 0) { 7635 hck_flags = reass_hck_flags; 7636 7637 IP_CKSUM_RECV_REASS(hck_flags, 7638 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7639 sum, reass_sum, cksum_err); 7640 } else { 7641 mp1 = mp->b_cont; 7642 7643 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7644 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7645 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7646 mp, mp1, cksum_err); 7647 } 7648 7649 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7650 IP6_STAT(ipst, ip6_in_sw_cksum); 7651 7652 if (cksum_err) { 7653 BUMP_MIB(ill->ill_ip_mib, 7654 udpIfStatsInCksumErrs); 7655 7656 if (hck_flags & HCK_FULLCKSUM) 7657 IP6_STAT(ipst, 7658 ip6_udp_in_full_hw_cksum_err); 7659 else if (hck_flags & HCK_PARTIALCKSUM) 7660 IP6_STAT(ipst, 7661 
ip6_udp_in_part_hw_cksum_err); 7662 else 7663 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7664 7665 freemsg(first_mp); 7666 return; 7667 } 7668 goto udp_fanout; 7669 } 7670 case IPPROTO_ICMPV6: { 7671 uint16_t *up; 7672 uint32_t sum; 7673 uint_t hdr_len = pkt_len - remlen; 7674 7675 if (hada_mp != NULL) { 7676 ip0dbg(("icmp hada drop\n")); 7677 goto hada_drop; 7678 } 7679 7680 up = (uint16_t *)&ip6h->ip6_src; 7681 sum = htons(IPPROTO_ICMPV6 + remlen) + 7682 up[0] + up[1] + up[2] + up[3] + 7683 up[4] + up[5] + up[6] + up[7] + 7684 up[8] + up[9] + up[10] + up[11] + 7685 up[12] + up[13] + up[14] + up[15]; 7686 sum = (sum & 0xffff) + (sum >> 16); 7687 sum = IP_CSUM(mp, hdr_len, sum); 7688 if (sum != 0) { 7689 /* IPv6 ICMP checksum failed */ 7690 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7691 "failed %x\n", 7692 sum)); 7693 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7694 BUMP_MIB(ill->ill_icmp6_mib, 7695 ipv6IfIcmpInErrors); 7696 freemsg(first_mp); 7697 return; 7698 } 7699 7700 icmp_fanout: 7701 /* Check variable for testing applications */ 7702 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7703 freemsg(first_mp); 7704 return; 7705 } 7706 /* 7707 * Assume that there is always at least one conn for 7708 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7709 * where there is no conn. 7710 */ 7711 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7712 ilm_t *ilm; 7713 ilm_walker_t ilw; 7714 7715 ASSERT(!IS_LOOPBACK(ill)); 7716 /* 7717 * In the multicast case, applications may have 7718 * joined the group from different zones, so we 7719 * need to deliver the packet to each of them. 7720 * Loop through the multicast memberships 7721 * structures (ilm) on the receive ill and send 7722 * a copy of the packet up each matching one. 
7723 */ 7724 ilm = ilm_walker_start(&ilw, inill); 7725 for (; ilm != NULL; 7726 ilm = ilm_walker_step(&ilw, ilm)) { 7727 if (!IN6_ARE_ADDR_EQUAL( 7728 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7729 continue; 7730 if (!ipif_lookup_zoneid( 7731 ilw.ilw_walk_ill, ilm->ilm_zoneid, 7732 IPIF_UP, NULL)) 7733 continue; 7734 7735 first_mp1 = ip_copymsg(first_mp); 7736 if (first_mp1 == NULL) 7737 continue; 7738 icmp_inbound_v6(q, first_mp1, 7739 ilw.ilw_walk_ill, inill, 7740 hdr_len, mctl_present, 0, 7741 ilm->ilm_zoneid, dl_mp); 7742 } 7743 ilm_walker_finish(&ilw); 7744 } else { 7745 first_mp1 = ip_copymsg(first_mp); 7746 if (first_mp1 != NULL) 7747 icmp_inbound_v6(q, first_mp1, ill, 7748 inill, hdr_len, mctl_present, 0, 7749 zoneid, dl_mp); 7750 } 7751 goto proto_fanout; 7752 } 7753 case IPPROTO_ENCAP: 7754 case IPPROTO_IPV6: 7755 if (ip_iptun_input_v6(mctl_present ? first_mp : NULL, 7756 mp, pkt_len - remlen, nexthdr, zoneid, ill, ipst)) { 7757 return; 7758 } 7759 /* 7760 * If there was no IP tunnel data-link bound to 7761 * receive this packet, then we fall through to 7762 * allow potential raw sockets bound to either of 7763 * these protocols to pick it up. 7764 */ 7765 /* FALLTHRU */ 7766 proto_fanout: 7767 default: { 7768 /* 7769 * Handle protocols with which IPv6 is less intimate. 7770 */ 7771 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 7772 7773 if (hada_mp != NULL) { 7774 ip0dbg(("default hada drop\n")); 7775 goto hada_drop; 7776 } 7777 7778 /* 7779 * Enable sending ICMP for "Unknown" nexthdr 7780 * case. i.e. where we did not FALLTHRU from 7781 * IPPROTO_ICMPV6 processing case above. 
7782 * If we did FALLTHRU, then the packet has already been 7783 * processed for IPPF, don't process it again in 7784 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7785 * flags 7786 */ 7787 if (nexthdr != IPPROTO_ICMPV6) 7788 proto_flags |= IP_FF_SEND_ICMP; 7789 else 7790 proto_flags |= IP6_NO_IPPOLICY; 7791 7792 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7793 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7794 mctl_present, zoneid); 7795 return; 7796 } 7797 7798 case IPPROTO_DSTOPTS: { 7799 uint_t ehdrlen; 7800 uint8_t *optptr; 7801 ip6_dest_t *desthdr; 7802 7803 /* If packet is too short, look no further */ 7804 if (remlen < MIN_EHDR_LEN) 7805 goto pkt_too_short; 7806 7807 /* Check if AH is present. */ 7808 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7809 inill, hada_mp, zoneid)) { 7810 return; 7811 } 7812 7813 /* 7814 * Reinitialize pointers, as ipsec_early_ah_v6() does 7815 * complete pullups. We don't have to do more pullups 7816 * as a result. 7817 */ 7818 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7819 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7820 ip6h = (ip6_t *)mp->b_rptr; 7821 7822 desthdr = (ip6_dest_t *)whereptr; 7823 nexthdr = desthdr->ip6d_nxt; 7824 prev_nexthdr_offset = (uint_t)(whereptr - 7825 (uint8_t *)ip6h); 7826 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7827 if (remlen < ehdrlen) 7828 goto pkt_too_short; 7829 optptr = whereptr + 2; 7830 /* 7831 * Note: XXX This code does not seem to make 7832 * distinction between Destination Options Header 7833 * being before/after Routing Header which can 7834 * happen if we are at the end of source route. 7835 * This may become significant in future. 7836 * (No real significant Destination Options are 7837 * defined/implemented yet ). 7838 */ 7839 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7840 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 7841 case -1: 7842 /* 7843 * Packet has been consumed and any needed 7844 * ICMP errors sent. 
7845 */ 7846 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7847 freemsg(hada_mp); 7848 return; 7849 case 0: 7850 /* No action needed continue */ 7851 break; 7852 case 1: 7853 /* 7854 * Unnexpected return value 7855 * (Router alert is a Hop-by-Hop option) 7856 */ 7857 #ifdef DEBUG 7858 panic("ip_rput_data_v6: router " 7859 "alert hbh opt indication in dest opt"); 7860 /*NOTREACHED*/ 7861 #else 7862 freemsg(hada_mp); 7863 freemsg(first_mp); 7864 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7865 return; 7866 #endif 7867 } 7868 used = ehdrlen; 7869 break; 7870 } 7871 case IPPROTO_FRAGMENT: { 7872 ip6_frag_t *fraghdr; 7873 size_t no_frag_hdr_len; 7874 7875 if (hada_mp != NULL) { 7876 ip0dbg(("frag hada drop\n")); 7877 goto hada_drop; 7878 } 7879 7880 ASSERT(first_mp == mp); 7881 if (remlen < sizeof (ip6_frag_t)) 7882 goto pkt_too_short; 7883 7884 if (mp->b_cont != NULL && 7885 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 7886 if (!pullupmsg(mp, 7887 pkt_len - remlen + sizeof (ip6_frag_t))) { 7888 BUMP_MIB(ill->ill_ip_mib, 7889 ipIfStatsInDiscards); 7890 freemsg(mp); 7891 return; 7892 } 7893 hck_flags = 0; 7894 ip6h = (ip6_t *)mp->b_rptr; 7895 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7896 } 7897 7898 fraghdr = (ip6_frag_t *)whereptr; 7899 used = (uint_t)sizeof (ip6_frag_t); 7900 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 7901 7902 /* 7903 * Invoke the CGTP (multirouting) filtering module to 7904 * process the incoming packet. Packets identified as 7905 * duplicates must be discarded. Filtering is active 7906 * only if the the ip_cgtp_filter ndd variable is 7907 * non-zero. 
7908 */ 7909 if (ipst->ips_ip_cgtp_filter && 7910 ipst->ips_ip_cgtp_filter_ops != NULL) { 7911 int cgtp_flt_pkt; 7912 netstackid_t stackid; 7913 7914 stackid = ipst->ips_netstack->netstack_stackid; 7915 7916 cgtp_flt_pkt = 7917 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 7918 stackid, inill->ill_phyint->phyint_ifindex, 7919 ip6h, fraghdr); 7920 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 7921 freemsg(mp); 7922 return; 7923 } 7924 } 7925 7926 /* Restore the flags */ 7927 DB_CKSUMFLAGS(mp) = hck_flags; 7928 7929 mp = ip_rput_frag_v6(ill, inill, mp, ip6h, fraghdr, 7930 remlen - used, &prev_nexthdr_offset, 7931 &reass_sum, &reass_hck_flags); 7932 if (mp == NULL) { 7933 /* Reassembly is still pending */ 7934 return; 7935 } 7936 /* The first mblk are the headers before the frag hdr */ 7937 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 7938 7939 first_mp = mp; /* mp has most likely changed! */ 7940 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 7941 ip6h = (ip6_t *)mp->b_rptr; 7942 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 7943 whereptr = mp->b_rptr + no_frag_hdr_len; 7944 remlen = ntohs(ip6h->ip6_plen) + 7945 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 7946 pkt_len = msgdsize(mp); 7947 used = 0; 7948 break; 7949 } 7950 case IPPROTO_HOPOPTS: { 7951 if (hada_mp != NULL) { 7952 ip0dbg(("hop hada drop\n")); 7953 goto hada_drop; 7954 } 7955 /* 7956 * Illegal header sequence. 7957 * (Hop-by-hop headers are processed above 7958 * and required to immediately follow IPv6 header) 7959 */ 7960 icmp_param_problem_v6(WR(q), first_mp, 7961 ICMP6_PARAMPROB_NEXTHEADER, 7962 prev_nexthdr_offset, 7963 B_FALSE, B_FALSE, zoneid, ipst); 7964 return; 7965 } 7966 case IPPROTO_ROUTING: { 7967 uint_t ehdrlen; 7968 ip6_rthdr_t *rthdr; 7969 7970 /* If packet is too short, look no further */ 7971 if (remlen < MIN_EHDR_LEN) 7972 goto pkt_too_short; 7973 7974 /* Check if AH is present. 
*/ 7975 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7976 inill, hada_mp, zoneid)) { 7977 return; 7978 } 7979 7980 /* 7981 * Reinitialize pointers, as ipsec_early_ah_v6() does 7982 * complete pullups. We don't have to do more pullups 7983 * as a result. 7984 */ 7985 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7986 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7987 ip6h = (ip6_t *)mp->b_rptr; 7988 7989 rthdr = (ip6_rthdr_t *)whereptr; 7990 nexthdr = rthdr->ip6r_nxt; 7991 prev_nexthdr_offset = (uint_t)(whereptr - 7992 (uint8_t *)ip6h); 7993 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7994 if (remlen < ehdrlen) 7995 goto pkt_too_short; 7996 if (rthdr->ip6r_segleft != 0) { 7997 /* Not end of source route */ 7998 if (ll_multicast) { 7999 BUMP_MIB(ill->ill_ip_mib, 8000 ipIfStatsForwProhibits); 8001 freemsg(hada_mp); 8002 freemsg(mp); 8003 return; 8004 } 8005 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8006 hada_mp); 8007 return; 8008 } 8009 used = ehdrlen; 8010 break; 8011 } 8012 case IPPROTO_AH: 8013 case IPPROTO_ESP: { 8014 /* 8015 * Fast path for AH/ESP. If this is the first time 8016 * we are sending a datagram to AH/ESP, allocate 8017 * a IPSEC_IN message and prepend it. Otherwise, 8018 * just fanout. 8019 */ 8020 8021 ipsec_in_t *ii; 8022 int ipsec_rc; 8023 ipsec_stack_t *ipss; 8024 8025 ipss = ipst->ips_netstack->netstack_ipsec; 8026 if (!mctl_present) { 8027 ASSERT(first_mp == mp); 8028 first_mp = ipsec_in_alloc(B_FALSE, 8029 ipst->ips_netstack); 8030 if (first_mp == NULL) { 8031 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8032 "allocation failure.\n")); 8033 BUMP_MIB(ill->ill_ip_mib, 8034 ipIfStatsInDiscards); 8035 freemsg(mp); 8036 return; 8037 } 8038 /* 8039 * Store the ill_index so that when we come back 8040 * from IPSEC we ride on the same queue. 
8041 */ 8042 ii = (ipsec_in_t *)first_mp->b_rptr; 8043 ii->ipsec_in_ill_index = 8044 ill->ill_phyint->phyint_ifindex; 8045 ii->ipsec_in_rill_index = 8046 inill->ill_phyint->phyint_ifindex; 8047 first_mp->b_cont = mp; 8048 /* 8049 * Cache hardware acceleration info. 8050 */ 8051 if (hada_mp != NULL) { 8052 IPSECHW_DEBUG(IPSECHW_PKT, 8053 ("ip_rput_data_v6: " 8054 "caching data attr.\n")); 8055 ii->ipsec_in_accelerated = B_TRUE; 8056 ii->ipsec_in_da = hada_mp; 8057 hada_mp = NULL; 8058 } 8059 } else { 8060 ii = (ipsec_in_t *)first_mp->b_rptr; 8061 } 8062 8063 if (!ipsec_loaded(ipss)) { 8064 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8065 zoneid, ipst); 8066 return; 8067 } 8068 8069 /* select inbound SA and have IPsec process the pkt */ 8070 if (nexthdr == IPPROTO_ESP) { 8071 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8072 ipst->ips_netstack); 8073 if (esph == NULL) 8074 return; 8075 ASSERT(ii->ipsec_in_esp_sa != NULL); 8076 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8077 NULL); 8078 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8079 first_mp, esph); 8080 } else { 8081 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8082 ipst->ips_netstack); 8083 if (ah == NULL) 8084 return; 8085 ASSERT(ii->ipsec_in_ah_sa != NULL); 8086 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8087 NULL); 8088 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8089 first_mp, ah); 8090 } 8091 8092 switch (ipsec_rc) { 8093 case IPSEC_STATUS_SUCCESS: 8094 break; 8095 case IPSEC_STATUS_FAILED: 8096 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8097 /* FALLTHRU */ 8098 case IPSEC_STATUS_PENDING: 8099 return; 8100 } 8101 /* we're done with IPsec processing, send it up */ 8102 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8103 return; 8104 } 8105 case IPPROTO_NONE: 8106 /* All processing is done. Count as "delivered". 
*/ 8107 freemsg(hada_mp); 8108 freemsg(first_mp); 8109 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8110 return; 8111 } 8112 whereptr += used; 8113 ASSERT(remlen >= used); 8114 remlen -= used; 8115 } 8116 /* NOTREACHED */ 8117 8118 pkt_too_short: 8119 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8120 ip6_len, pkt_len, remlen)); 8121 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8122 freemsg(hada_mp); 8123 freemsg(first_mp); 8124 return; 8125 udp_fanout: 8126 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8127 connp = NULL; 8128 } else { 8129 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8130 ipst); 8131 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8132 CONN_DEC_REF(connp); 8133 connp = NULL; 8134 } 8135 } 8136 8137 if (connp == NULL) { 8138 uint32_t ports; 8139 8140 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8141 UDP_PORTS_OFFSET); 8142 IP6_STAT(ipst, ip6_udp_slow_path); 8143 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8144 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8145 zoneid); 8146 return; 8147 } 8148 8149 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 8150 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 8151 freemsg(first_mp); 8152 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8153 CONN_DEC_REF(connp); 8154 return; 8155 } 8156 8157 /* Initiate IPPF processing */ 8158 if (IP6_IN_IPP(flags, ipst)) { 8159 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8160 if (mp == NULL) { 8161 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8162 CONN_DEC_REF(connp); 8163 return; 8164 } 8165 } 8166 8167 if (connp->conn_ip_recvpktinfo || 8168 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8169 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8170 if (mp == NULL) { 8171 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8172 CONN_DEC_REF(connp); 8173 return; 8174 } 8175 } 8176 8177 IP6_STAT(ipst, ip6_udp_fast_path); 8178 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 
8179 8180 /* Send it upstream */ 8181 (connp->conn_recv)(connp, mp, NULL); 8182 8183 CONN_DEC_REF(connp); 8184 freemsg(hada_mp); 8185 return; 8186 8187 hada_drop: 8188 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8189 /* IPsec kstats: bump counter here */ 8190 freemsg(hada_mp); 8191 freemsg(first_mp); 8192 } 8193 8194 /* 8195 * Reassemble fragment. 8196 * When it returns a completed message the first mblk will only contain 8197 * the headers prior to the fragment header. 8198 * 8199 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8200 * of the preceding header. This is needed to patch the previous header's 8201 * nexthdr field when reassembly completes. 8202 */ 8203 static mblk_t * 8204 ip_rput_frag_v6(ill_t *ill, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 8205 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8206 uint32_t *cksum_val, uint16_t *cksum_flags) 8207 { 8208 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8209 uint16_t offset; 8210 boolean_t more_frags; 8211 uint8_t nexthdr = fraghdr->ip6f_nxt; 8212 in6_addr_t *v6dst_ptr; 8213 in6_addr_t *v6src_ptr; 8214 uint_t end; 8215 uint_t hdr_length; 8216 size_t count; 8217 ipf_t *ipf; 8218 ipf_t **ipfp; 8219 ipfb_t *ipfb; 8220 mblk_t *mp1; 8221 uint8_t ecn_info = 0; 8222 size_t msg_len; 8223 mblk_t *tail_mp; 8224 mblk_t *t_mp; 8225 boolean_t pruned = B_FALSE; 8226 uint32_t sum_val; 8227 uint16_t sum_flags; 8228 ip_stack_t *ipst = ill->ill_ipst; 8229 8230 if (cksum_val != NULL) 8231 *cksum_val = 0; 8232 if (cksum_flags != NULL) 8233 *cksum_flags = 0; 8234 8235 /* 8236 * We utilize hardware computed checksum info only for UDP since 8237 * IP fragmentation is a normal occurence for the protocol. In 8238 * addition, checksum offload support for IP fragments carrying 8239 * UDP payload is commonly implemented across network adapters. 
8240 */ 8241 ASSERT(inill != NULL); 8242 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(inill) && 8243 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8244 mblk_t *mp1 = mp->b_cont; 8245 int32_t len; 8246 8247 /* Record checksum information from the packet */ 8248 sum_val = (uint32_t)DB_CKSUM16(mp); 8249 sum_flags = DB_CKSUMFLAGS(mp); 8250 8251 /* fragmented payload offset from beginning of mblk */ 8252 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8253 8254 if ((sum_flags & HCK_PARTIALCKSUM) && 8255 (mp1 == NULL || mp1->b_cont == NULL) && 8256 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8257 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8258 uint32_t adj; 8259 /* 8260 * Partial checksum has been calculated by hardware 8261 * and attached to the packet; in addition, any 8262 * prepended extraneous data is even byte aligned. 8263 * If any such data exists, we adjust the checksum; 8264 * this would also handle any postpended data. 8265 */ 8266 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8267 mp, mp1, len, adj); 8268 8269 /* One's complement subtract extraneous checksum */ 8270 if (adj >= sum_val) 8271 sum_val = ~(adj - sum_val) & 0xFFFF; 8272 else 8273 sum_val -= adj; 8274 } 8275 } else { 8276 sum_val = 0; 8277 sum_flags = 0; 8278 } 8279 8280 /* Clear hardware checksumming flag */ 8281 DB_CKSUMFLAGS(mp) = 0; 8282 8283 /* 8284 * Note: Fragment offset in header is in 8-octet units. 8285 * Clearing least significant 3 bits not only extracts 8286 * it but also gets it in units of octets. 8287 */ 8288 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8289 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8290 8291 /* 8292 * Is the more frags flag on and the payload length not a multiple 8293 * of eight? 
8294 */ 8295 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8296 zoneid_t zoneid; 8297 8298 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8299 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8300 if (zoneid == ALL_ZONES) { 8301 freemsg(mp); 8302 return (NULL); 8303 } 8304 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8305 (uint32_t)((char *)&ip6h->ip6_plen - 8306 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8307 return (NULL); 8308 } 8309 8310 v6src_ptr = &ip6h->ip6_src; 8311 v6dst_ptr = &ip6h->ip6_dst; 8312 end = remlen; 8313 8314 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8315 end += offset; 8316 8317 /* 8318 * Would fragment cause reassembled packet to have a payload length 8319 * greater than IP_MAXPACKET - the max payload size? 8320 */ 8321 if (end > IP_MAXPACKET) { 8322 zoneid_t zoneid; 8323 8324 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8325 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8326 if (zoneid == ALL_ZONES) { 8327 freemsg(mp); 8328 return (NULL); 8329 } 8330 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8331 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8332 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8333 return (NULL); 8334 } 8335 8336 /* 8337 * This packet just has one fragment. Reassembly not 8338 * needed. 8339 */ 8340 if (!more_frags && offset == 0) { 8341 goto reass_done; 8342 } 8343 8344 /* 8345 * Drop the fragmented as early as possible, if 8346 * we don't have resource(s) to re-assemble. 8347 */ 8348 if (ipst->ips_ip_reass_queue_bytes == 0) { 8349 freemsg(mp); 8350 return (NULL); 8351 } 8352 8353 /* Record the ECN field info. */ 8354 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8355 /* 8356 * If this is not the first fragment, dump the unfragmentable 8357 * portion of the packet. 8358 */ 8359 if (offset) 8360 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8361 8362 /* 8363 * Fragmentation reassembly. 
Each ILL has a hash table for 8364 * queueing packets undergoing reassembly for all IPIFs 8365 * associated with the ILL. The hash is based on the packet 8366 * IP ident field. The ILL frag hash table was allocated 8367 * as a timer block at the time the ILL was created. Whenever 8368 * there is anything on the reassembly queue, the timer will 8369 * be running. 8370 */ 8371 msg_len = MBLKSIZE(mp); 8372 tail_mp = mp; 8373 while (tail_mp->b_cont != NULL) { 8374 tail_mp = tail_mp->b_cont; 8375 msg_len += MBLKSIZE(tail_mp); 8376 } 8377 /* 8378 * If the reassembly list for this ILL will get too big 8379 * prune it. 8380 */ 8381 8382 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8383 ipst->ips_ip_reass_queue_bytes) { 8384 ill_frag_prune(ill, 8385 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8386 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8387 pruned = B_TRUE; 8388 } 8389 8390 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8391 mutex_enter(&ipfb->ipfb_lock); 8392 8393 ipfp = &ipfb->ipfb_ipf; 8394 /* Try to find an existing fragment queue for this packet. */ 8395 for (;;) { 8396 ipf = ipfp[0]; 8397 if (ipf) { 8398 /* 8399 * It has to match on ident, source address, and 8400 * dest address. 8401 */ 8402 if (ipf->ipf_ident == ident && 8403 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8404 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8405 8406 /* 8407 * If we have received too many 8408 * duplicate fragments for this packet 8409 * free it. 8410 */ 8411 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8412 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8413 freemsg(mp); 8414 mutex_exit(&ipfb->ipfb_lock); 8415 return (NULL); 8416 } 8417 8418 break; 8419 } 8420 ipfp = &ipf->ipf_hash_next; 8421 continue; 8422 } 8423 8424 8425 /* 8426 * If we pruned the list, do we want to store this new 8427 * fragment?. We apply an optimization here based on the 8428 * fact that most fragments will be received in order. 
8429 * So if the offset of this incoming fragment is zero, 8430 * it is the first fragment of a new packet. We will 8431 * keep it. Otherwise drop the fragment, as we have 8432 * probably pruned the packet already (since the 8433 * packet cannot be found). 8434 */ 8435 8436 if (pruned && offset != 0) { 8437 mutex_exit(&ipfb->ipfb_lock); 8438 freemsg(mp); 8439 return (NULL); 8440 } 8441 8442 /* New guy. Allocate a frag message. */ 8443 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8444 if (!mp1) { 8445 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8446 freemsg(mp); 8447 partial_reass_done: 8448 mutex_exit(&ipfb->ipfb_lock); 8449 return (NULL); 8450 } 8451 8452 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8453 /* 8454 * Too many fragmented packets in this hash bucket. 8455 * Free the oldest. 8456 */ 8457 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8458 } 8459 8460 mp1->b_cont = mp; 8461 8462 /* Initialize the fragment header. */ 8463 ipf = (ipf_t *)mp1->b_rptr; 8464 ipf->ipf_mp = mp1; 8465 ipf->ipf_ptphn = ipfp; 8466 ipfp[0] = ipf; 8467 ipf->ipf_hash_next = NULL; 8468 ipf->ipf_ident = ident; 8469 ipf->ipf_v6src = *v6src_ptr; 8470 ipf->ipf_v6dst = *v6dst_ptr; 8471 /* Record reassembly start time. 
*/ 8472 ipf->ipf_timestamp = gethrestime_sec(); 8473 /* Record ipf generation and account for frag header */ 8474 ipf->ipf_gen = ill->ill_ipf_gen++; 8475 ipf->ipf_count = MBLKSIZE(mp1); 8476 ipf->ipf_protocol = nexthdr; 8477 ipf->ipf_nf_hdr_len = 0; 8478 ipf->ipf_prev_nexthdr_offset = 0; 8479 ipf->ipf_last_frag_seen = B_FALSE; 8480 ipf->ipf_ecn = ecn_info; 8481 ipf->ipf_num_dups = 0; 8482 ipfb->ipfb_frag_pkts++; 8483 ipf->ipf_checksum = 0; 8484 ipf->ipf_checksum_flags = 0; 8485 8486 /* Store checksum value in fragment header */ 8487 if (sum_flags != 0) { 8488 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8489 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8490 ipf->ipf_checksum = sum_val; 8491 ipf->ipf_checksum_flags = sum_flags; 8492 } 8493 8494 /* 8495 * We handle reassembly two ways. In the easy case, 8496 * where all the fragments show up in order, we do 8497 * minimal bookkeeping, and just clip new pieces on 8498 * the end. If we ever see a hole, then we go off 8499 * to ip_reassemble which has to mark the pieces and 8500 * keep track of the number of holes, etc. Obviously, 8501 * the point of having both mechanisms is so we can 8502 * handle the easy case as efficiently as possible. 8503 */ 8504 if (offset == 0) { 8505 /* Easy case, in-order reassembly so far. */ 8506 /* Update the byte count */ 8507 ipf->ipf_count += msg_len; 8508 ipf->ipf_tail_mp = tail_mp; 8509 /* 8510 * Keep track of next expected offset in 8511 * ipf_end. 8512 */ 8513 ipf->ipf_end = end; 8514 ipf->ipf_nf_hdr_len = hdr_length; 8515 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8516 } else { 8517 /* Hard case, hole at the beginning. */ 8518 ipf->ipf_tail_mp = NULL; 8519 /* 8520 * ipf_end == 0 means that we have given up 8521 * on easy reassembly. 8522 */ 8523 ipf->ipf_end = 0; 8524 8525 /* Forget checksum offload from now on */ 8526 ipf->ipf_checksum_flags = 0; 8527 8528 /* 8529 * ipf_hole_cnt is set by ip_reassemble. 8530 * ipf_count is updated by ip_reassemble. 
8531 * No need to check for return value here 8532 * as we don't expect reassembly to complete or 8533 * fail for the first fragment itself. 8534 */ 8535 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8536 msg_len); 8537 } 8538 /* Update per ipfb and ill byte counts */ 8539 ipfb->ipfb_count += ipf->ipf_count; 8540 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8541 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8542 /* If the frag timer wasn't already going, start it. */ 8543 mutex_enter(&ill->ill_lock); 8544 ill_frag_timer_start(ill); 8545 mutex_exit(&ill->ill_lock); 8546 goto partial_reass_done; 8547 } 8548 8549 /* 8550 * If the packet's flag has changed (it could be coming up 8551 * from an interface different than the previous, therefore 8552 * possibly different checksum capability), then forget about 8553 * any stored checksum states. Otherwise add the value to 8554 * the existing one stored in the fragment header. 8555 */ 8556 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8557 sum_val += ipf->ipf_checksum; 8558 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8559 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8560 ipf->ipf_checksum = sum_val; 8561 } else if (ipf->ipf_checksum_flags != 0) { 8562 /* Forget checksum offload from now on */ 8563 ipf->ipf_checksum_flags = 0; 8564 } 8565 8566 /* 8567 * We have a new piece of a datagram which is already being 8568 * reassembled. Update the ECN info if all IP fragments 8569 * are ECN capable. If there is one which is not, clear 8570 * all the info. If there is at least one which has CE 8571 * code point, IP needs to report that up to transport. 
8572 */ 8573 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8574 if (ecn_info == IPH_ECN_CE) 8575 ipf->ipf_ecn = IPH_ECN_CE; 8576 } else { 8577 ipf->ipf_ecn = IPH_ECN_NECT; 8578 } 8579 8580 if (offset && ipf->ipf_end == offset) { 8581 /* The new fragment fits at the end */ 8582 ipf->ipf_tail_mp->b_cont = mp; 8583 /* Update the byte count */ 8584 ipf->ipf_count += msg_len; 8585 /* Update per ipfb and ill byte counts */ 8586 ipfb->ipfb_count += msg_len; 8587 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8588 atomic_add_32(&ill->ill_frag_count, msg_len); 8589 if (more_frags) { 8590 /* More to come. */ 8591 ipf->ipf_end = end; 8592 ipf->ipf_tail_mp = tail_mp; 8593 goto partial_reass_done; 8594 } 8595 } else { 8596 /* 8597 * Go do the hard cases. 8598 * Call ip_reassemble(). 8599 */ 8600 int ret; 8601 8602 if (offset == 0) { 8603 if (ipf->ipf_prev_nexthdr_offset == 0) { 8604 ipf->ipf_nf_hdr_len = hdr_length; 8605 ipf->ipf_prev_nexthdr_offset = 8606 *prev_nexthdr_offset; 8607 } 8608 } 8609 /* Save current byte count */ 8610 count = ipf->ipf_count; 8611 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8612 8613 /* Count of bytes added and subtracted (freeb()ed) */ 8614 count = ipf->ipf_count - count; 8615 if (count) { 8616 /* Update per ipfb and ill byte counts */ 8617 ipfb->ipfb_count += count; 8618 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8619 atomic_add_32(&ill->ill_frag_count, count); 8620 } 8621 if (ret == IP_REASS_PARTIAL) { 8622 goto partial_reass_done; 8623 } else if (ret == IP_REASS_FAILED) { 8624 /* Reassembly failed. Free up all resources */ 8625 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8626 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8627 IP_REASS_SET_START(t_mp, 0); 8628 IP_REASS_SET_END(t_mp, 0); 8629 } 8630 freemsg(mp); 8631 goto partial_reass_done; 8632 } 8633 8634 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8635 } 8636 /* 8637 * We have completed reassembly. 
Unhook the frag header from 8638 * the reassembly list. 8639 * 8640 * Grab the unfragmentable header length next header value out 8641 * of the first fragment 8642 */ 8643 ASSERT(ipf->ipf_nf_hdr_len != 0); 8644 hdr_length = ipf->ipf_nf_hdr_len; 8645 8646 /* 8647 * Before we free the frag header, record the ECN info 8648 * to report back to the transport. 8649 */ 8650 ecn_info = ipf->ipf_ecn; 8651 8652 /* 8653 * Store the nextheader field in the header preceding the fragment 8654 * header 8655 */ 8656 nexthdr = ipf->ipf_protocol; 8657 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8658 ipfp = ipf->ipf_ptphn; 8659 8660 /* We need to supply these to caller */ 8661 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8662 sum_val = ipf->ipf_checksum; 8663 else 8664 sum_val = 0; 8665 8666 mp1 = ipf->ipf_mp; 8667 count = ipf->ipf_count; 8668 ipf = ipf->ipf_hash_next; 8669 if (ipf) 8670 ipf->ipf_ptphn = ipfp; 8671 ipfp[0] = ipf; 8672 atomic_add_32(&ill->ill_frag_count, -count); 8673 ASSERT(ipfb->ipfb_count >= count); 8674 ipfb->ipfb_count -= count; 8675 ipfb->ipfb_frag_pkts--; 8676 mutex_exit(&ipfb->ipfb_lock); 8677 /* Ditch the frag header. */ 8678 mp = mp1->b_cont; 8679 freeb(mp1); 8680 8681 /* 8682 * Make sure the packet is good by doing some sanity 8683 * check. If bad we can silentely drop the packet. 8684 */ 8685 reass_done: 8686 if (hdr_length < sizeof (ip6_frag_t)) { 8687 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8688 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8689 freemsg(mp); 8690 return (NULL); 8691 } 8692 8693 /* 8694 * Remove the fragment header from the initial header by 8695 * splitting the mblk into the non-fragmentable header and 8696 * everthing after the fragment extension header. This has the 8697 * side effect of putting all the headers that need destination 8698 * processing into the b_cont block-- on return this fact is 8699 * used in order to avoid having to look at the extensions 8700 * already processed. 
8701 * 8702 * Note that this code assumes that the unfragmentable portion 8703 * of the header is in the first mblk and increments 8704 * the read pointer past it. If this assumption is broken 8705 * this code fails badly. 8706 */ 8707 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8708 mblk_t *nmp; 8709 8710 if (!(nmp = dupb(mp))) { 8711 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8712 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8713 freemsg(mp); 8714 return (NULL); 8715 } 8716 nmp->b_cont = mp->b_cont; 8717 mp->b_cont = nmp; 8718 nmp->b_rptr += hdr_length; 8719 } 8720 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8721 8722 ip6h = (ip6_t *)mp->b_rptr; 8723 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8724 8725 /* Restore original IP length in header. */ 8726 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8727 /* Record the ECN info. */ 8728 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8729 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8730 8731 /* Reassembly is successful; return checksum information if needed */ 8732 if (cksum_val != NULL) 8733 *cksum_val = sum_val; 8734 if (cksum_flags != NULL) 8735 *cksum_flags = sum_flags; 8736 8737 return (mp); 8738 } 8739 8740 /* 8741 * Given an mblk and a ptr, find the destination address in an IPv6 routing 8742 * header. 8743 */ 8744 static in6_addr_t 8745 pluck_out_dst(mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 8746 { 8747 ip6_rthdr0_t *rt0; 8748 int segleft, numaddr; 8749 in6_addr_t *ap, rv = oldrv; 8750 8751 rt0 = (ip6_rthdr0_t *)whereptr; 8752 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 8753 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 8754 uint8_t *, whereptr); 8755 return (rv); 8756 } 8757 segleft = rt0->ip6r0_segleft; 8758 numaddr = rt0->ip6r0_len / 2; 8759 8760 if ((rt0->ip6r0_len & 0x1) || 8761 whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr || 8762 (segleft > rt0->ip6r0_len / 2)) { 8763 /* 8764 * Corrupt packet. 
Either the routing header length is odd 8765 * (can't happen) or mismatched compared to the packet, or the 8766 * number of addresses is. Return what we can. This will 8767 * only be a problem on forwarded packets that get squeezed 8768 * through an outbound tunnel enforcing IPsec Tunnel Mode. 8769 */ 8770 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 8771 whereptr); 8772 return (rv); 8773 } 8774 8775 if (segleft != 0) { 8776 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 8777 rv = ap[numaddr - 1]; 8778 } 8779 8780 return (rv); 8781 } 8782 8783 /* 8784 * Walk through the options to see if there is a routing header. 8785 * If present get the destination which is the last address of 8786 * the option. 8787 */ 8788 in6_addr_t 8789 ip_get_dst_v6(ip6_t *ip6h, mblk_t *mp, boolean_t *is_fragment) 8790 { 8791 mblk_t *current_mp = mp; 8792 uint8_t nexthdr; 8793 uint8_t *whereptr; 8794 int ehdrlen; 8795 in6_addr_t rv; 8796 8797 whereptr = (uint8_t *)ip6h; 8798 ehdrlen = sizeof (ip6_t); 8799 8800 /* We assume at least the IPv6 base header is within one mblk. */ 8801 ASSERT(mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen); 8802 8803 rv = ip6h->ip6_dst; 8804 nexthdr = ip6h->ip6_nxt; 8805 if (is_fragment != NULL) 8806 *is_fragment = B_FALSE; 8807 8808 /* 8809 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 8810 * no extension headers will be split across mblks. 8811 */ 8812 8813 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 8814 nexthdr == IPPROTO_ROUTING) { 8815 if (nexthdr == IPPROTO_ROUTING) 8816 rv = pluck_out_dst(current_mp, whereptr, rv); 8817 8818 /* 8819 * All IPv6 extension headers have the next-header in byte 8820 * 0, and the (length - 8) in 8-byte-words. 8821 */ 8822 while (whereptr + ehdrlen >= current_mp->b_wptr) { 8823 ehdrlen -= (current_mp->b_wptr - whereptr); 8824 current_mp = current_mp->b_cont; 8825 if (current_mp == NULL) { 8826 /* Bad packet. Return what we can. 
*/ 8827 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 8828 mp, mblk_t *, current_mp, ip6_t *, ip6h); 8829 goto done; 8830 } 8831 whereptr = current_mp->b_rptr; 8832 } 8833 whereptr += ehdrlen; 8834 8835 nexthdr = *whereptr; 8836 ASSERT(whereptr + 1 < current_mp->b_wptr); 8837 ehdrlen = (*(whereptr + 1) + 1) * 8; 8838 } 8839 8840 done: 8841 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 8842 *is_fragment = B_TRUE; 8843 return (rv); 8844 } 8845 8846 /* 8847 * ip_source_routed_v6: 8848 * This function is called by redirect code in ip_rput_data_v6 to 8849 * know whether this packet is source routed through this node i.e 8850 * whether this node (router) is part of the journey. This 8851 * function is called under two cases : 8852 * 8853 * case 1 : Routing header was processed by this node and 8854 * ip_process_rthdr replaced ip6_dst with the next hop 8855 * and we are forwarding the packet to the next hop. 8856 * 8857 * case 2 : Routing header was not processed by this node and we 8858 * are just forwarding the packet. 8859 * 8860 * For case (1) we don't want to send redirects. For case(2) we 8861 * want to send redirects. 
8862 */ 8863 static boolean_t 8864 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 8865 { 8866 uint8_t nexthdr; 8867 in6_addr_t *addrptr; 8868 ip6_rthdr0_t *rthdr; 8869 uint8_t numaddr; 8870 ip6_hbh_t *hbhhdr; 8871 uint_t ehdrlen; 8872 uint8_t *byteptr; 8873 8874 ip2dbg(("ip_source_routed_v6\n")); 8875 nexthdr = ip6h->ip6_nxt; 8876 ehdrlen = IPV6_HDR_LEN; 8877 8878 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8879 while (nexthdr == IPPROTO_HOPOPTS || 8880 nexthdr == IPPROTO_DSTOPTS) { 8881 byteptr = (uint8_t *)ip6h + ehdrlen; 8882 /* 8883 * Check if we have already processed 8884 * packets or we are just a forwarding 8885 * router which only pulled up msgs up 8886 * to IPV6HDR and one HBH ext header 8887 */ 8888 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8889 ip2dbg(("ip_source_routed_v6: Extension" 8890 " headers not processed\n")); 8891 return (B_FALSE); 8892 } 8893 hbhhdr = (ip6_hbh_t *)byteptr; 8894 nexthdr = hbhhdr->ip6h_nxt; 8895 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 8896 } 8897 switch (nexthdr) { 8898 case IPPROTO_ROUTING: 8899 byteptr = (uint8_t *)ip6h + ehdrlen; 8900 /* 8901 * If for some reason, we haven't pulled up 8902 * the routing hdr data mblk, then we must 8903 * not have processed it at all. So for sure 8904 * we are not part of the source routed journey. 8905 */ 8906 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8907 ip2dbg(("ip_source_routed_v6: Routing" 8908 " header not processed\n")); 8909 return (B_FALSE); 8910 } 8911 rthdr = (ip6_rthdr0_t *)byteptr; 8912 /* 8913 * Either we are an intermediate router or the 8914 * last hop before destination and we have 8915 * already processed the routing header. 8916 * If segment_left is greater than or equal to zero, 8917 * then we must be the (numaddr - segleft) entry 8918 * of the routing header. Although ip6r0_segleft 8919 * is a unit8_t variable, we still check for zero 8920 * or greater value, if in case the data type 8921 * is changed someday in future. 
8922 */ 8923 if (rthdr->ip6r0_segleft > 0 || 8924 rthdr->ip6r0_segleft == 0) { 8925 ire_t *ire = NULL; 8926 8927 numaddr = rthdr->ip6r0_len / 2; 8928 addrptr = (in6_addr_t *)((char *)rthdr + 8929 sizeof (*rthdr)); 8930 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 8931 if (addrptr != NULL) { 8932 ire = ire_ctable_lookup_v6(addrptr, NULL, 8933 IRE_LOCAL, NULL, ALL_ZONES, NULL, 8934 MATCH_IRE_TYPE, 8935 ipst); 8936 if (ire != NULL) { 8937 ire_refrele(ire); 8938 return (B_TRUE); 8939 } 8940 ip1dbg(("ip_source_routed_v6: No ire found\n")); 8941 } 8942 } 8943 /* FALLTHRU */ 8944 default: 8945 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 8946 return (B_FALSE); 8947 } 8948 } 8949 8950 /* 8951 * ip_wput_v6 -- Packets sent down from transport modules show up here. 8952 * Assumes that the following set of headers appear in the first 8953 * mblk: 8954 * ip6i_t (if present) CAN also appear as a separate mblk. 8955 * ip6_t 8956 * Any extension headers 8957 * TCP/UDP/SCTP header (if present) 8958 * The routine can handle an ICMPv6 header that is not in the first mblk. 8959 * 8960 * The order to determine the outgoing interface is as follows: 8961 * 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. 8962 * 2. If q is an ill queue and (link local or multicast destination) then 8963 * use that ill. 8964 * 3. If IPV6_BOUND_IF has been set use that ill. 8965 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 8966 * look for the best IRE match for the unspecified group to determine 8967 * the ill. 8968 * 5. For unicast: Just do an IRE lookup for the best match. 8969 * 8970 * arg2 is always a queue_t *. 8971 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 8972 * the zoneid. 8973 * When that queue is not an ill_t, then arg must be a conn_t pointer. 
8974 */ 8975 void 8976 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 8977 { 8978 conn_t *connp = NULL; 8979 queue_t *q = (queue_t *)arg2; 8980 ire_t *ire = NULL; 8981 ire_t *sctp_ire = NULL; 8982 ip6_t *ip6h; 8983 in6_addr_t *v6dstp; 8984 ill_t *ill = NULL; 8985 ipif_t *ipif; 8986 ip6i_t *ip6i; 8987 int cksum_request; /* -1 => normal. */ 8988 /* 1 => Skip TCP/UDP/SCTP checksum */ 8989 /* Otherwise contains insert offset for checksum */ 8990 int unspec_src; 8991 boolean_t do_outrequests; /* Increment OutRequests? */ 8992 mib2_ipIfStatsEntry_t *mibptr; 8993 int match_flags = MATCH_IRE_ILL; 8994 mblk_t *first_mp; 8995 boolean_t mctl_present; 8996 ipsec_out_t *io; 8997 boolean_t multirt_need_resolve = B_FALSE; 8998 mblk_t *copy_mp = NULL; 8999 int err = 0; 9000 int ip6i_flags = 0; 9001 zoneid_t zoneid; 9002 ill_t *saved_ill = NULL; 9003 boolean_t conn_lock_held; 9004 boolean_t need_decref = B_FALSE; 9005 ip_stack_t *ipst; 9006 9007 if (q->q_next != NULL) { 9008 ill = (ill_t *)q->q_ptr; 9009 ipst = ill->ill_ipst; 9010 } else { 9011 connp = (conn_t *)arg; 9012 ASSERT(connp != NULL); 9013 ipst = connp->conn_netstack->netstack_ip; 9014 } 9015 9016 /* 9017 * Highest bit in version field is Reachability Confirmation bit 9018 * used by NUD in ip_xmit_v6(). 9019 */ 9020 #ifdef _BIG_ENDIAN 9021 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9022 #else 9023 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9024 #endif 9025 9026 /* 9027 * M_CTL comes from 5 places 9028 * 9029 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9030 * both V4 and V6 datagrams. 9031 * 9032 * 2) AH/ESP sends down M_CTL after doing their job with both 9033 * V4 and V6 datagrams. 9034 * 9035 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9036 * attached. 
9037 * 9038 * 4) Notifications from an external resolver (for XRESOLV ifs) 9039 * 9040 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9041 * IPsec hardware acceleration support. 9042 * 9043 * We need to handle (1)'s IPv6 case and (3) here. For the 9044 * IPv4 case in (1), and (2), IPSEC processing has already 9045 * started. The code in ip_wput() already knows how to handle 9046 * continuing IPSEC processing (for IPv4 and IPv6). All other 9047 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9048 * for handling. 9049 */ 9050 first_mp = mp; 9051 mctl_present = B_FALSE; 9052 io = NULL; 9053 9054 /* Multidata transmit? */ 9055 if (DB_TYPE(mp) == M_MULTIDATA) { 9056 /* 9057 * We should never get here, since all Multidata messages 9058 * originating from tcp should have been directed over to 9059 * tcp_multisend() in the first place. 9060 */ 9061 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9062 freemsg(mp); 9063 return; 9064 } else if (DB_TYPE(mp) == M_CTL) { 9065 uint32_t mctltype = 0; 9066 uint32_t mlen = MBLKL(first_mp); 9067 9068 mp = mp->b_cont; 9069 mctl_present = B_TRUE; 9070 io = (ipsec_out_t *)first_mp->b_rptr; 9071 9072 /* 9073 * Validate this M_CTL message. The only three types of 9074 * M_CTL messages we expect to see in this code path are 9075 * ipsec_out_t or ipsec_in_t structures (allocated as 9076 * ipsec_info_t unions), or ipsec_ctl_t structures. 9077 * The ipsec_out_type and ipsec_in_type overlap in the two 9078 * data structures, and they are either set to IPSEC_OUT 9079 * or IPSEC_IN depending on which data structure it is. 9080 * ipsec_ctl_t is an IPSEC_CTL. 9081 * 9082 * All other M_CTL messages are sent to ip_wput_nondata() 9083 * for handling. 
9084 */ 9085 if (mlen >= sizeof (io->ipsec_out_type)) 9086 mctltype = io->ipsec_out_type; 9087 9088 if ((mlen == sizeof (ipsec_ctl_t)) && 9089 (mctltype == IPSEC_CTL)) { 9090 ip_output(arg, first_mp, arg2, caller); 9091 return; 9092 } 9093 9094 if ((mlen < sizeof (ipsec_info_t)) || 9095 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9096 mp == NULL) { 9097 ip_wput_nondata(NULL, q, first_mp, NULL); 9098 return; 9099 } 9100 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9101 if (q->q_next == NULL) { 9102 ip6h = (ip6_t *)mp->b_rptr; 9103 /* 9104 * For a freshly-generated TCP dgram that needs IPV6 9105 * processing, don't call ip_wput immediately. We can 9106 * tell this by the ipsec_out_proc_begin. In-progress 9107 * IPSEC_OUT messages have proc_begin set to TRUE, 9108 * and we want to send all IPSEC_IN messages to 9109 * ip_wput() for IPsec processing or finishing. 9110 */ 9111 if (mctltype == IPSEC_IN || 9112 IPVER(ip6h) != IPV6_VERSION || 9113 io->ipsec_out_proc_begin) { 9114 mibptr = &ipst->ips_ip6_mib; 9115 goto notv6; 9116 } 9117 } 9118 } else if (DB_TYPE(mp) != M_DATA) { 9119 ip_wput_nondata(NULL, q, mp, NULL); 9120 return; 9121 } 9122 9123 ip6h = (ip6_t *)mp->b_rptr; 9124 9125 if (IPVER(ip6h) != IPV6_VERSION) { 9126 mibptr = &ipst->ips_ip6_mib; 9127 goto notv6; 9128 } 9129 9130 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9131 (connp == NULL || !connp->conn_ulp_labeled)) { 9132 cred_t *cr; 9133 pid_t pid; 9134 9135 if (connp != NULL) { 9136 ASSERT(CONN_CRED(connp) != NULL); 9137 cr = BEST_CRED(mp, connp, &pid); 9138 err = tsol_check_label_v6(cr, &mp, 9139 connp->conn_mac_mode, ipst, pid); 9140 } else if ((cr = msg_getcred(mp, &pid)) != NULL) { 9141 err = tsol_check_label_v6(cr, &mp, CONN_MAC_DEFAULT, 9142 ipst, pid); 9143 } 9144 if (mctl_present) 9145 first_mp->b_cont = mp; 9146 else 9147 first_mp = mp; 9148 if (err != 0) { 9149 DTRACE_PROBE3( 9150 tsol_ip_log_drop_checklabel_ip6, char *, 9151 "conn(1), failed to check/update mp(2)", 9152 
conn_t, connp, mblk_t, mp); 9153 freemsg(first_mp); 9154 return; 9155 } 9156 ip6h = (ip6_t *)mp->b_rptr; 9157 } 9158 if (q->q_next != NULL) { 9159 /* 9160 * We don't know if this ill will be used for IPv6 9161 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9162 * ipif_set_values() sets the ill_isv6 flag to true if 9163 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9164 * just drop the packet. 9165 */ 9166 if (!ill->ill_isv6) { 9167 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9168 "ILLF_IPV6 was set\n")); 9169 freemsg(first_mp); 9170 return; 9171 } 9172 /* For uniformity do a refhold */ 9173 mutex_enter(&ill->ill_lock); 9174 if (!ILL_CAN_LOOKUP(ill)) { 9175 mutex_exit(&ill->ill_lock); 9176 freemsg(first_mp); 9177 return; 9178 } 9179 ill_refhold_locked(ill); 9180 mutex_exit(&ill->ill_lock); 9181 mibptr = ill->ill_ip_mib; 9182 9183 ASSERT(mibptr != NULL); 9184 unspec_src = 0; 9185 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9186 do_outrequests = B_FALSE; 9187 zoneid = (zoneid_t)(uintptr_t)arg; 9188 } else { 9189 ASSERT(connp != NULL); 9190 zoneid = connp->conn_zoneid; 9191 9192 /* is queue flow controlled? */ 9193 if ((q->q_first || connp->conn_draining) && 9194 (caller == IP_WPUT)) { 9195 /* 9196 * 1) TCP sends down M_CTL for detached connections. 9197 * 2) AH/ESP sends down M_CTL. 9198 * 9199 * We don't flow control either of the above. Only 9200 * UDP and others are flow controlled for which we 9201 * can't have a M_CTL. 9202 */ 9203 ASSERT(first_mp == mp); 9204 (void) putq(q, mp); 9205 return; 9206 } 9207 mibptr = &ipst->ips_ip6_mib; 9208 unspec_src = connp->conn_unspec_src; 9209 do_outrequests = B_TRUE; 9210 if (mp->b_flag & MSGHASREF) { 9211 mp->b_flag &= ~MSGHASREF; 9212 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9213 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9214 need_decref = B_TRUE; 9215 } 9216 9217 /* 9218 * If there is a policy, try to attach an ipsec_out in 9219 * the front. 
At the end, first_mp either points to a 9220 * M_DATA message or IPSEC_OUT message linked to a 9221 * M_DATA message. We have to do it now as we might 9222 * lose the "conn" if we go through ip_newroute. 9223 */ 9224 if (!mctl_present && 9225 (connp->conn_out_enforce_policy || 9226 connp->conn_latch != NULL)) { 9227 ASSERT(first_mp == mp); 9228 /* XXX Any better way to get the protocol fast ? */ 9229 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9230 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9231 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9232 if (need_decref) 9233 CONN_DEC_REF(connp); 9234 return; 9235 } else { 9236 ASSERT(mp->b_datap->db_type == M_CTL); 9237 first_mp = mp; 9238 mp = mp->b_cont; 9239 mctl_present = B_TRUE; 9240 io = (ipsec_out_t *)first_mp->b_rptr; 9241 } 9242 } 9243 } 9244 9245 /* check for alignment and full IPv6 header */ 9246 if (!OK_32PTR((uchar_t *)ip6h) || 9247 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9248 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9249 if (do_outrequests) 9250 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9251 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9252 freemsg(first_mp); 9253 if (ill != NULL) 9254 ill_refrele(ill); 9255 if (need_decref) 9256 CONN_DEC_REF(connp); 9257 return; 9258 } 9259 v6dstp = &ip6h->ip6_dst; 9260 cksum_request = -1; 9261 ip6i = NULL; 9262 9263 /* 9264 * Once neighbor discovery has completed, ndp_process() will provide 9265 * locally generated packets for which processing can be reattempted. 9266 * In these cases, connp is NULL and the original zone is part of a 9267 * prepended ipsec_out_t. 9268 */ 9269 if (io != NULL) { 9270 /* 9271 * When coming from icmp_input_v6, the zoneid might not match 9272 * for the loopback case, because inside icmp_input_v6 the 9273 * queue_t is a conn queue from the sending side. 
9274 */ 9275 zoneid = io->ipsec_out_zoneid; 9276 ASSERT(zoneid != ALL_ZONES); 9277 } 9278 9279 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9280 /* 9281 * This is an ip6i_t header followed by an ip6_hdr. 9282 * Check which fields are set. 9283 * 9284 * When the packet comes from a transport we should have 9285 * all needed headers in the first mblk. However, when 9286 * going through ip_newroute*_v6 the ip6i might be in 9287 * a separate mblk when we return here. In that case 9288 * we pullup everything to ensure that extension and transport 9289 * headers "stay" in the first mblk. 9290 */ 9291 ip6i = (ip6i_t *)ip6h; 9292 ip6i_flags = ip6i->ip6i_flags; 9293 9294 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9295 ((mp->b_wptr - (uchar_t *)ip6i) >= 9296 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9297 9298 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9299 if (!pullupmsg(mp, -1)) { 9300 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9301 if (do_outrequests) { 9302 BUMP_MIB(mibptr, 9303 ipIfStatsHCOutRequests); 9304 } 9305 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9306 freemsg(first_mp); 9307 if (ill != NULL) 9308 ill_refrele(ill); 9309 if (need_decref) 9310 CONN_DEC_REF(connp); 9311 return; 9312 } 9313 ip6h = (ip6_t *)mp->b_rptr; 9314 v6dstp = &ip6h->ip6_dst; 9315 ip6i = (ip6i_t *)ip6h; 9316 } 9317 ip6h = (ip6_t *)&ip6i[1]; 9318 9319 /* 9320 * Advance rptr past the ip6i_t to get ready for 9321 * transmitting the packet. However, if the packet gets 9322 * passed to ip_newroute*_v6 then rptr is moved back so 9323 * that the ip6i_t header can be inspected when the 9324 * packet comes back here after passing through 9325 * ire_add_then_send. 
9326 */ 9327 mp->b_rptr = (uchar_t *)ip6h; 9328 9329 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9330 ASSERT(ip6i->ip6i_ifindex != 0); 9331 if (ill != NULL) 9332 ill_refrele(ill); 9333 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9334 NULL, NULL, NULL, NULL, ipst); 9335 if (ill == NULL) { 9336 if (do_outrequests) { 9337 BUMP_MIB(mibptr, 9338 ipIfStatsHCOutRequests); 9339 } 9340 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9341 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9342 ip6i->ip6i_ifindex)); 9343 if (need_decref) 9344 CONN_DEC_REF(connp); 9345 freemsg(first_mp); 9346 return; 9347 } 9348 mibptr = ill->ill_ip_mib; 9349 /* 9350 * Preserve the index so that when we return from 9351 * IPSEC processing, we know where to send the packet. 9352 */ 9353 if (mctl_present) { 9354 ASSERT(io != NULL); 9355 io->ipsec_out_ill_index = ip6i->ip6i_ifindex; 9356 } 9357 } 9358 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9359 cred_t *cr = msg_getcred(mp, NULL); 9360 9361 /* rpcmod doesn't send down db_credp for UDP packets */ 9362 if (cr == NULL) { 9363 if (connp != NULL) 9364 cr = connp->conn_cred; 9365 else 9366 cr = ill->ill_credp; 9367 } 9368 9369 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9370 if (secpolicy_net_rawaccess(cr) != 0) { 9371 /* 9372 * Use IPCL_ZONEID to honor SO_ALLZONES. 9373 */ 9374 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9375 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9376 NULL, connp != NULL ? 
9377 IPCL_ZONEID(connp) : zoneid, NULL, 9378 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9379 if (ire == NULL) { 9380 if (do_outrequests) 9381 BUMP_MIB(mibptr, 9382 ipIfStatsHCOutRequests); 9383 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9384 ip1dbg(("ip_wput_v6: bad source " 9385 "addr\n")); 9386 freemsg(first_mp); 9387 if (ill != NULL) 9388 ill_refrele(ill); 9389 if (need_decref) 9390 CONN_DEC_REF(connp); 9391 return; 9392 } 9393 ire_refrele(ire); 9394 } 9395 /* No need to verify again when using ip_newroute */ 9396 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9397 } 9398 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9399 /* 9400 * Make sure they match since ip_newroute*_v6 etc might 9401 * (unknown to them) inspect ip6i_nexthop when 9402 * they think they access ip6_dst. 9403 */ 9404 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9405 } 9406 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9407 cksum_request = 1; 9408 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9409 cksum_request = ip6i->ip6i_checksum_off; 9410 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9411 unspec_src = 1; 9412 9413 if (do_outrequests && ill != NULL) { 9414 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9415 do_outrequests = B_FALSE; 9416 } 9417 /* 9418 * Store ip6i_t info that we need after we come back 9419 * from IPSEC processing. 9420 */ 9421 if (mctl_present) { 9422 ASSERT(io != NULL); 9423 io->ipsec_out_unspec_src = unspec_src; 9424 } 9425 } 9426 if (connp != NULL && connp->conn_dontroute) 9427 ip6h->ip6_hops = 1; 9428 9429 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9430 goto ipv6multicast; 9431 9432 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9433 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9434 ASSERT(ill != NULL); 9435 goto send_from_ill; 9436 } 9437 9438 /* 9439 * 2. If q is an ill queue and there's a link-local destination 9440 * then use that ill. 9441 */ 9442 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) 9443 goto send_from_ill; 9444 9445 /* 3. If IPV6_BOUND_IF has been set use that ill. 
*/ 9446 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9447 ill_t *conn_outgoing_ill; 9448 9449 conn_outgoing_ill = conn_get_held_ill(connp, 9450 &connp->conn_outgoing_ill, &err); 9451 if (err == ILL_LOOKUP_FAILED) { 9452 if (ill != NULL) 9453 ill_refrele(ill); 9454 if (need_decref) 9455 CONN_DEC_REF(connp); 9456 freemsg(first_mp); 9457 return; 9458 } 9459 if (ill != NULL) 9460 ill_refrele(ill); 9461 ill = conn_outgoing_ill; 9462 mibptr = ill->ill_ip_mib; 9463 goto send_from_ill; 9464 } 9465 9466 /* 9467 * 4. For unicast: Just do an IRE lookup for the best match. 9468 * If we get here for a link-local address it is rather random 9469 * what interface we pick on a multihomed host. 9470 * *If* there is an IRE_CACHE (and the link-local address 9471 * isn't duplicated on multi links) this will find the IRE_CACHE. 9472 * Otherwise it will use one of the matching IRE_INTERFACE routes 9473 * for the link-local prefix. Hence, applications 9474 * *should* be encouraged to specify an outgoing interface when sending 9475 * to a link local address. 9476 */ 9477 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9478 !connp->conn_fully_bound)) { 9479 /* 9480 * We cache IRE_CACHEs to avoid lookups. We don't do 9481 * this for the tcp global queue and listen end point 9482 * as it does not really have a real destination to 9483 * talk to. 9484 */ 9485 ire = ire_cache_lookup_v6(v6dstp, zoneid, msg_getlabel(mp), 9486 ipst); 9487 } else { 9488 /* 9489 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9490 * grab a lock here to check for CONDEMNED as it is okay 9491 * to send a packet or two with the IRE_CACHE that is going 9492 * away. 9493 */ 9494 mutex_enter(&connp->conn_lock); 9495 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9496 if (ire != NULL && 9497 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9498 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9499 9500 IRE_REFHOLD(ire); 9501 mutex_exit(&connp->conn_lock); 9502 9503 } else { 9504 boolean_t cached = B_FALSE; 9505 9506 connp->conn_ire_cache = NULL; 9507 mutex_exit(&connp->conn_lock); 9508 /* Release the old ire */ 9509 if (ire != NULL && sctp_ire == NULL) 9510 IRE_REFRELE_NOTR(ire); 9511 9512 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9513 msg_getlabel(mp), ipst); 9514 if (ire != NULL) { 9515 IRE_REFHOLD_NOTR(ire); 9516 9517 mutex_enter(&connp->conn_lock); 9518 if (CONN_CACHE_IRE(connp) && 9519 (connp->conn_ire_cache == NULL)) { 9520 rw_enter(&ire->ire_bucket->irb_lock, 9521 RW_READER); 9522 if (!(ire->ire_marks & 9523 IRE_MARK_CONDEMNED)) { 9524 connp->conn_ire_cache = ire; 9525 cached = B_TRUE; 9526 } 9527 rw_exit(&ire->ire_bucket->irb_lock); 9528 } 9529 mutex_exit(&connp->conn_lock); 9530 9531 /* 9532 * We can continue to use the ire but since it 9533 * was not cached, we should drop the extra 9534 * reference. 9535 */ 9536 if (!cached) 9537 IRE_REFRELE_NOTR(ire); 9538 } 9539 } 9540 } 9541 9542 if (ire != NULL) { 9543 if (do_outrequests) { 9544 /* Handle IRE_LOCAL's that might appear here */ 9545 if (ire->ire_type == IRE_CACHE) { 9546 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9547 ill_ip_mib; 9548 } else { 9549 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9550 } 9551 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9552 } 9553 9554 /* 9555 * Check if the ire has the RTF_MULTIRT flag, inherited 9556 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9557 */ 9558 if (ire->ire_flags & RTF_MULTIRT) { 9559 /* 9560 * Force hop limit of multirouted packets if required. 9561 * The hop limit of such packets is bounded by the 9562 * ip_multirt_ttl ndd variable. 9563 * NDP packets must have a hop limit of 255; don't 9564 * change the hop limit in that case. 
9565 */ 9566 if ((ipst->ips_ip_multirt_ttl > 0) && 9567 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9568 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9569 if (ip_debug > 3) { 9570 ip2dbg(("ip_wput_v6: forcing multirt " 9571 "hop limit to %d (was %d) ", 9572 ipst->ips_ip_multirt_ttl, 9573 ip6h->ip6_hops)); 9574 pr_addr_dbg("v6dst %s\n", AF_INET6, 9575 &ire->ire_addr_v6); 9576 } 9577 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9578 } 9579 9580 /* 9581 * We look at this point if there are pending 9582 * unresolved routes. ire_multirt_need_resolve_v6() 9583 * checks in O(n) that all IRE_OFFSUBNET ire 9584 * entries for the packet's destination and 9585 * flagged RTF_MULTIRT are currently resolved. 9586 * If some remain unresolved, we do a copy 9587 * of the current message. It will be used 9588 * to initiate additional route resolutions. 9589 */ 9590 multirt_need_resolve = 9591 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9592 msg_getlabel(first_mp), ipst); 9593 ip2dbg(("ip_wput_v6: ire %p, " 9594 "multirt_need_resolve %d, first_mp %p\n", 9595 (void *)ire, multirt_need_resolve, 9596 (void *)first_mp)); 9597 if (multirt_need_resolve) { 9598 copy_mp = copymsg(first_mp); 9599 if (copy_mp != NULL) { 9600 MULTIRT_DEBUG_TAG(copy_mp); 9601 } 9602 } 9603 } 9604 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9605 connp, caller, ip6i_flags, zoneid); 9606 if (need_decref) { 9607 CONN_DEC_REF(connp); 9608 connp = NULL; 9609 } 9610 IRE_REFRELE(ire); 9611 9612 /* 9613 * Try to resolve another multiroute if 9614 * ire_multirt_need_resolve_v6() deemed it necessary. 9615 * copy_mp will be consumed (sent or freed) by 9616 * ip_newroute_v6(). 
9617 */ 9618 if (copy_mp != NULL) { 9619 if (mctl_present) { 9620 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9621 } else { 9622 ip6h = (ip6_t *)copy_mp->b_rptr; 9623 } 9624 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9625 &ip6h->ip6_src, NULL, zoneid, ipst); 9626 } 9627 if (ill != NULL) 9628 ill_refrele(ill); 9629 return; 9630 } 9631 9632 /* 9633 * No full IRE for this destination. Send it to 9634 * ip_newroute_v6 to see if anything else matches. 9635 * Mark this packet as having originated on this 9636 * machine. 9637 * Update rptr if there was an ip6i_t header. 9638 */ 9639 mp->b_prev = NULL; 9640 mp->b_next = NULL; 9641 if (ip6i != NULL) 9642 mp->b_rptr -= sizeof (ip6i_t); 9643 9644 if (unspec_src) { 9645 if (ip6i == NULL) { 9646 /* 9647 * Add ip6i_t header to carry unspec_src 9648 * until the packet comes back in ip_wput_v6. 9649 */ 9650 mp = ip_add_info_v6(mp, NULL, v6dstp); 9651 if (mp == NULL) { 9652 if (do_outrequests) 9653 BUMP_MIB(mibptr, 9654 ipIfStatsHCOutRequests); 9655 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9656 if (mctl_present) 9657 freeb(first_mp); 9658 if (ill != NULL) 9659 ill_refrele(ill); 9660 if (need_decref) 9661 CONN_DEC_REF(connp); 9662 return; 9663 } 9664 ip6i = (ip6i_t *)mp->b_rptr; 9665 9666 if (mctl_present) { 9667 ASSERT(first_mp != mp); 9668 first_mp->b_cont = mp; 9669 } else { 9670 first_mp = mp; 9671 } 9672 9673 if ((mp->b_wptr - (uchar_t *)ip6i) == 9674 sizeof (ip6i_t)) { 9675 /* 9676 * ndp_resolver called from ip_newroute_v6 9677 * expects pulled up message. 
9678 */ 9679 if (!pullupmsg(mp, -1)) { 9680 ip1dbg(("ip_wput_v6: pullupmsg" 9681 " failed\n")); 9682 if (do_outrequests) { 9683 BUMP_MIB(mibptr, 9684 ipIfStatsHCOutRequests); 9685 } 9686 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9687 freemsg(first_mp); 9688 if (ill != NULL) 9689 ill_refrele(ill); 9690 if (need_decref) 9691 CONN_DEC_REF(connp); 9692 return; 9693 } 9694 ip6i = (ip6i_t *)mp->b_rptr; 9695 } 9696 ip6h = (ip6_t *)&ip6i[1]; 9697 v6dstp = &ip6h->ip6_dst; 9698 } 9699 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9700 if (mctl_present) { 9701 ASSERT(io != NULL); 9702 io->ipsec_out_unspec_src = unspec_src; 9703 } 9704 } 9705 if (do_outrequests) 9706 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9707 if (need_decref) 9708 CONN_DEC_REF(connp); 9709 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 9710 if (ill != NULL) 9711 ill_refrele(ill); 9712 return; 9713 9714 9715 /* 9716 * Handle multicast packets with or without an conn. 9717 * Assumes that the transports set ip6_hops taking 9718 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9719 * into account. 9720 */ 9721 ipv6multicast: 9722 ip2dbg(("ip_wput_v6: multicast\n")); 9723 9724 /* 9725 * Hold the conn_lock till we refhold the ill of interest that is 9726 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9727 * while holding any locks, postpone the refrele until after the 9728 * conn_lock is dropped. 9729 */ 9730 if (connp != NULL) { 9731 mutex_enter(&connp->conn_lock); 9732 conn_lock_held = B_TRUE; 9733 } else { 9734 conn_lock_held = B_FALSE; 9735 } 9736 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9737 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9738 ASSERT(ill != NULL); 9739 } else if (ill != NULL) { 9740 /* 9741 * 2. If q is an ill queue and (link local or multicast 9742 * destination) then use that ill. 9743 * We don't need the ipif initialization here. 
9744 * This useless assert below is just to prevent lint from 9745 * reporting a null body if statement. 9746 */ 9747 ASSERT(ill != NULL); 9748 } else if (connp != NULL) { 9749 /* 9750 * 3. If IPV6_BOUND_IF has been set use that ill. 9751 * 9752 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. 9753 * Otherwise look for the best IRE match for the unspecified 9754 * group to determine the ill. 9755 * 9756 * conn_multicast_ill is used for only IPv6 packets. 9757 * conn_multicast_ipif is used for only IPv4 packets. 9758 * Thus a PF_INET6 socket send both IPv4 and IPv6 9759 * multicast packets using different IP*_MULTICAST_IF 9760 * interfaces. 9761 */ 9762 if (connp->conn_outgoing_ill != NULL) { 9763 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9764 if (err == ILL_LOOKUP_FAILED) { 9765 ip1dbg(("ip_output_v6: multicast" 9766 " conn_outgoing_ill no ipif\n")); 9767 multicast_discard: 9768 ASSERT(saved_ill == NULL); 9769 if (conn_lock_held) 9770 mutex_exit(&connp->conn_lock); 9771 if (ill != NULL) 9772 ill_refrele(ill); 9773 freemsg(first_mp); 9774 if (do_outrequests) 9775 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9776 if (need_decref) 9777 CONN_DEC_REF(connp); 9778 return; 9779 } 9780 ill = connp->conn_outgoing_ill; 9781 } else if (connp->conn_multicast_ill != NULL) { 9782 err = ill_check_and_refhold(connp->conn_multicast_ill); 9783 if (err == ILL_LOOKUP_FAILED) { 9784 ip1dbg(("ip_output_v6: multicast" 9785 " conn_multicast_ill no ipif\n")); 9786 goto multicast_discard; 9787 } 9788 ill = connp->conn_multicast_ill; 9789 } else { 9790 mutex_exit(&connp->conn_lock); 9791 conn_lock_held = B_FALSE; 9792 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 9793 if (ipif == NULL) { 9794 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9795 goto multicast_discard; 9796 } 9797 /* 9798 * We have a ref to this ipif, so we can safely 9799 * access ipif_ill. 
9800 */ 9801 ill = ipif->ipif_ill; 9802 mutex_enter(&ill->ill_lock); 9803 if (!ILL_CAN_LOOKUP(ill)) { 9804 mutex_exit(&ill->ill_lock); 9805 ipif_refrele(ipif); 9806 ill = NULL; 9807 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9808 goto multicast_discard; 9809 } 9810 ill_refhold_locked(ill); 9811 mutex_exit(&ill->ill_lock); 9812 ipif_refrele(ipif); 9813 /* 9814 * Save binding until IPV6_MULTICAST_IF 9815 * changes it 9816 */ 9817 mutex_enter(&connp->conn_lock); 9818 connp->conn_multicast_ill = ill; 9819 mutex_exit(&connp->conn_lock); 9820 } 9821 } 9822 if (conn_lock_held) 9823 mutex_exit(&connp->conn_lock); 9824 9825 if (saved_ill != NULL) 9826 ill_refrele(saved_ill); 9827 9828 ASSERT(ill != NULL); 9829 /* 9830 * For multicast loopback interfaces replace the multicast address 9831 * with a unicast address for the ire lookup. 9832 */ 9833 if (IS_LOOPBACK(ill)) 9834 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9835 9836 mibptr = ill->ill_ip_mib; 9837 if (do_outrequests) { 9838 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9839 do_outrequests = B_FALSE; 9840 } 9841 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 9842 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 9843 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 9844 9845 /* 9846 * As we may lose the conn by the time we reach ip_wput_ire_v6 9847 * we copy conn_multicast_loop and conn_dontroute on to an 9848 * ipsec_out. In case if this datagram goes out secure, 9849 * we need the ill_index also. Copy that also into the 9850 * ipsec_out. 
9851 */ 9852 if (mctl_present) { 9853 io = (ipsec_out_t *)first_mp->b_rptr; 9854 ASSERT(first_mp->b_datap->db_type == M_CTL); 9855 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9856 } else { 9857 ASSERT(mp == first_mp); 9858 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 9859 NULL) { 9860 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9861 freemsg(mp); 9862 if (ill != NULL) 9863 ill_refrele(ill); 9864 if (need_decref) 9865 CONN_DEC_REF(connp); 9866 return; 9867 } 9868 io = (ipsec_out_t *)first_mp->b_rptr; 9869 /* This is not a secure packet */ 9870 io->ipsec_out_secure = B_FALSE; 9871 io->ipsec_out_use_global_policy = B_TRUE; 9872 io->ipsec_out_zoneid = 9873 (zoneid != ALL_ZONES ? zoneid : GLOBAL_ZONEID); 9874 first_mp->b_cont = mp; 9875 mctl_present = B_TRUE; 9876 } 9877 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9878 io->ipsec_out_unspec_src = unspec_src; 9879 if (connp != NULL) 9880 io->ipsec_out_dontroute = connp->conn_dontroute; 9881 9882 send_from_ill: 9883 ASSERT(ill != NULL); 9884 ASSERT(mibptr == ill->ill_ip_mib); 9885 9886 if (do_outrequests) { 9887 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9888 do_outrequests = B_FALSE; 9889 } 9890 9891 /* 9892 * Because nce_xmit() calls ip_output_v6() and NCEs are always tied to 9893 * an underlying interface, IS_UNDER_IPMP() may be true even when 9894 * building IREs that will be used for data traffic. As such, use the 9895 * packet's source address to determine whether the traffic is test 9896 * traffic, and set MATCH_IRE_MARK_TESTHIDDEN if so. 9897 * 9898 * Separately, we also need to mark probe packets so that ND can 9899 * process them specially; see the comments in nce_queue_mp_common(). 
9900 */ 9901 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 9902 ipif_lookup_testaddr_v6(ill, &ip6h->ip6_src, NULL)) { 9903 if (ip6i == NULL) { 9904 if ((mp = ip_add_info_v6(mp, NULL, v6dstp)) == NULL) { 9905 if (mctl_present) 9906 freeb(first_mp); 9907 goto discard; 9908 } 9909 9910 if (mctl_present) 9911 first_mp->b_cont = mp; 9912 else 9913 first_mp = mp; 9914 9915 /* ndp_resolver() expects a pulled-up message */ 9916 if (MBLKL(mp) == sizeof (ip6i_t) && 9917 pullupmsg(mp, -1) == 0) { 9918 ip1dbg(("ip_output_v6: pullupmsg failed\n")); 9919 discard: BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9920 ill_refrele(ill); 9921 if (need_decref) 9922 CONN_DEC_REF(connp); 9923 return; 9924 } 9925 ip6i = (ip6i_t *)mp->b_rptr; 9926 ip6h = (ip6_t *)&ip6i[1]; 9927 v6dstp = &ip6h->ip6_dst; 9928 mp->b_rptr = (uchar_t *)ip6h; /* rewound below */ 9929 } 9930 ip6i->ip6i_flags |= IP6I_IPMP_PROBE; 9931 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 9932 } 9933 9934 if (io != NULL) 9935 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9936 9937 /* 9938 * When a specific ill is specified (using IPV6_PKTINFO, 9939 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 9940 * on routing entries (ftable and ctable) that have a matching 9941 * ire->ire_ipif->ipif_ill. Thus this can only be used 9942 * for destinations that are on-link for the specific ill 9943 * and that can appear on multiple links. Thus it is useful 9944 * for multicast destinations, link-local destinations, and 9945 * at some point perhaps for site-local destinations (if the 9946 * node sits at a site boundary). 9947 * We create the cache entries in the regular ctable since 9948 * it can not "confuse" things for other destinations. 9949 * table. 9950 * 9951 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 9952 * It is used only when ire_cache_lookup is used above. 
9953 */ 9954 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 9955 zoneid, msg_getlabel(mp), match_flags, ipst); 9956 if (ire != NULL) { 9957 /* 9958 * Check if the ire has the RTF_MULTIRT flag, inherited 9959 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9960 */ 9961 if (ire->ire_flags & RTF_MULTIRT) { 9962 /* 9963 * Force hop limit of multirouted packets if required. 9964 * The hop limit of such packets is bounded by the 9965 * ip_multirt_ttl ndd variable. 9966 * NDP packets must have a hop limit of 255; don't 9967 * change the hop limit in that case. 9968 */ 9969 if ((ipst->ips_ip_multirt_ttl > 0) && 9970 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9971 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9972 if (ip_debug > 3) { 9973 ip2dbg(("ip_wput_v6: forcing multirt " 9974 "hop limit to %d (was %d) ", 9975 ipst->ips_ip_multirt_ttl, 9976 ip6h->ip6_hops)); 9977 pr_addr_dbg("v6dst %s\n", AF_INET6, 9978 &ire->ire_addr_v6); 9979 } 9980 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9981 } 9982 9983 /* 9984 * We look at this point if there are pending 9985 * unresolved routes. ire_multirt_need_resolve_v6() 9986 * checks in O(n) that all IRE_OFFSUBNET ire 9987 * entries for the packet's destination and 9988 * flagged RTF_MULTIRT are currently resolved. 9989 * If some remain unresolved, we make a copy 9990 * of the current message. It will be used 9991 * to initiate additional route resolutions. 
9992 */ 9993 multirt_need_resolve = 9994 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9995 msg_getlabel(first_mp), ipst); 9996 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 9997 "multirt_need_resolve %d, first_mp %p\n", 9998 (void *)ire, multirt_need_resolve, 9999 (void *)first_mp)); 10000 if (multirt_need_resolve) { 10001 copy_mp = copymsg(first_mp); 10002 if (copy_mp != NULL) { 10003 MULTIRT_DEBUG_TAG(copy_mp); 10004 } 10005 } 10006 } 10007 10008 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10009 ill->ill_name, (void *)ire, 10010 ill->ill_phyint->phyint_ifindex)); 10011 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10012 connp, caller, ip6i_flags, zoneid); 10013 ire_refrele(ire); 10014 if (need_decref) { 10015 CONN_DEC_REF(connp); 10016 connp = NULL; 10017 } 10018 10019 /* 10020 * Try to resolve another multiroute if 10021 * ire_multirt_need_resolve_v6() deemed it necessary. 10022 * copy_mp will be consumed (sent or freed) by 10023 * ip_newroute_[ipif_]v6(). 10024 */ 10025 if (copy_mp != NULL) { 10026 if (mctl_present) { 10027 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10028 } else { 10029 ip6h = (ip6_t *)copy_mp->b_rptr; 10030 } 10031 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10032 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10033 zoneid, ipst); 10034 if (ipif == NULL) { 10035 ip1dbg(("ip_wput_v6: No ipif for " 10036 "multicast\n")); 10037 MULTIRT_DEBUG_UNTAG(copy_mp); 10038 freemsg(copy_mp); 10039 return; 10040 } 10041 ip_newroute_ipif_v6(q, copy_mp, ipif, 10042 &ip6h->ip6_dst, &ip6h->ip6_src, unspec_src, 10043 zoneid); 10044 ipif_refrele(ipif); 10045 } else { 10046 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10047 &ip6h->ip6_src, ill, zoneid, ipst); 10048 } 10049 } 10050 ill_refrele(ill); 10051 return; 10052 } 10053 if (need_decref) { 10054 CONN_DEC_REF(connp); 10055 connp = NULL; 10056 } 10057 10058 /* Update rptr if there was an ip6i_t header. 
*/ 10059 if (ip6i != NULL) 10060 mp->b_rptr -= sizeof (ip6i_t); 10061 if (unspec_src) { 10062 if (ip6i == NULL) { 10063 /* 10064 * Add ip6i_t header to carry unspec_src 10065 * until the packet comes back in ip_wput_v6. 10066 */ 10067 if (mctl_present) { 10068 first_mp->b_cont = 10069 ip_add_info_v6(mp, NULL, v6dstp); 10070 mp = first_mp->b_cont; 10071 if (mp == NULL) 10072 freeb(first_mp); 10073 } else { 10074 first_mp = mp = ip_add_info_v6(mp, NULL, 10075 v6dstp); 10076 } 10077 if (mp == NULL) { 10078 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10079 ill_refrele(ill); 10080 return; 10081 } 10082 ip6i = (ip6i_t *)mp->b_rptr; 10083 if ((mp->b_wptr - (uchar_t *)ip6i) == 10084 sizeof (ip6i_t)) { 10085 /* 10086 * ndp_resolver called from ip_newroute_v6 10087 * expects a pulled up message. 10088 */ 10089 if (!pullupmsg(mp, -1)) { 10090 ip1dbg(("ip_wput_v6: pullupmsg" 10091 " failed\n")); 10092 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10093 freemsg(first_mp); 10094 return; 10095 } 10096 ip6i = (ip6i_t *)mp->b_rptr; 10097 } 10098 ip6h = (ip6_t *)&ip6i[1]; 10099 v6dstp = &ip6h->ip6_dst; 10100 } 10101 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10102 if (mctl_present) { 10103 ASSERT(io != NULL); 10104 io->ipsec_out_unspec_src = unspec_src; 10105 } 10106 } 10107 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10108 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, v6dstp, 10109 &ip6h->ip6_src, unspec_src, zoneid); 10110 } else { 10111 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10112 zoneid, ipst); 10113 } 10114 ill_refrele(ill); 10115 return; 10116 10117 notv6: 10118 /* FIXME?: assume the caller calls the right version of ip_output? */ 10119 if (q->q_next == NULL) { 10120 connp = Q_TO_CONN(q); 10121 10122 /* 10123 * We can change conn_send for all types of conn, even 10124 * though only TCP uses it right now. 10125 * FIXME: sctp could use conn_send but doesn't currently. 
10126 */ 10127 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10128 } 10129 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10130 (void) ip_output(arg, first_mp, arg2, caller); 10131 if (ill != NULL) 10132 ill_refrele(ill); 10133 } 10134 10135 /* 10136 * If this is a conn_t queue, then we pass in the conn. This includes the 10137 * zoneid. 10138 * Otherwise, this is a message for an ill_t queue, 10139 * in which case we use the global zoneid since those are all part of 10140 * the global zone. 10141 */ 10142 void 10143 ip_wput_v6(queue_t *q, mblk_t *mp) 10144 { 10145 if (CONN_Q(q)) 10146 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10147 else 10148 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10149 } 10150 10151 /* 10152 * NULL send-to queue - packet is to be delivered locally. 10153 */ 10154 void 10155 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10156 ire_t *ire, int fanout_flags, zoneid_t zoneid) 10157 { 10158 uint32_t ports; 10159 mblk_t *mp = first_mp, *first_mp1; 10160 boolean_t mctl_present; 10161 uint8_t nexthdr; 10162 uint16_t hdr_length; 10163 ipsec_out_t *io; 10164 mib2_ipIfStatsEntry_t *mibptr; 10165 ilm_t *ilm; 10166 uint_t nexthdr_offset; 10167 ip_stack_t *ipst = ill->ill_ipst; 10168 10169 if (DB_TYPE(mp) == M_CTL) { 10170 io = (ipsec_out_t *)mp->b_rptr; 10171 if (!io->ipsec_out_secure) { 10172 mp = mp->b_cont; 10173 freeb(first_mp); 10174 first_mp = mp; 10175 mctl_present = B_FALSE; 10176 } else { 10177 mctl_present = B_TRUE; 10178 mp = first_mp->b_cont; 10179 ipsec_out_to_in(first_mp); 10180 } 10181 } else { 10182 mctl_present = B_FALSE; 10183 } 10184 10185 /* 10186 * Remove reachability confirmation bit from version field 10187 * before passing the packet on to any firewall hooks or 10188 * looping back the packet. 
10189 */ 10190 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10191 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10192 10193 DTRACE_PROBE4(ip6__loopback__in__start, 10194 ill_t *, ill, ill_t *, NULL, 10195 ip6_t *, ip6h, mblk_t *, first_mp); 10196 10197 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10198 ipst->ips_ipv6firewall_loopback_in, 10199 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10200 10201 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10202 10203 if (first_mp == NULL) 10204 return; 10205 10206 if (ipst->ips_ip6_observe.he_interested) { 10207 zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; 10208 zoneid_t stackzoneid = netstackid_to_zoneid( 10209 ipst->ips_netstack->netstack_stackid); 10210 10211 szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; 10212 /* 10213 * ::1 is special, as we cannot lookup its zoneid by 10214 * address. For this case, restrict the lookup to the 10215 * source zone. 10216 */ 10217 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) 10218 lookup_zoneid = zoneid; 10219 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 10220 lookup_zoneid); 10221 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst); 10222 } 10223 10224 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10225 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10226 int, 1); 10227 10228 nexthdr = ip6h->ip6_nxt; 10229 mibptr = ill->ill_ip_mib; 10230 10231 /* Fastpath */ 10232 switch (nexthdr) { 10233 case IPPROTO_TCP: 10234 case IPPROTO_UDP: 10235 case IPPROTO_ICMPV6: 10236 case IPPROTO_SCTP: 10237 hdr_length = IPV6_HDR_LEN; 10238 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10239 (uchar_t *)ip6h); 10240 break; 10241 default: { 10242 uint8_t *nexthdrp; 10243 10244 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10245 &hdr_length, &nexthdrp)) { 10246 /* Malformed packet */ 10247 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10248 freemsg(first_mp); 10249 return; 10250 } 10251 nexthdr = *nexthdrp; 10252 nexthdr_offset = nexthdrp - (uint8_t 
*)ip6h; 10253 break; 10254 } 10255 } 10256 10257 UPDATE_OB_PKT_COUNT(ire); 10258 ire->ire_last_used_time = lbolt; 10259 10260 switch (nexthdr) { 10261 case IPPROTO_TCP: 10262 if (DB_TYPE(mp) == M_DATA) { 10263 /* 10264 * M_DATA mblk, so init mblk (chain) for 10265 * no struio(). 10266 */ 10267 mblk_t *mp1 = mp; 10268 10269 do { 10270 mp1->b_datap->db_struioflag = 0; 10271 } while ((mp1 = mp1->b_cont) != NULL); 10272 } 10273 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10274 TCP_PORTS_OFFSET); 10275 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10276 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10277 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10278 hdr_length, mctl_present, ire->ire_zoneid); 10279 return; 10280 10281 case IPPROTO_UDP: 10282 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10283 UDP_PORTS_OFFSET); 10284 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10285 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10286 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10287 return; 10288 10289 case IPPROTO_SCTP: 10290 { 10291 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10292 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10293 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10294 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10295 return; 10296 } 10297 case IPPROTO_ICMPV6: { 10298 icmp6_t *icmp6; 10299 10300 /* check for full IPv6+ICMPv6 header */ 10301 if ((mp->b_wptr - mp->b_rptr) < 10302 (hdr_length + ICMP6_MINLEN)) { 10303 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10304 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10305 " failed\n")); 10306 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10307 freemsg(first_mp); 10308 return; 10309 } 10310 ip6h = (ip6_t *)mp->b_rptr; 10311 } 10312 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10313 10314 /* Update output mib stats */ 10315 icmp_update_out_mib_v6(ill, icmp6); 10316 10317 /* Check variable for testing applications */ 10318 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10319 freemsg(first_mp); 10320 
return; 10321 } 10322 /* 10323 * Assume that there is always at least one conn for 10324 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10325 * where there is no conn. 10326 */ 10327 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10328 !IS_LOOPBACK(ill)) { 10329 ilm_walker_t ilw; 10330 10331 /* 10332 * In the multicast case, applications may have 10333 * joined the group from different zones, so we 10334 * need to deliver the packet to each of them. 10335 * Loop through the multicast memberships 10336 * structures (ilm) on the receive ill and send 10337 * a copy of the packet up each matching one. 10338 * However, we don't do this for multicasts sent 10339 * on the loopback interface (PHYI_LOOPBACK flag 10340 * set) as they must stay in the sender's zone. 10341 */ 10342 ilm = ilm_walker_start(&ilw, ill); 10343 for (; ilm != NULL; 10344 ilm = ilm_walker_step(&ilw, ilm)) { 10345 if (!IN6_ARE_ADDR_EQUAL( 10346 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10347 continue; 10348 if ((fanout_flags & 10349 IP_FF_NO_MCAST_LOOP) && 10350 ilm->ilm_zoneid == ire->ire_zoneid) 10351 continue; 10352 if (!ipif_lookup_zoneid( 10353 ilw.ilw_walk_ill, ilm->ilm_zoneid, 10354 IPIF_UP, NULL)) 10355 continue; 10356 10357 first_mp1 = ip_copymsg(first_mp); 10358 if (first_mp1 == NULL) 10359 continue; 10360 icmp_inbound_v6(q, first_mp1, 10361 ilw.ilw_walk_ill, ill, hdr_length, 10362 mctl_present, IP6_NO_IPPOLICY, 10363 ilm->ilm_zoneid, NULL); 10364 } 10365 ilm_walker_finish(&ilw); 10366 } else { 10367 first_mp1 = ip_copymsg(first_mp); 10368 if (first_mp1 != NULL) 10369 icmp_inbound_v6(q, first_mp1, ill, ill, 10370 hdr_length, mctl_present, 10371 IP6_NO_IPPOLICY, ire->ire_zoneid, 10372 NULL); 10373 } 10374 } 10375 /* FALLTHRU */ 10376 default: { 10377 /* 10378 * Handle protocols with which IPv6 is less intimate. 10379 */ 10380 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10381 10382 /* 10383 * Enable sending ICMP for "Unknown" nexthdr 10384 * case. i.e. 
where we did not FALLTHRU from 10385 * IPPROTO_ICMPV6 processing case above. 10386 */ 10387 if (nexthdr != IPPROTO_ICMPV6) 10388 fanout_flags |= IP_FF_SEND_ICMP; 10389 /* 10390 * Note: There can be more than one stream bound 10391 * to a particular protocol. When this is the case, 10392 * each one gets a copy of any incoming packets. 10393 */ 10394 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10395 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10396 mctl_present, ire->ire_zoneid); 10397 return; 10398 } 10399 } 10400 } 10401 10402 /* 10403 * Send packet using IRE. 10404 * Checksumming is controlled by cksum_request: 10405 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10406 * 1 => Skip TCP/UDP/SCTP checksum 10407 * Otherwise => checksum_request contains insert offset for checksum 10408 * 10409 * Assumes that the following set of headers appear in the first 10410 * mblk: 10411 * ip6_t 10412 * Any extension headers 10413 * TCP/UDP/SCTP header (if present) 10414 * The routine can handle an ICMPv6 header that is not in the first mblk. 10415 * 10416 * NOTE : This function does not ire_refrele the ire passed in as the 10417 * argument unlike ip_wput_ire where the REFRELE is done. 10418 * Refer to ip_wput_ire for more on this. 10419 */ 10420 static void 10421 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10422 int cksum_request, conn_t *connp, int caller, int flags, zoneid_t zoneid) 10423 { 10424 ip6_t *ip6h; 10425 uint8_t nexthdr; 10426 uint16_t hdr_length; 10427 uint_t reachable = 0x0; 10428 ill_t *ill; 10429 mib2_ipIfStatsEntry_t *mibptr; 10430 mblk_t *first_mp; 10431 boolean_t mctl_present; 10432 ipsec_out_t *io; 10433 boolean_t conn_dontroute; /* conn value for multicast */ 10434 boolean_t conn_multicast_loop; /* conn value for multicast */ 10435 boolean_t multicast_forward; /* Should we forward ? 
*/ 10436 int max_frag; 10437 ip_stack_t *ipst = ire->ire_ipst; 10438 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10439 10440 ill = ire_to_ill(ire); 10441 first_mp = mp; 10442 multicast_forward = B_FALSE; 10443 10444 if (mp->b_datap->db_type != M_CTL) { 10445 ip6h = (ip6_t *)first_mp->b_rptr; 10446 } else { 10447 io = (ipsec_out_t *)first_mp->b_rptr; 10448 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10449 /* 10450 * Grab the zone id now because the M_CTL can be discarded by 10451 * ip_wput_ire_parse_ipsec_out() below. 10452 */ 10453 ASSERT(zoneid == io->ipsec_out_zoneid); 10454 ASSERT(zoneid != ALL_ZONES); 10455 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10456 /* 10457 * For the multicast case, ipsec_out carries conn_dontroute and 10458 * conn_multicast_loop as conn may not be available here. We 10459 * need this for multicast loopback and forwarding which is done 10460 * later in the code. 10461 */ 10462 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10463 conn_dontroute = io->ipsec_out_dontroute; 10464 conn_multicast_loop = io->ipsec_out_multicast_loop; 10465 /* 10466 * If conn_dontroute is not set or conn_multicast_loop 10467 * is set, we need to do forwarding/loopback. For 10468 * datagrams from ip_wput_multicast, conn_dontroute is 10469 * set to B_TRUE and conn_multicast_loop is set to 10470 * B_FALSE so that we neither do forwarding nor 10471 * loopback. 10472 */ 10473 if (!conn_dontroute || conn_multicast_loop) 10474 multicast_forward = B_TRUE; 10475 } 10476 } 10477 10478 /* 10479 * If the sender didn't supply the hop limit and there is a default 10480 * unicast hop limit associated with the output interface, we use 10481 * that if the packet is unicast. Interface specific unicast hop 10482 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10483 */ 10484 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10485 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10486 ip6h->ip6_hops = ill->ill_max_hops; 10487 } 10488 10489 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10490 ire->ire_zoneid != ALL_ZONES) { 10491 /* 10492 * When a zone sends a packet to another zone, we try to deliver 10493 * the packet under the same conditions as if the destination 10494 * was a real node on the network. To do so, we look for a 10495 * matching route in the forwarding table. 10496 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10497 * ip_newroute_v6() does. 10498 * Note that IRE_LOCAL are special, since they are used 10499 * when the zoneid doesn't match in some cases. This means that 10500 * we need to handle ipha_src differently since ire_src_addr 10501 * belongs to the receiving zone instead of the sending zone. 10502 * When ip_restrict_interzone_loopback is set, then 10503 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10504 * for loopback between zones when the logical "Ethernet" would 10505 * have looped them back. 
10506 */ 10507 ire_t *src_ire; 10508 10509 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10510 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10511 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10512 if (src_ire != NULL && 10513 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10514 (!ipst->ips_ip_restrict_interzone_loopback || 10515 ire_local_same_lan(ire, src_ire))) { 10516 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10517 !unspec_src) { 10518 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10519 } 10520 ire_refrele(src_ire); 10521 } else { 10522 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10523 if (src_ire != NULL) { 10524 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10525 ire_refrele(src_ire); 10526 freemsg(first_mp); 10527 return; 10528 } 10529 ire_refrele(src_ire); 10530 } 10531 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10532 /* Failed */ 10533 freemsg(first_mp); 10534 return; 10535 } 10536 icmp_unreachable_v6(q, first_mp, 10537 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10538 zoneid, ipst); 10539 return; 10540 } 10541 } 10542 10543 if (mp->b_datap->db_type == M_CTL || 10544 ipss->ipsec_outbound_v6_policy_present) { 10545 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10546 connp, unspec_src, zoneid); 10547 if (mp == NULL) { 10548 return; 10549 } 10550 } 10551 10552 first_mp = mp; 10553 if (mp->b_datap->db_type == M_CTL) { 10554 io = (ipsec_out_t *)mp->b_rptr; 10555 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10556 mp = mp->b_cont; 10557 mctl_present = B_TRUE; 10558 } else { 10559 mctl_present = B_FALSE; 10560 } 10561 10562 ip6h = (ip6_t *)mp->b_rptr; 10563 nexthdr = ip6h->ip6_nxt; 10564 mibptr = ill->ill_ip_mib; 10565 10566 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10567 ipif_t *ipif; 10568 10569 /* 10570 * Select the source address using ipif_select_source_v6. 
10571 */ 10572 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, B_FALSE, 10573 IPV6_PREFER_SRC_DEFAULT, zoneid); 10574 if (ipif == NULL) { 10575 if (ip_debug > 2) { 10576 /* ip1dbg */ 10577 pr_addr_dbg("ip_wput_ire_v6: no src for " 10578 "dst %s\n", AF_INET6, &ip6h->ip6_dst); 10579 printf("through interface %s\n", ill->ill_name); 10580 } 10581 freemsg(first_mp); 10582 return; 10583 } 10584 ip6h->ip6_src = ipif->ipif_v6src_addr; 10585 ipif_refrele(ipif); 10586 } 10587 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10588 if ((connp != NULL && connp->conn_multicast_loop) || 10589 !IS_LOOPBACK(ill)) { 10590 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 10591 ALL_ZONES) != NULL) { 10592 mblk_t *nmp; 10593 int fanout_flags = 0; 10594 10595 if (connp != NULL && 10596 !connp->conn_multicast_loop) { 10597 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10598 } 10599 ip1dbg(("ip_wput_ire_v6: " 10600 "Loopback multicast\n")); 10601 nmp = ip_copymsg(first_mp); 10602 if (nmp != NULL) { 10603 ip6_t *nip6h; 10604 mblk_t *mp_ip6h; 10605 10606 if (mctl_present) { 10607 nip6h = (ip6_t *) 10608 nmp->b_cont->b_rptr; 10609 mp_ip6h = nmp->b_cont; 10610 } else { 10611 nip6h = (ip6_t *)nmp->b_rptr; 10612 mp_ip6h = nmp; 10613 } 10614 10615 DTRACE_PROBE4( 10616 ip6__loopback__out__start, 10617 ill_t *, NULL, 10618 ill_t *, ill, 10619 ip6_t *, nip6h, 10620 mblk_t *, nmp); 10621 10622 FW_HOOKS6( 10623 ipst->ips_ip6_loopback_out_event, 10624 ipst->ips_ipv6firewall_loopback_out, 10625 NULL, ill, nip6h, nmp, mp_ip6h, 10626 0, ipst); 10627 10628 DTRACE_PROBE1( 10629 ip6__loopback__out__end, 10630 mblk_t *, nmp); 10631 10632 /* 10633 * DTrace this as ip:::send. A blocked 10634 * packet will fire the send probe, but 10635 * not the receive probe. 
10636 */ 10637 DTRACE_IP7(send, mblk_t *, nmp, 10638 conn_t *, NULL, void_ip_t *, nip6h, 10639 __dtrace_ipsr_ill_t *, ill, 10640 ipha_t *, NULL, ip6_t *, nip6h, 10641 int, 1); 10642 10643 if (nmp != NULL) { 10644 /* 10645 * Deliver locally and to 10646 * every local zone, except 10647 * the sending zone when 10648 * IPV6_MULTICAST_LOOP is 10649 * disabled. 10650 */ 10651 ip_wput_local_v6(RD(q), ill, 10652 nip6h, nmp, ire, 10653 fanout_flags, zoneid); 10654 } 10655 } else { 10656 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10657 ip1dbg(("ip_wput_ire_v6: " 10658 "copymsg failed\n")); 10659 } 10660 } 10661 } 10662 if (ip6h->ip6_hops == 0 || 10663 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10664 IS_LOOPBACK(ill)) { 10665 /* 10666 * Local multicast or just loopback on loopback 10667 * interface. 10668 */ 10669 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10670 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10671 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10672 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10673 freemsg(first_mp); 10674 return; 10675 } 10676 } 10677 10678 if (ire->ire_stq != NULL) { 10679 uint32_t sum; 10680 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10681 ill_phyint->phyint_ifindex; 10682 queue_t *dev_q = ire->ire_stq->q_next; 10683 10684 /* 10685 * non-NULL send-to queue - packet is to be sent 10686 * out an interface. 10687 */ 10688 10689 /* Driver is flow-controlling? */ 10690 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10691 DEV_Q_FLOW_BLOCKED(dev_q)) { 10692 /* 10693 * Queue packet if we have an conn to give back 10694 * pressure. We can't queue packets intended for 10695 * hardware acceleration since we've tossed that 10696 * state already. If the packet is being fed back 10697 * from ire_send_v6, we don't know the position in 10698 * the queue to enqueue the packet and we discard 10699 * the packet. 
10700 */ 10701 if (ipst->ips_ip_output_queue && connp != NULL && 10702 !mctl_present && caller != IRE_SEND) { 10703 if (caller == IP_WSRV) { 10704 idl_tx_list_t *idl_txl; 10705 10706 idl_txl = &ipst->ips_idl_tx_list[0]; 10707 connp->conn_did_putbq = 1; 10708 (void) putbq(connp->conn_wq, mp); 10709 conn_drain_insert(connp, idl_txl); 10710 /* 10711 * caller == IP_WSRV implies we are 10712 * the service thread, and the 10713 * queue is already noenabled. 10714 * The check for canput and 10715 * the putbq is not atomic. 10716 * So we need to check again. 10717 */ 10718 if (canput(dev_q)) 10719 connp->conn_did_putbq = 0; 10720 } else { 10721 (void) putq(connp->conn_wq, mp); 10722 } 10723 return; 10724 } 10725 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10726 freemsg(first_mp); 10727 return; 10728 } 10729 10730 /* 10731 * Look for reachability confirmations from the transport. 10732 */ 10733 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10734 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10735 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10736 if (mctl_present) 10737 io->ipsec_out_reachable = B_TRUE; 10738 } 10739 /* Fastpath */ 10740 switch (nexthdr) { 10741 case IPPROTO_TCP: 10742 case IPPROTO_UDP: 10743 case IPPROTO_ICMPV6: 10744 case IPPROTO_SCTP: 10745 hdr_length = IPV6_HDR_LEN; 10746 break; 10747 default: { 10748 uint8_t *nexthdrp; 10749 10750 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10751 &hdr_length, &nexthdrp)) { 10752 /* Malformed packet */ 10753 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10754 freemsg(first_mp); 10755 return; 10756 } 10757 nexthdr = *nexthdrp; 10758 break; 10759 } 10760 } 10761 10762 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10763 uint16_t *up; 10764 uint16_t *insp; 10765 10766 /* 10767 * The packet header is processed once for all, even 10768 * in the multirouting case. We disable hardware 10769 * checksum if the packet is multirouted, as it will be 10770 * replicated via several interfaces, and not all of 10771 * them may have this capability. 
10772 */ 10773 if (cksum_request == 1 && 10774 !(ire->ire_flags & RTF_MULTIRT)) { 10775 /* Skip the transport checksum */ 10776 goto cksum_done; 10777 } 10778 /* 10779 * Do user-configured raw checksum. 10780 * Compute checksum and insert at offset "cksum_request" 10781 */ 10782 10783 /* check for enough headers for checksum */ 10784 cksum_request += hdr_length; /* offset from rptr */ 10785 if ((mp->b_wptr - mp->b_rptr) < 10786 (cksum_request + sizeof (int16_t))) { 10787 if (!pullupmsg(mp, 10788 cksum_request + sizeof (int16_t))) { 10789 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10790 " failed\n")); 10791 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10792 freemsg(first_mp); 10793 return; 10794 } 10795 ip6h = (ip6_t *)mp->b_rptr; 10796 } 10797 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10798 ASSERT(((uintptr_t)insp & 0x1) == 0); 10799 up = (uint16_t *)&ip6h->ip6_src; 10800 /* 10801 * icmp has placed length and routing 10802 * header adjustment in *insp. 10803 */ 10804 sum = htons(nexthdr) + 10805 up[0] + up[1] + up[2] + up[3] + 10806 up[4] + up[5] + up[6] + up[7] + 10807 up[8] + up[9] + up[10] + up[11] + 10808 up[12] + up[13] + up[14] + up[15]; 10809 sum = (sum & 0xffff) + (sum >> 16); 10810 *insp = IP_CSUM(mp, hdr_length, sum); 10811 } else if (nexthdr == IPPROTO_TCP) { 10812 uint16_t *up; 10813 10814 /* 10815 * Check for full IPv6 header + enough TCP header 10816 * to get at the checksum field. 
10817 */ 10818 if ((mp->b_wptr - mp->b_rptr) < 10819 (hdr_length + TCP_CHECKSUM_OFFSET + 10820 TCP_CHECKSUM_SIZE)) { 10821 if (!pullupmsg(mp, hdr_length + 10822 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10823 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10824 " failed\n")); 10825 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10826 freemsg(first_mp); 10827 return; 10828 } 10829 ip6h = (ip6_t *)mp->b_rptr; 10830 } 10831 10832 up = (uint16_t *)&ip6h->ip6_src; 10833 /* 10834 * Note: The TCP module has stored the length value 10835 * into the tcp checksum field, so we don't 10836 * need to explicitly sum it in here. 10837 */ 10838 sum = up[0] + up[1] + up[2] + up[3] + 10839 up[4] + up[5] + up[6] + up[7] + 10840 up[8] + up[9] + up[10] + up[11] + 10841 up[12] + up[13] + up[14] + up[15]; 10842 10843 /* Fold the initial sum */ 10844 sum = (sum & 0xffff) + (sum >> 16); 10845 10846 up = (uint16_t *)(((uchar_t *)ip6h) + 10847 hdr_length + TCP_CHECKSUM_OFFSET); 10848 10849 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10850 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10851 ire->ire_max_frag, mctl_present, sum); 10852 10853 /* Software checksum? 
*/ 10854 if (DB_CKSUMFLAGS(mp) == 0) { 10855 IP6_STAT(ipst, ip6_out_sw_cksum); 10856 IP6_STAT_UPDATE(ipst, 10857 ip6_tcp_out_sw_cksum_bytes, 10858 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10859 hdr_length); 10860 } 10861 } else if (nexthdr == IPPROTO_UDP) { 10862 uint16_t *up; 10863 10864 /* 10865 * check for full IPv6 header + enough UDP header 10866 * to get at the UDP checksum field 10867 */ 10868 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10869 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10870 if (!pullupmsg(mp, hdr_length + 10871 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10872 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10873 " failed\n")); 10874 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10875 freemsg(first_mp); 10876 return; 10877 } 10878 ip6h = (ip6_t *)mp->b_rptr; 10879 } 10880 up = (uint16_t *)&ip6h->ip6_src; 10881 /* 10882 * Note: The UDP module has stored the length value 10883 * into the udp checksum field, so we don't 10884 * need to explicitly sum it in here. 10885 */ 10886 sum = up[0] + up[1] + up[2] + up[3] + 10887 up[4] + up[5] + up[6] + up[7] + 10888 up[8] + up[9] + up[10] + up[11] + 10889 up[12] + up[13] + up[14] + up[15]; 10890 10891 /* Fold the initial sum */ 10892 sum = (sum & 0xffff) + (sum >> 16); 10893 10894 up = (uint16_t *)(((uchar_t *)ip6h) + 10895 hdr_length + UDP_CHECKSUM_OFFSET); 10896 10897 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 10898 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10899 ire->ire_max_frag, mctl_present, sum); 10900 10901 /* Software checksum? 
*/ 10902 if (DB_CKSUMFLAGS(mp) == 0) { 10903 IP6_STAT(ipst, ip6_out_sw_cksum); 10904 IP6_STAT_UPDATE(ipst, 10905 ip6_udp_out_sw_cksum_bytes, 10906 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10907 hdr_length); 10908 } 10909 } else if (nexthdr == IPPROTO_ICMPV6) { 10910 uint16_t *up; 10911 icmp6_t *icmp6; 10912 10913 /* check for full IPv6+ICMPv6 header */ 10914 if ((mp->b_wptr - mp->b_rptr) < 10915 (hdr_length + ICMP6_MINLEN)) { 10916 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10917 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10918 " failed\n")); 10919 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10920 freemsg(first_mp); 10921 return; 10922 } 10923 ip6h = (ip6_t *)mp->b_rptr; 10924 } 10925 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10926 up = (uint16_t *)&ip6h->ip6_src; 10927 /* 10928 * icmp has placed length and routing 10929 * header adjustment in icmp6_cksum. 10930 */ 10931 sum = htons(IPPROTO_ICMPV6) + 10932 up[0] + up[1] + up[2] + up[3] + 10933 up[4] + up[5] + up[6] + up[7] + 10934 up[8] + up[9] + up[10] + up[11] + 10935 up[12] + up[13] + up[14] + up[15]; 10936 sum = (sum & 0xffff) + (sum >> 16); 10937 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 10938 10939 /* Update output mib stats */ 10940 icmp_update_out_mib_v6(ill, icmp6); 10941 } else if (nexthdr == IPPROTO_SCTP) { 10942 sctp_hdr_t *sctph; 10943 10944 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 10945 if (!pullupmsg(mp, hdr_length + 10946 sizeof (*sctph))) { 10947 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 10948 " failed\n")); 10949 BUMP_MIB(ill->ill_ip_mib, 10950 ipIfStatsOutDiscards); 10951 freemsg(mp); 10952 return; 10953 } 10954 ip6h = (ip6_t *)mp->b_rptr; 10955 } 10956 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 10957 sctph->sh_chksum = 0; 10958 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 10959 } 10960 10961 cksum_done: 10962 /* 10963 * We force the insertion of a fragment header using the 10964 * IPH_FRAG_HDR flag in two cases: 10965 * - after reception of an ICMPv6 "packet too 
big" message 10966 * with a MTU < 1280 (cf. RFC 2460 section 5) 10967 * - for multirouted IPv6 packets, so that the receiver can 10968 * discard duplicates according to their fragment identifier 10969 * 10970 * Two flags modifed from the API can modify this behavior. 10971 * The first is IPV6_USE_MIN_MTU. With this API the user 10972 * can specify how to manage PMTUD for unicast and multicast. 10973 * 10974 * IPV6_DONTFRAG disallows fragmentation. 10975 */ 10976 max_frag = ire->ire_max_frag; 10977 switch (IP6I_USE_MIN_MTU_API(flags)) { 10978 case IPV6_USE_MIN_MTU_DEFAULT: 10979 case IPV6_USE_MIN_MTU_UNICAST: 10980 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10981 max_frag = IPV6_MIN_MTU; 10982 } 10983 break; 10984 10985 case IPV6_USE_MIN_MTU_NEVER: 10986 max_frag = IPV6_MIN_MTU; 10987 break; 10988 } 10989 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 10990 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 10991 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 10992 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10993 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 10994 return; 10995 } 10996 10997 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 10998 (mp->b_cont ? 
msgdsize(mp) : 10999 mp->b_wptr - (uchar_t *)ip6h)) { 11000 ip0dbg(("Packet length mismatch: %d, %ld\n", 11001 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11002 msgdsize(mp))); 11003 freemsg(first_mp); 11004 return; 11005 } 11006 /* Do IPSEC processing first */ 11007 if (mctl_present) { 11008 ipsec_out_process(q, first_mp, ire, ill_index); 11009 return; 11010 } 11011 ASSERT(mp->b_prev == NULL); 11012 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11013 ntohs(ip6h->ip6_plen) + 11014 IPV6_HDR_LEN, max_frag)); 11015 ASSERT(mp == first_mp); 11016 /* Initiate IPPF processing */ 11017 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11018 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11019 if (mp == NULL) { 11020 return; 11021 } 11022 } 11023 ip_wput_frag_v6(mp, ire, reachable, connp, 11024 caller, max_frag); 11025 return; 11026 } 11027 /* Do IPSEC processing first */ 11028 if (mctl_present) { 11029 int extra_len = ipsec_out_extra_length(first_mp); 11030 11031 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11032 max_frag && connp != NULL && 11033 (flags & IP6I_DONTFRAG)) { 11034 /* 11035 * IPsec headers will push the packet over the 11036 * MTU limit. Issue an ICMPv6 Packet Too Big 11037 * message for this packet if the upper-layer 11038 * that issued this packet will be able to 11039 * react to the icmp_pkt2big_v6() that we'll 11040 * generate. 11041 */ 11042 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11043 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11044 return; 11045 } 11046 ipsec_out_process(q, first_mp, ire, ill_index); 11047 return; 11048 } 11049 /* 11050 * XXX multicast: add ip_mforward_v6() here. 11051 * Check conn_dontroute 11052 */ 11053 #ifdef lint 11054 /* 11055 * XXX The only purpose of this statement is to avoid lint 11056 * errors. See the above "XXX multicast". When that gets 11057 * fixed, remove this whole #ifdef lint section. 11058 */ 11059 ip3dbg(("multicast forward is %s.\n", 11060 (multicast_forward ? 
"TRUE" : "FALSE"))); 11061 #endif 11062 11063 UPDATE_OB_PKT_COUNT(ire); 11064 ire->ire_last_used_time = lbolt; 11065 ASSERT(mp == first_mp); 11066 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11067 } else { 11068 /* 11069 * DTrace this as ip:::send. A blocked packet will fire the 11070 * send probe, but not the receive probe. 11071 */ 11072 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11073 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11074 NULL, ip6_t *, ip6h, int, 1); 11075 DTRACE_PROBE4(ip6__loopback__out__start, 11076 ill_t *, NULL, ill_t *, ill, 11077 ip6_t *, ip6h, mblk_t *, first_mp); 11078 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11079 ipst->ips_ipv6firewall_loopback_out, 11080 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11081 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11082 if (first_mp != NULL) { 11083 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, 11084 zoneid); 11085 } 11086 } 11087 } 11088 11089 /* 11090 * Outbound IPv6 fragmentation routine using MDT. 11091 */ 11092 static void 11093 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11094 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11095 { 11096 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11097 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11098 mblk_t *hdr_mp, *md_mp = NULL; 11099 int i1; 11100 multidata_t *mmd; 11101 unsigned char *hdr_ptr, *pld_ptr; 11102 ip_pdescinfo_t pdi; 11103 uint32_t ident; 11104 size_t len; 11105 uint16_t offset; 11106 queue_t *stq = ire->ire_stq; 11107 ill_t *ill = (ill_t *)stq->q_ptr; 11108 ip_stack_t *ipst = ill->ill_ipst; 11109 11110 ASSERT(DB_TYPE(mp) == M_DATA); 11111 ASSERT(MBLKL(mp) > unfragmentable_len); 11112 11113 /* 11114 * Move read ptr past unfragmentable portion, we don't want this part 11115 * of the data in our fragments. 11116 */ 11117 mp->b_rptr += unfragmentable_len; 11118 11119 /* Calculate how many packets we will send out */ 11120 i1 = (mp->b_cont == NULL) ? 
MBLKL(mp) : msgsize(mp); 11121 pkts = (i1 + max_chunk - 1) / max_chunk; 11122 ASSERT(pkts > 1); 11123 11124 /* Allocate a message block which will hold all the IP Headers. */ 11125 wroff = ipst->ips_ip_wroff_extra; 11126 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11127 11128 i1 = pkts * hdr_chunk_len; 11129 /* 11130 * Create the header buffer, Multidata and destination address 11131 * and SAP attribute that should be associated with it. 11132 */ 11133 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11134 ((hdr_mp->b_wptr += i1), 11135 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11136 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11137 freemsg(mp); 11138 if (md_mp == NULL) { 11139 freemsg(hdr_mp); 11140 } else { 11141 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11142 freemsg(md_mp); 11143 } 11144 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11145 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11146 return; 11147 } 11148 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11149 11150 /* 11151 * Add a payload buffer to the Multidata; this operation must not 11152 * fail, or otherwise our logic in this routine is broken. There 11153 * is no memory allocation done by the routine, so any returned 11154 * failure simply tells us that we've done something wrong. 11155 * 11156 * A failure tells us that either we're adding the same payload 11157 * buffer more than once, or we're trying to add more buffers than 11158 * allowed. None of the above cases should happen, and we panic 11159 * because either there's horrible heap corruption, and/or 11160 * programming mistake. 11161 */ 11162 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11163 goto pbuf_panic; 11164 } 11165 11166 hdr_ptr = hdr_mp->b_rptr; 11167 pld_ptr = mp->b_rptr; 11168 11169 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11170 11171 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11172 11173 /* 11174 * len is the total length of the fragmentable data in this 11175 * datagram. 
For each fragment sent, we will decrement len 11176 * by the amount of fragmentable data sent in that fragment 11177 * until len reaches zero. 11178 */ 11179 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11180 11181 offset = 0; 11182 prev_nexthdr_offset += wroff; 11183 11184 while (len != 0) { 11185 size_t mlen; 11186 ip6_t *fip6h; 11187 ip6_frag_t *fraghdr; 11188 int error; 11189 11190 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11191 mlen = MIN(len, max_chunk); 11192 len -= mlen; 11193 11194 fip6h = (ip6_t *)(hdr_ptr + wroff); 11195 ASSERT(OK_32PTR(fip6h)); 11196 bcopy(ip6h, fip6h, unfragmentable_len); 11197 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11198 11199 fip6h->ip6_plen = htons((uint16_t)(mlen + 11200 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11201 11202 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11203 unfragmentable_len); 11204 fraghdr->ip6f_nxt = nexthdr; 11205 fraghdr->ip6f_reserved = 0; 11206 fraghdr->ip6f_offlg = htons(offset) | 11207 ((len != 0) ? IP6F_MORE_FRAG : 0); 11208 fraghdr->ip6f_ident = ident; 11209 11210 /* 11211 * Record offset and size of header and data of the next packet 11212 * in the multidata message. 11213 */ 11214 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11215 unfragmentable_len + sizeof (ip6_frag_t), 0); 11216 PDESC_PLD_INIT(&pdi); 11217 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11218 ASSERT(i1 > 0); 11219 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11220 if (i1 == mlen) { 11221 pld_ptr += mlen; 11222 } else { 11223 i1 = mlen - i1; 11224 mp = mp->b_cont; 11225 ASSERT(mp != NULL); 11226 ASSERT(MBLKL(mp) >= i1); 11227 /* 11228 * Attach the next payload message block to the 11229 * multidata message. 
11230 */ 11231 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11232 goto pbuf_panic; 11233 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11234 pld_ptr = mp->b_rptr + i1; 11235 } 11236 11237 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11238 KM_NOSLEEP)) == NULL) { 11239 /* 11240 * Any failure other than ENOMEM indicates that we 11241 * have passed in invalid pdesc info or parameters 11242 * to mmd_addpdesc, which must not happen. 11243 * 11244 * EINVAL is a result of failure on boundary checks 11245 * against the pdesc info contents. It should not 11246 * happen, and we panic because either there's 11247 * horrible heap corruption, and/or programming 11248 * mistake. 11249 */ 11250 if (error != ENOMEM) { 11251 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11252 "pdesc logic error detected for " 11253 "mmd %p pinfo %p (%d)\n", 11254 (void *)mmd, (void *)&pdi, error); 11255 /* NOTREACHED */ 11256 } 11257 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11258 /* Free unattached payload message blocks as well */ 11259 md_mp->b_cont = mp->b_cont; 11260 goto free_mmd; 11261 } 11262 11263 /* Advance fragment offset. */ 11264 offset += mlen; 11265 11266 /* Advance to location for next header in the buffer. */ 11267 hdr_ptr += hdr_chunk_len; 11268 11269 /* Did we reach the next payload message block? */ 11270 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11271 mp = mp->b_cont; 11272 /* 11273 * Attach the next message block with payload 11274 * data to the multidata message. 
11275 */ 11276 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11277 goto pbuf_panic; 11278 pld_ptr = mp->b_rptr; 11279 } 11280 } 11281 11282 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11283 ASSERT(mp->b_wptr == pld_ptr); 11284 11285 /* Update IP statistics */ 11286 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11287 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11288 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11289 /* 11290 * The ipv6 header len is accounted for in unfragmentable_len so 11291 * when calculating the fragmentation overhead just add the frag 11292 * header len. 11293 */ 11294 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11295 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11296 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11297 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11298 11299 ire->ire_ob_pkt_count += pkts; 11300 if (ire->ire_ipif != NULL) 11301 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11302 11303 ire->ire_last_used_time = lbolt; 11304 /* Send it down */ 11305 putnext(stq, md_mp); 11306 return; 11307 11308 pbuf_panic: 11309 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11310 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11311 pbuf_idx); 11312 /* NOTREACHED */ 11313 } 11314 11315 /* 11316 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11317 * We have not optimized this in terms of number of mblks 11318 * allocated. For instance, for each fragment sent we always allocate a 11319 * mblk to hold the IPv6 header and fragment header. 11320 * 11321 * Assumes that all the extension headers are contained in the first mblk. 11322 * 11323 * The fragment header is inserted after an hop-by-hop options header 11324 * and after [an optional destinations header followed by] a routing header. 11325 * 11326 * NOTE : This function does not ire_refrele the ire passed in as 11327 * the argument. 
11328 */ 11329 void 11330 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11331 int caller, int max_frag) 11332 { 11333 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11334 ip6_t *fip6h; 11335 mblk_t *hmp; 11336 mblk_t *hmp0; 11337 mblk_t *dmp; 11338 ip6_frag_t *fraghdr; 11339 size_t unfragmentable_len; 11340 size_t len; 11341 size_t mlen; 11342 size_t max_chunk; 11343 uint32_t ident; 11344 uint16_t off_flags; 11345 uint16_t offset = 0; 11346 ill_t *ill; 11347 uint8_t nexthdr; 11348 uint_t prev_nexthdr_offset; 11349 uint8_t *ptr; 11350 ip_stack_t *ipst = ire->ire_ipst; 11351 11352 ASSERT(ire->ire_type == IRE_CACHE); 11353 ill = (ill_t *)ire->ire_stq->q_ptr; 11354 11355 if (max_frag <= 0) { 11356 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11357 freemsg(mp); 11358 return; 11359 } 11360 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11361 11362 /* 11363 * Determine the length of the unfragmentable portion of this 11364 * datagram. This consists of the IPv6 header, a potential 11365 * hop-by-hop options header, a potential pre-routing-header 11366 * destination options header, and a potential routing header. 
11367 */ 11368 nexthdr = ip6h->ip6_nxt; 11369 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11370 ptr = (uint8_t *)&ip6h[1]; 11371 11372 if (nexthdr == IPPROTO_HOPOPTS) { 11373 ip6_hbh_t *hbh_hdr; 11374 uint_t hdr_len; 11375 11376 hbh_hdr = (ip6_hbh_t *)ptr; 11377 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11378 nexthdr = hbh_hdr->ip6h_nxt; 11379 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11380 - (uint8_t *)ip6h; 11381 ptr += hdr_len; 11382 } 11383 if (nexthdr == IPPROTO_DSTOPTS) { 11384 ip6_dest_t *dest_hdr; 11385 uint_t hdr_len; 11386 11387 dest_hdr = (ip6_dest_t *)ptr; 11388 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11389 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11390 nexthdr = dest_hdr->ip6d_nxt; 11391 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11392 - (uint8_t *)ip6h; 11393 ptr += hdr_len; 11394 } 11395 } 11396 if (nexthdr == IPPROTO_ROUTING) { 11397 ip6_rthdr_t *rthdr; 11398 uint_t hdr_len; 11399 11400 rthdr = (ip6_rthdr_t *)ptr; 11401 nexthdr = rthdr->ip6r_nxt; 11402 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11403 - (uint8_t *)ip6h; 11404 hdr_len = 8 * (rthdr->ip6r_len + 1); 11405 ptr += hdr_len; 11406 } 11407 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11408 11409 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11410 sizeof (ip6_frag_t)) & ~7; 11411 11412 /* Check if we can use MDT to send out the frags. */ 11413 ASSERT(!IRE_IS_LOCAL(ire)); 11414 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11415 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11416 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11417 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11418 nexthdr, prev_nexthdr_offset); 11419 return; 11420 } 11421 11422 /* 11423 * Allocate an mblk with enough room for the link-layer 11424 * header, the unfragmentable part of the datagram, and the 11425 * fragment header. 
This (or a copy) will be used as the 11426 * first mblk for each fragment we send. 11427 */ 11428 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 11429 ipst->ips_ip_wroff_extra, mp); 11430 if (hmp == NULL) { 11431 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11432 freemsg(mp); 11433 return; 11434 } 11435 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11436 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11437 11438 fip6h = (ip6_t *)hmp->b_rptr; 11439 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11440 11441 bcopy(ip6h, fip6h, unfragmentable_len); 11442 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11443 11444 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11445 11446 fraghdr->ip6f_nxt = nexthdr; 11447 fraghdr->ip6f_reserved = 0; 11448 fraghdr->ip6f_offlg = 0; 11449 fraghdr->ip6f_ident = htonl(ident); 11450 11451 /* 11452 * len is the total length of the fragmentable data in this 11453 * datagram. For each fragment sent, we will decrement len 11454 * by the amount of fragmentable data sent in that fragment 11455 * until len reaches zero. 11456 */ 11457 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11458 11459 /* 11460 * Move read ptr past unfragmentable portion, we don't want this part 11461 * of the data in our fragments. 
11462 */ 11463 mp->b_rptr += unfragmentable_len; 11464 11465 while (len != 0) { 11466 mlen = MIN(len, max_chunk); 11467 len -= mlen; 11468 if (len != 0) { 11469 /* Not last */ 11470 hmp0 = copyb(hmp); 11471 if (hmp0 == NULL) { 11472 freeb(hmp); 11473 freemsg(mp); 11474 BUMP_MIB(ill->ill_ip_mib, 11475 ipIfStatsOutFragFails); 11476 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11477 return; 11478 } 11479 off_flags = IP6F_MORE_FRAG; 11480 } else { 11481 /* Last fragment */ 11482 hmp0 = hmp; 11483 hmp = NULL; 11484 off_flags = 0; 11485 } 11486 fip6h = (ip6_t *)(hmp0->b_rptr); 11487 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11488 11489 fip6h->ip6_plen = htons((uint16_t)(mlen + 11490 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11491 /* 11492 * Note: Optimization alert. 11493 * In IPv6 (and IPv4) protocol header, Fragment Offset 11494 * ("offset") is 13 bits wide and in 8-octet units. 11495 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11496 * it occupies the most significant 13 bits. 11497 * (least significant 13 bits in IPv4). 11498 * We do not do any shifts here. Not shifting is same effect 11499 * as taking offset value in octet units, dividing by 8 and 11500 * then shifting 3 bits left to line it up in place in proper 11501 * place protocol header. 
11502 */ 11503 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11504 11505 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11506 /* mp has already been freed by ip_carve_mp() */ 11507 if (hmp != NULL) 11508 freeb(hmp); 11509 freeb(hmp0); 11510 ip1dbg(("ip_carve_mp: failed\n")); 11511 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11512 return; 11513 } 11514 hmp0->b_cont = dmp; 11515 /* Get the priority marking, if any */ 11516 hmp0->b_band = dmp->b_band; 11517 UPDATE_OB_PKT_COUNT(ire); 11518 ire->ire_last_used_time = lbolt; 11519 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11520 caller, NULL); 11521 reachable = 0; /* No need to redo state machine in loop */ 11522 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11523 offset += mlen; 11524 } 11525 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11526 } 11527 11528 /* 11529 * Determine if the ill and multicast aspects of that packets 11530 * "matches" the conn. 11531 */ 11532 boolean_t 11533 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11534 zoneid_t zoneid) 11535 { 11536 ill_t *bound_ill; 11537 boolean_t wantpacket; 11538 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11539 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11540 11541 /* 11542 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11543 * unicast and multicast reception to conn_incoming_ill. 11544 * conn_wantpacket_v6 is called both for unicast and 11545 * multicast. 11546 */ 11547 bound_ill = connp->conn_incoming_ill; 11548 if (bound_ill != NULL) { 11549 if (IS_IPMP(bound_ill)) { 11550 if (bound_ill->ill_grp != ill->ill_grp) 11551 return (B_FALSE); 11552 } else { 11553 if (bound_ill != ill) 11554 return (B_FALSE); 11555 } 11556 } 11557 11558 if (connp->conn_multi_router) 11559 return (B_TRUE); 11560 11561 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11562 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11563 /* 11564 * Unicast case: we match the conn only if it's in the specified 11565 * zone. 
11566 */ 11567 return (IPCL_ZONE_MATCH(connp, zoneid)); 11568 } 11569 11570 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11571 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11572 /* 11573 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11574 * disabled, therefore we don't dispatch the multicast packet to 11575 * the sending zone. 11576 */ 11577 return (B_FALSE); 11578 } 11579 11580 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11581 zoneid != ALL_ZONES) { 11582 /* 11583 * Multicast packet on the loopback interface: we only match 11584 * conns who joined the group in the specified zone. 11585 */ 11586 return (B_FALSE); 11587 } 11588 11589 mutex_enter(&connp->conn_lock); 11590 wantpacket = 11591 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11592 mutex_exit(&connp->conn_lock); 11593 11594 return (wantpacket); 11595 } 11596 11597 11598 /* 11599 * Transmit a packet and update any NUD state based on the flags 11600 * XXX need to "recover" any ip6i_t when doing putq! 11601 * 11602 * NOTE : This function does not ire_refrele the ire passed in as the 11603 * argument. 
11604 */ 11605 void 11606 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11607 int caller, ipsec_out_t *io) 11608 { 11609 mblk_t *mp1; 11610 nce_t *nce = ire->ire_nce; 11611 ill_t *ill; 11612 ill_t *out_ill; 11613 uint64_t delta; 11614 ip6_t *ip6h; 11615 queue_t *stq = ire->ire_stq; 11616 ire_t *ire1 = NULL; 11617 ire_t *save_ire = ire; 11618 boolean_t multirt_send = B_FALSE; 11619 mblk_t *next_mp = NULL; 11620 ip_stack_t *ipst = ire->ire_ipst; 11621 boolean_t fp_prepend = B_FALSE; 11622 uint32_t hlen; 11623 11624 ip6h = (ip6_t *)mp->b_rptr; 11625 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11626 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11627 ASSERT(nce != NULL); 11628 ASSERT(mp->b_datap->db_type == M_DATA); 11629 ASSERT(stq != NULL); 11630 11631 ill = ire_to_ill(ire); 11632 if (!ill) { 11633 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11634 freemsg(mp); 11635 return; 11636 } 11637 11638 /* Flow-control check has been done in ip_wput_ire_v6 */ 11639 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11640 caller == IP_WSRV || canput(stq->q_next)) { 11641 uint32_t ill_index; 11642 11643 /* 11644 * In most cases, the emission loop below is entered only 11645 * once. Only in the case where the ire holds the 11646 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11647 * flagged ires in the bucket, and send the packet 11648 * through all crossed RTF_MULTIRT routes. 11649 */ 11650 if (ire->ire_flags & RTF_MULTIRT) { 11651 /* 11652 * Multirouting case. The bucket where ire is stored 11653 * probably holds other RTF_MULTIRT flagged ires 11654 * to the destination. In this call to ip_xmit_v6, 11655 * we attempt to send the packet through all 11656 * those ires. Thus, we first ensure that ire is the 11657 * first RTF_MULTIRT ire in the bucket, 11658 * before walking the ire list. 
11659 */ 11660 ire_t *first_ire; 11661 irb_t *irb = ire->ire_bucket; 11662 ASSERT(irb != NULL); 11663 multirt_send = B_TRUE; 11664 11665 /* Make sure we do not omit any multiroute ire. */ 11666 IRB_REFHOLD(irb); 11667 for (first_ire = irb->irb_ire; 11668 first_ire != NULL; 11669 first_ire = first_ire->ire_next) { 11670 if ((first_ire->ire_flags & RTF_MULTIRT) && 11671 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11672 &ire->ire_addr_v6)) && 11673 !(first_ire->ire_marks & 11674 (IRE_MARK_CONDEMNED | IRE_MARK_TESTHIDDEN))) 11675 break; 11676 } 11677 11678 if ((first_ire != NULL) && (first_ire != ire)) { 11679 IRE_REFHOLD(first_ire); 11680 /* ire will be released by the caller */ 11681 ire = first_ire; 11682 nce = ire->ire_nce; 11683 stq = ire->ire_stq; 11684 ill = ire_to_ill(ire); 11685 } 11686 IRB_REFRELE(irb); 11687 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11688 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11689 ILL_MDT_USABLE(ill)) { 11690 /* 11691 * This tcp connection was marked as MDT-capable, but 11692 * it has been turned off due changes in the interface. 11693 * Now that the interface support is back, turn it on 11694 * by notifying tcp. We don't directly modify tcp_mdt, 11695 * since we leave all the details to the tcp code that 11696 * knows better. 11697 */ 11698 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11699 11700 if (mdimp == NULL) { 11701 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11702 "connp %p (ENOMEM)\n", (void *)connp)); 11703 } else { 11704 CONN_INC_REF(connp); 11705 SQUEUE_ENTER_ONE(connp->conn_sqp, mdimp, 11706 tcp_input, connp, SQ_FILL, 11707 SQTAG_TCP_INPUT_MCTL); 11708 } 11709 } 11710 11711 do { 11712 mblk_t *mp_ip6h; 11713 11714 if (multirt_send) { 11715 irb_t *irb; 11716 /* 11717 * We are in a multiple send case, need to get 11718 * the next ire and make a duplicate of the 11719 * packet. ire1 holds here the next ire to 11720 * process in the bucket. 
If multirouting is 11721 * expected, any non-RTF_MULTIRT ire that has 11722 * the right destination address is ignored. 11723 */ 11724 irb = ire->ire_bucket; 11725 ASSERT(irb != NULL); 11726 11727 IRB_REFHOLD(irb); 11728 for (ire1 = ire->ire_next; 11729 ire1 != NULL; 11730 ire1 = ire1->ire_next) { 11731 if (!(ire1->ire_flags & RTF_MULTIRT)) 11732 continue; 11733 if (!IN6_ARE_ADDR_EQUAL( 11734 &ire1->ire_addr_v6, 11735 &ire->ire_addr_v6)) 11736 continue; 11737 if (ire1->ire_marks & 11738 IRE_MARK_CONDEMNED) 11739 continue; 11740 11741 /* Got one */ 11742 if (ire1 != save_ire) { 11743 IRE_REFHOLD(ire1); 11744 } 11745 break; 11746 } 11747 IRB_REFRELE(irb); 11748 11749 if (ire1 != NULL) { 11750 next_mp = copyb(mp); 11751 if ((next_mp == NULL) || 11752 ((mp->b_cont != NULL) && 11753 ((next_mp->b_cont = 11754 dupmsg(mp->b_cont)) == NULL))) { 11755 freemsg(next_mp); 11756 next_mp = NULL; 11757 ire_refrele(ire1); 11758 ire1 = NULL; 11759 } 11760 } 11761 11762 /* Last multiroute ire; don't loop anymore. */ 11763 if (ire1 == NULL) { 11764 multirt_send = B_FALSE; 11765 } 11766 } 11767 11768 ill_index = 11769 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11770 11771 /* Initiate IPPF processing */ 11772 if (IP6_OUT_IPP(flags, ipst)) { 11773 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11774 if (mp == NULL) { 11775 BUMP_MIB(ill->ill_ip_mib, 11776 ipIfStatsOutDiscards); 11777 if (next_mp != NULL) 11778 freemsg(next_mp); 11779 if (ire != save_ire) { 11780 ire_refrele(ire); 11781 } 11782 return; 11783 } 11784 ip6h = (ip6_t *)mp->b_rptr; 11785 } 11786 mp_ip6h = mp; 11787 11788 /* 11789 * Check for fastpath, we need to hold nce_lock to 11790 * prevent fastpath update from chaining nce_fp_mp. 
11791 */ 11792 11793 ASSERT(nce->nce_ipversion != IPV4_VERSION); 11794 mutex_enter(&nce->nce_lock); 11795 if ((mp1 = nce->nce_fp_mp) != NULL) { 11796 uchar_t *rptr; 11797 11798 hlen = MBLKL(mp1); 11799 rptr = mp->b_rptr - hlen; 11800 /* 11801 * make sure there is room for the fastpath 11802 * datalink header 11803 */ 11804 if (rptr < mp->b_datap->db_base) { 11805 mp1 = copyb(mp1); 11806 mutex_exit(&nce->nce_lock); 11807 if (mp1 == NULL) { 11808 BUMP_MIB(ill->ill_ip_mib, 11809 ipIfStatsOutDiscards); 11810 freemsg(mp); 11811 if (next_mp != NULL) 11812 freemsg(next_mp); 11813 if (ire != save_ire) { 11814 ire_refrele(ire); 11815 } 11816 return; 11817 } 11818 mp1->b_cont = mp; 11819 11820 /* Get the priority marking, if any */ 11821 mp1->b_band = mp->b_band; 11822 mp = mp1; 11823 } else { 11824 mp->b_rptr = rptr; 11825 /* 11826 * fastpath - pre-pend datalink 11827 * header 11828 */ 11829 bcopy(mp1->b_rptr, rptr, hlen); 11830 mutex_exit(&nce->nce_lock); 11831 fp_prepend = B_TRUE; 11832 } 11833 } else { 11834 /* 11835 * Get the DL_UNITDATA_REQ. 11836 */ 11837 mp1 = nce->nce_res_mp; 11838 if (mp1 == NULL) { 11839 mutex_exit(&nce->nce_lock); 11840 ip1dbg(("ip_xmit_v6: No resolution " 11841 "block ire = %p\n", (void *)ire)); 11842 freemsg(mp); 11843 if (next_mp != NULL) 11844 freemsg(next_mp); 11845 if (ire != save_ire) { 11846 ire_refrele(ire); 11847 } 11848 return; 11849 } 11850 /* 11851 * Prepend the DL_UNITDATA_REQ. 
11852 */ 11853 mp1 = copyb(mp1); 11854 mutex_exit(&nce->nce_lock); 11855 if (mp1 == NULL) { 11856 BUMP_MIB(ill->ill_ip_mib, 11857 ipIfStatsOutDiscards); 11858 freemsg(mp); 11859 if (next_mp != NULL) 11860 freemsg(next_mp); 11861 if (ire != save_ire) { 11862 ire_refrele(ire); 11863 } 11864 return; 11865 } 11866 mp1->b_cont = mp; 11867 11868 /* Get the priority marking, if any */ 11869 mp1->b_band = mp->b_band; 11870 mp = mp1; 11871 } 11872 11873 out_ill = (ill_t *)stq->q_ptr; 11874 11875 DTRACE_PROBE4(ip6__physical__out__start, 11876 ill_t *, NULL, ill_t *, out_ill, 11877 ip6_t *, ip6h, mblk_t *, mp); 11878 11879 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 11880 ipst->ips_ipv6firewall_physical_out, 11881 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 11882 11883 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 11884 11885 if (mp == NULL) { 11886 if (multirt_send) { 11887 ASSERT(ire1 != NULL); 11888 if (ire != save_ire) { 11889 ire_refrele(ire); 11890 } 11891 /* 11892 * Proceed with the next RTF_MULTIRT 11893 * ire, also set up the send-to queue 11894 * accordingly. 11895 */ 11896 ire = ire1; 11897 ire1 = NULL; 11898 stq = ire->ire_stq; 11899 nce = ire->ire_nce; 11900 ill = ire_to_ill(ire); 11901 mp = next_mp; 11902 next_mp = NULL; 11903 continue; 11904 } else { 11905 ASSERT(next_mp == NULL); 11906 ASSERT(ire1 == NULL); 11907 break; 11908 } 11909 } 11910 11911 if (ipst->ips_ip6_observe.he_interested) { 11912 zoneid_t szone; 11913 11914 /* 11915 * Both of these functions expect b_rptr to 11916 * be where the IPv6 header starts, so advance 11917 * past the link layer header. 
11918 */ 11919 if (fp_prepend) 11920 mp_ip6h->b_rptr += hlen; 11921 szone = ip_get_zoneid_v6(&ip6h->ip6_src, 11922 mp_ip6h, out_ill, ipst, ALL_ZONES); 11923 ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, 11924 ALL_ZONES, out_ill, ipst); 11925 if (fp_prepend) 11926 mp_ip6h->b_rptr -= hlen; 11927 } 11928 11929 /* 11930 * Update ire and MIB counters; for save_ire, this has 11931 * been done by the caller. 11932 */ 11933 if (ire != save_ire) { 11934 UPDATE_OB_PKT_COUNT(ire); 11935 ire->ire_last_used_time = lbolt; 11936 11937 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11938 BUMP_MIB(ill->ill_ip_mib, 11939 ipIfStatsHCOutMcastPkts); 11940 UPDATE_MIB(ill->ill_ip_mib, 11941 ipIfStatsHCOutMcastOctets, 11942 ntohs(ip6h->ip6_plen) + 11943 IPV6_HDR_LEN); 11944 } 11945 } 11946 11947 /* 11948 * Send it down. XXX Do we want to flow control AH/ESP 11949 * packets that carry TCP payloads? We don't flow 11950 * control TCP packets, but we should also not 11951 * flow-control TCP packets that have been protected. 11952 * We don't have an easy way to find out if an AH/ESP 11953 * packet was originally TCP or not currently. 11954 */ 11955 if (io == NULL) { 11956 BUMP_MIB(ill->ill_ip_mib, 11957 ipIfStatsHCOutTransmits); 11958 UPDATE_MIB(ill->ill_ip_mib, 11959 ipIfStatsHCOutOctets, 11960 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11961 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 11962 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 11963 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 11964 int, 0); 11965 11966 putnext(stq, mp); 11967 } else { 11968 /* 11969 * Safety Pup says: make sure this is 11970 * going to the right interface! 
11971 */ 11972 if (io->ipsec_out_capab_ill_index != 11973 ill_index) { 11974 /* IPsec kstats: bump lose counter */ 11975 freemsg(mp1); 11976 } else { 11977 BUMP_MIB(ill->ill_ip_mib, 11978 ipIfStatsHCOutTransmits); 11979 UPDATE_MIB(ill->ill_ip_mib, 11980 ipIfStatsHCOutOctets, 11981 ntohs(ip6h->ip6_plen) + 11982 IPV6_HDR_LEN); 11983 DTRACE_IP7(send, mblk_t *, mp, 11984 conn_t *, NULL, void_ip_t *, ip6h, 11985 __dtrace_ipsr_ill_t *, out_ill, 11986 ipha_t *, NULL, ip6_t *, ip6h, int, 11987 0); 11988 ipsec_hw_putnext(stq, mp); 11989 } 11990 } 11991 11992 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 11993 if (ire != save_ire) { 11994 ire_refrele(ire); 11995 } 11996 if (multirt_send) { 11997 ASSERT(ire1 != NULL); 11998 /* 11999 * Proceed with the next RTF_MULTIRT 12000 * ire, also set up the send-to queue 12001 * accordingly. 12002 */ 12003 ire = ire1; 12004 ire1 = NULL; 12005 stq = ire->ire_stq; 12006 nce = ire->ire_nce; 12007 ill = ire_to_ill(ire); 12008 mp = next_mp; 12009 next_mp = NULL; 12010 continue; 12011 } 12012 ASSERT(next_mp == NULL); 12013 ASSERT(ire1 == NULL); 12014 return; 12015 } 12016 12017 ASSERT(nce->nce_state != ND_INCOMPLETE); 12018 12019 /* 12020 * Check for upper layer advice 12021 */ 12022 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12023 /* 12024 * It should be o.k. to check the state without 12025 * a lock here, at most we lose an advice. 
12026 */ 12027 nce->nce_last = TICK_TO_MSEC(lbolt64); 12028 if (nce->nce_state != ND_REACHABLE) { 12029 12030 mutex_enter(&nce->nce_lock); 12031 nce->nce_state = ND_REACHABLE; 12032 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12033 mutex_exit(&nce->nce_lock); 12034 (void) untimeout(nce->nce_timeout_id); 12035 if (ip_debug > 2) { 12036 /* ip1dbg */ 12037 pr_addr_dbg("ip_xmit_v6: state" 12038 " for %s changed to" 12039 " REACHABLE\n", AF_INET6, 12040 &ire->ire_addr_v6); 12041 } 12042 } 12043 if (ire != save_ire) { 12044 ire_refrele(ire); 12045 } 12046 if (multirt_send) { 12047 ASSERT(ire1 != NULL); 12048 /* 12049 * Proceed with the next RTF_MULTIRT 12050 * ire, also set up the send-to queue 12051 * accordingly. 12052 */ 12053 ire = ire1; 12054 ire1 = NULL; 12055 stq = ire->ire_stq; 12056 nce = ire->ire_nce; 12057 ill = ire_to_ill(ire); 12058 mp = next_mp; 12059 next_mp = NULL; 12060 continue; 12061 } 12062 ASSERT(next_mp == NULL); 12063 ASSERT(ire1 == NULL); 12064 return; 12065 } 12066 12067 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12068 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12069 " ill_reachable_time = %d \n", delta, 12070 ill->ill_reachable_time)); 12071 if (delta > (uint64_t)ill->ill_reachable_time) { 12072 nce = ire->ire_nce; 12073 mutex_enter(&nce->nce_lock); 12074 switch (nce->nce_state) { 12075 case ND_REACHABLE: 12076 case ND_STALE: 12077 /* 12078 * ND_REACHABLE is identical to 12079 * ND_STALE in this specific case. If 12080 * reachable time has expired for this 12081 * neighbor (delta is greater than 12082 * reachable time), conceptually, the 12083 * neighbor cache is no longer in 12084 * REACHABLE state, but already in 12085 * STALE state. So the correct 12086 * transition here is to ND_DELAY. 
12087 */ 12088 nce->nce_state = ND_DELAY; 12089 mutex_exit(&nce->nce_lock); 12090 NDP_RESTART_TIMER(nce, 12091 ipst->ips_delay_first_probe_time); 12092 if (ip_debug > 3) { 12093 /* ip2dbg */ 12094 pr_addr_dbg("ip_xmit_v6: state" 12095 " for %s changed to" 12096 " DELAY\n", AF_INET6, 12097 &ire->ire_addr_v6); 12098 } 12099 break; 12100 case ND_DELAY: 12101 case ND_PROBE: 12102 mutex_exit(&nce->nce_lock); 12103 /* Timers have already started */ 12104 break; 12105 case ND_UNREACHABLE: 12106 /* 12107 * ndp timer has detected that this nce 12108 * is unreachable and initiated deleting 12109 * this nce and all its associated IREs. 12110 * This is a race where we found the 12111 * ire before it was deleted and have 12112 * just sent out a packet using this 12113 * unreachable nce. 12114 */ 12115 mutex_exit(&nce->nce_lock); 12116 break; 12117 default: 12118 ASSERT(0); 12119 } 12120 } 12121 12122 if (multirt_send) { 12123 ASSERT(ire1 != NULL); 12124 /* 12125 * Proceed with the next RTF_MULTIRT ire, 12126 * Also set up the send-to queue accordingly. 12127 */ 12128 if (ire != save_ire) { 12129 ire_refrele(ire); 12130 } 12131 ire = ire1; 12132 ire1 = NULL; 12133 stq = ire->ire_stq; 12134 nce = ire->ire_nce; 12135 ill = ire_to_ill(ire); 12136 mp = next_mp; 12137 next_mp = NULL; 12138 } 12139 } while (multirt_send); 12140 /* 12141 * In the multirouting case, release the last ire used for 12142 * emission. save_ire will be released by the caller. 12143 */ 12144 if (ire != save_ire) { 12145 ire_refrele(ire); 12146 } 12147 } else { 12148 /* 12149 * Can't apply backpressure, just discard the packet. 12150 */ 12151 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12152 freemsg(mp); 12153 return; 12154 } 12155 } 12156 12157 /* 12158 * pr_addr_dbg function provides the needed buffer space to call 12159 * inet_ntop() function's 3rd argument. This function should be 12160 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12161 * stack buffer space in it's own stack frame. 
This function uses 12162 * a buffer from it's own stack and prints the information. 12163 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12164 * 12165 * Note: This function can call inet_ntop() once. 12166 */ 12167 void 12168 pr_addr_dbg(char *fmt1, int af, const void *addr) 12169 { 12170 char buf[INET6_ADDRSTRLEN]; 12171 12172 if (fmt1 == NULL) { 12173 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12174 return; 12175 } 12176 12177 /* 12178 * This does not compare debug level and just prints 12179 * out. Thus it is the responsibility of the caller 12180 * to check the appropriate debug-level before calling 12181 * this function. 12182 */ 12183 if (ip_debug > 0) { 12184 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12185 } 12186 12187 12188 } 12189 12190 12191 /* 12192 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12193 * if needed and extension headers) that will be needed based on the 12194 * ip6_pkt_t structure passed by the caller. 12195 * 12196 * The returned length does not include the length of the upper level 12197 * protocol (ULP) header. 
12198 */ 12199 int 12200 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12201 { 12202 int len; 12203 12204 len = IPV6_HDR_LEN; 12205 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12206 len += sizeof (ip6i_t); 12207 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12208 ASSERT(ipp->ipp_hopoptslen != 0); 12209 len += ipp->ipp_hopoptslen; 12210 } 12211 if (ipp->ipp_fields & IPPF_RTHDR) { 12212 ASSERT(ipp->ipp_rthdrlen != 0); 12213 len += ipp->ipp_rthdrlen; 12214 } 12215 /* 12216 * En-route destination options 12217 * Only do them if there's a routing header as well 12218 */ 12219 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12220 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12221 ASSERT(ipp->ipp_rtdstoptslen != 0); 12222 len += ipp->ipp_rtdstoptslen; 12223 } 12224 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12225 ASSERT(ipp->ipp_dstoptslen != 0); 12226 len += ipp->ipp_dstoptslen; 12227 } 12228 return (len); 12229 } 12230 12231 /* 12232 * All-purpose routine to build a header chain of an IPv6 header 12233 * followed by any required extension headers and a proto header, 12234 * preceeded (where necessary) by an ip6i_t private header. 12235 * 12236 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12237 * will be filled in appropriately. 12238 * Thus the caller must fill in the rest of the IPv6 header, such as 12239 * traffic class/flowid, source address (if not set here), hoplimit (if not 12240 * set here) and destination address. 12241 * 12242 * The extension headers and ip6i_t header will all be fully filled in. 12243 */ 12244 void 12245 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12246 ip6_pkt_t *ipp, uint8_t protocol) 12247 { 12248 uint8_t *nxthdr_ptr; 12249 uint8_t *cp; 12250 ip6i_t *ip6i; 12251 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12252 12253 /* 12254 * If sending private ip6i_t header down (checksum info, nexthop, 12255 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12256 * then fill it in. (The checksum info will be filled in by icmp). 
12257 */ 12258 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12259 ip6i = (ip6i_t *)ip6h; 12260 ip6h = (ip6_t *)&ip6i[1]; 12261 12262 ip6i->ip6i_flags = 0; 12263 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12264 if (ipp->ipp_fields & IPPF_IFINDEX || 12265 ipp->ipp_fields & IPPF_SCOPE_ID) { 12266 ASSERT(ipp->ipp_ifindex != 0); 12267 ip6i->ip6i_flags |= IP6I_IFINDEX; 12268 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12269 } 12270 if (ipp->ipp_fields & IPPF_ADDR) { 12271 /* 12272 * Enable per-packet source address verification if 12273 * IPV6_PKTINFO specified the source address. 12274 * ip6_src is set in the transport's _wput function. 12275 */ 12276 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12277 &ipp->ipp_addr)); 12278 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12279 } 12280 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12281 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12282 /* 12283 * We need to set this flag so that IP doesn't 12284 * rewrite the IPv6 header's hoplimit with the 12285 * current default value. 12286 */ 12287 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12288 } 12289 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12290 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12291 &ipp->ipp_nexthop)); 12292 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12293 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12294 } 12295 /* 12296 * tell IP this is an ip6i_t private header 12297 */ 12298 ip6i->ip6i_nxt = IPPROTO_RAW; 12299 } 12300 /* Initialize IPv6 header */ 12301 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12302 if (ipp->ipp_fields & IPPF_TCLASS) { 12303 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12304 (ipp->ipp_tclass << 20); 12305 } 12306 if (ipp->ipp_fields & IPPF_ADDR) 12307 ip6h->ip6_src = ipp->ipp_addr; 12308 12309 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12310 cp = (uint8_t *)&ip6h[1]; 12311 /* 12312 * Here's where we have to start stringing together 12313 * any extension headers in the right order: 12314 * Hop-by-hop, destination, routing, and final destination opts. 
12315 */ 12316 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12317 /* Hop-by-hop options */ 12318 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12319 12320 *nxthdr_ptr = IPPROTO_HOPOPTS; 12321 nxthdr_ptr = &hbh->ip6h_nxt; 12322 12323 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12324 cp += ipp->ipp_hopoptslen; 12325 } 12326 /* 12327 * En-route destination options 12328 * Only do them if there's a routing header as well 12329 */ 12330 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12331 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12332 ip6_dest_t *dst = (ip6_dest_t *)cp; 12333 12334 *nxthdr_ptr = IPPROTO_DSTOPTS; 12335 nxthdr_ptr = &dst->ip6d_nxt; 12336 12337 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12338 cp += ipp->ipp_rtdstoptslen; 12339 } 12340 /* 12341 * Routing header next 12342 */ 12343 if (ipp->ipp_fields & IPPF_RTHDR) { 12344 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12345 12346 *nxthdr_ptr = IPPROTO_ROUTING; 12347 nxthdr_ptr = &rt->ip6r_nxt; 12348 12349 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12350 cp += ipp->ipp_rthdrlen; 12351 } 12352 /* 12353 * Do ultimate destination options 12354 */ 12355 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12356 ip6_dest_t *dest = (ip6_dest_t *)cp; 12357 12358 *nxthdr_ptr = IPPROTO_DSTOPTS; 12359 nxthdr_ptr = &dest->ip6d_nxt; 12360 12361 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12362 cp += ipp->ipp_dstoptslen; 12363 } 12364 /* 12365 * Now set the last header pointer to the proto passed in 12366 */ 12367 *nxthdr_ptr = protocol; 12368 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12369 } 12370 12371 /* 12372 * Return a pointer to the routing header extension header 12373 * in the IPv6 header(s) chain passed in. 
12374 * If none found, return NULL 12375 * Assumes that all extension headers are in same mblk as the v6 header 12376 */ 12377 ip6_rthdr_t * 12378 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12379 { 12380 ip6_dest_t *desthdr; 12381 ip6_frag_t *fraghdr; 12382 uint_t hdrlen; 12383 uint8_t nexthdr; 12384 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12385 12386 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12387 return ((ip6_rthdr_t *)ptr); 12388 12389 /* 12390 * The routing header will precede all extension headers 12391 * other than the hop-by-hop and destination options 12392 * extension headers, so if we see anything other than those, 12393 * we're done and didn't find it. 12394 * We could see a destination options header alone but no 12395 * routing header, in which case we'll return NULL as soon as 12396 * we see anything after that. 12397 * Hop-by-hop and destination option headers are identical, 12398 * so we can use either one we want as a template. 12399 */ 12400 nexthdr = ip6h->ip6_nxt; 12401 while (ptr < endptr) { 12402 /* Is there enough left for len + nexthdr? */ 12403 if (ptr + MIN_EHDR_LEN > endptr) 12404 return (NULL); 12405 12406 switch (nexthdr) { 12407 case IPPROTO_HOPOPTS: 12408 case IPPROTO_DSTOPTS: 12409 /* Assumes the headers are identical for hbh and dst */ 12410 desthdr = (ip6_dest_t *)ptr; 12411 hdrlen = 8 * (desthdr->ip6d_len + 1); 12412 nexthdr = desthdr->ip6d_nxt; 12413 break; 12414 12415 case IPPROTO_ROUTING: 12416 return ((ip6_rthdr_t *)ptr); 12417 12418 case IPPROTO_FRAGMENT: 12419 fraghdr = (ip6_frag_t *)ptr; 12420 hdrlen = sizeof (ip6_frag_t); 12421 nexthdr = fraghdr->ip6f_nxt; 12422 break; 12423 12424 default: 12425 return (NULL); 12426 } 12427 ptr += hdrlen; 12428 } 12429 return (NULL); 12430 } 12431 12432 /* 12433 * Called for source-routed packets originating on this node. 
12434 * Manipulates the original routing header by moving every entry up 12435 * one slot, placing the first entry in the v6 header's v6_dst field, 12436 * and placing the ultimate destination in the routing header's last 12437 * slot. 12438 * 12439 * Returns the checksum diference between the ultimate destination 12440 * (last hop in the routing header when the packet is sent) and 12441 * the first hop (ip6_dst when the packet is sent) 12442 */ 12443 /* ARGSUSED2 */ 12444 uint32_t 12445 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12446 { 12447 uint_t numaddr; 12448 uint_t i; 12449 in6_addr_t *addrptr; 12450 in6_addr_t tmp; 12451 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12452 uint32_t cksm; 12453 uint32_t addrsum = 0; 12454 uint16_t *ptr; 12455 12456 /* 12457 * Perform any processing needed for source routing. 12458 * We know that all extension headers will be in the same mblk 12459 * as the IPv6 header. 12460 */ 12461 12462 /* 12463 * If no segments left in header, or the header length field is zero, 12464 * don't move hop addresses around; 12465 * Checksum difference is zero. 12466 */ 12467 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12468 return (0); 12469 12470 ptr = (uint16_t *)&ip6h->ip6_dst; 12471 cksm = 0; 12472 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12473 cksm += ptr[i]; 12474 } 12475 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12476 12477 /* 12478 * Here's where the fun begins - we have to 12479 * move all addresses up one spot, take the 12480 * first hop and make it our first ip6_dst, 12481 * and place the ultimate destination in the 12482 * newly-opened last slot. 
12483 */ 12484 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12485 numaddr = rthdr->ip6r0_len / 2; 12486 tmp = *addrptr; 12487 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12488 *addrptr = addrptr[1]; 12489 } 12490 *addrptr = ip6h->ip6_dst; 12491 ip6h->ip6_dst = tmp; 12492 12493 /* 12494 * From the checksummed ultimate destination subtract the checksummed 12495 * current ip6_dst (the first hop address). Return that number. 12496 * (In the v4 case, the second part of this is done in each routine 12497 * that calls ip_massage_options(). We do it all in this one place 12498 * for v6). 12499 */ 12500 ptr = (uint16_t *)&ip6h->ip6_dst; 12501 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12502 addrsum += ptr[i]; 12503 } 12504 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12505 if ((int)cksm < 0) 12506 cksm--; 12507 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12508 12509 return (cksm); 12510 } 12511 12512 /* 12513 * Propagate a multicast group membership operation (join/leave) (*fn) on 12514 * all interfaces crossed by the related multirt routes. 12515 * The call is considered successful if the operation succeeds 12516 * on at least one interface. 12517 * The function is called if the destination address in the packet to send 12518 * is multirouted. 
12519 */ 12520 int 12521 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12522 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12523 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12524 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12525 { 12526 ire_t *ire_gw; 12527 irb_t *irb; 12528 int index, error = 0; 12529 opt_restart_t *or; 12530 ip_stack_t *ipst = ire->ire_ipst; 12531 12532 irb = ire->ire_bucket; 12533 ASSERT(irb != NULL); 12534 12535 ASSERT(DB_TYPE(first_mp) == M_CTL); 12536 or = (opt_restart_t *)first_mp->b_rptr; 12537 12538 IRB_REFHOLD(irb); 12539 for (; ire != NULL; ire = ire->ire_next) { 12540 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12541 continue; 12542 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12543 continue; 12544 12545 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12546 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12547 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12548 /* No resolver exists for the gateway; skip this ire. */ 12549 if (ire_gw == NULL) 12550 continue; 12551 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12552 /* 12553 * A resolver exists: we can get the interface on which we have 12554 * to apply the operation. 12555 */ 12556 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12557 first_mp); 12558 if (error == 0) 12559 or->or_private = CGTP_MCAST_SUCCESS; 12560 12561 if (ip_debug > 0) { 12562 ulong_t off; 12563 char *ksym; 12564 12565 ksym = kobj_getsymname((uintptr_t)fn, &off); 12566 ip2dbg(("ip_multirt_apply_membership_v6: " 12567 "called %s, multirt group 0x%08x via itf 0x%08x, " 12568 "error %d [success %u]\n", 12569 ksym ? 
ksym : "?", 12570 ntohl(V4_PART_OF_V6((*v6grp))), 12571 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12572 error, or->or_private)); 12573 } 12574 12575 ire_refrele(ire_gw); 12576 if (error == EINPROGRESS) { 12577 IRB_REFRELE(irb); 12578 return (error); 12579 } 12580 } 12581 IRB_REFRELE(irb); 12582 /* 12583 * Consider the call as successful if we succeeded on at least 12584 * one interface. Otherwise, return the last encountered error. 12585 */ 12586 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12587 } 12588 12589 void 12590 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12591 { 12592 kstat_t *ksp; 12593 12594 ip6_stat_t template = { 12595 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12596 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12597 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12598 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12599 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12600 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12601 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12602 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12603 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12604 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12605 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12606 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12607 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12608 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12609 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 12610 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 12611 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 12612 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 12613 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 12614 }; 12615 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 12616 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 12617 KSTAT_FLAG_VIRTUAL, stackid); 12618 12619 if (ksp == NULL) 12620 return (NULL); 12621 12622 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 12623 ksp->ks_data = (void *)ip6_statisticsp; 12624 ksp->ks_private = (void *)(uintptr_t)stackid; 12625 12626 kstat_install(ksp); 12627 return (ksp); 12628 } 12629 12630 void 12631 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 12632 { 12633 if (ksp != NULL) { 12634 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 12635 kstat_delete_netstack(ksp, stackid); 12636 } 12637 } 12638 12639 /* 12640 * The following two functions set and get the value for the 12641 * IPV6_SRC_PREFERENCES socket option. 12642 */ 12643 int 12644 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12645 { 12646 /* 12647 * We only support preferences that are covered by 12648 * IPV6_PREFER_SRC_MASK. 12649 */ 12650 if (prefs & ~IPV6_PREFER_SRC_MASK) 12651 return (EINVAL); 12652 12653 /* 12654 * Look for conflicting preferences or default preferences. If 12655 * both bits of a related pair are clear, the application wants the 12656 * system's default value for that pair. Both bits in a pair can't 12657 * be set. 
12658 */ 12659 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12660 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12661 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12662 IPV6_PREFER_SRC_MIPMASK) { 12663 return (EINVAL); 12664 } 12665 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12666 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12667 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12668 IPV6_PREFER_SRC_TMPMASK) { 12669 return (EINVAL); 12670 } 12671 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12672 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12673 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12674 IPV6_PREFER_SRC_CGAMASK) { 12675 return (EINVAL); 12676 } 12677 12678 connp->conn_src_preferences = prefs; 12679 return (0); 12680 } 12681 12682 size_t 12683 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12684 { 12685 *val = connp->conn_src_preferences; 12686 return (sizeof (connp->conn_src_preferences)); 12687 } 12688 12689 int 12690 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti) 12691 { 12692 ire_t *ire; 12693 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12694 12695 /* 12696 * Verify the source address and ifindex. Privileged users can use 12697 * any source address. For ancillary data the source address is 12698 * checked in ip_wput_v6. 
12699 */ 12700 if (pkti->ipi6_ifindex != 0) { 12701 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 12702 if (!phyint_exists(pkti->ipi6_ifindex, ipst)) { 12703 rw_exit(&ipst->ips_ill_g_lock); 12704 return (ENXIO); 12705 } 12706 rw_exit(&ipst->ips_ill_g_lock); 12707 } 12708 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12709 secpolicy_net_rawaccess(cr) != 0) { 12710 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12711 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12712 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 12713 if (ire != NULL) 12714 ire_refrele(ire); 12715 else 12716 return (ENXIO); 12717 } 12718 return (0); 12719 } 12720 12721 /* 12722 * Get the size of the IP options (including the IP headers size) 12723 * without including the AH header's size. If till_ah is B_FALSE, 12724 * and if AH header is present, dest options beyond AH header will 12725 * also be included in the returned size. 12726 */ 12727 int 12728 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12729 { 12730 ip6_t *ip6h; 12731 uint8_t nexthdr; 12732 uint8_t *whereptr; 12733 ip6_hbh_t *hbhhdr; 12734 ip6_dest_t *dsthdr; 12735 ip6_rthdr_t *rthdr; 12736 int ehdrlen; 12737 int size; 12738 ah_t *ah; 12739 12740 ip6h = (ip6_t *)mp->b_rptr; 12741 size = IPV6_HDR_LEN; 12742 nexthdr = ip6h->ip6_nxt; 12743 whereptr = (uint8_t *)&ip6h[1]; 12744 for (;;) { 12745 /* Assume IP has already stripped it */ 12746 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12747 switch (nexthdr) { 12748 case IPPROTO_HOPOPTS: 12749 hbhhdr = (ip6_hbh_t *)whereptr; 12750 nexthdr = hbhhdr->ip6h_nxt; 12751 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12752 break; 12753 case IPPROTO_DSTOPTS: 12754 dsthdr = (ip6_dest_t *)whereptr; 12755 nexthdr = dsthdr->ip6d_nxt; 12756 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12757 break; 12758 case IPPROTO_ROUTING: 12759 rthdr = (ip6_rthdr_t *)whereptr; 12760 nexthdr = rthdr->ip6r_nxt; 12761 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12762 break; 12763 default : 12764 if (till_ah) { 12765 
ASSERT(nexthdr == IPPROTO_AH); 12766 return (size); 12767 } 12768 /* 12769 * If we don't have a AH header to traverse, 12770 * return now. This happens normally for 12771 * outbound datagrams where we have not inserted 12772 * the AH header. 12773 */ 12774 if (nexthdr != IPPROTO_AH) { 12775 return (size); 12776 } 12777 12778 /* 12779 * We don't include the AH header's size 12780 * to be symmetrical with other cases where 12781 * we either don't have a AH header (outbound) 12782 * or peek into the AH header yet (inbound and 12783 * not pulled up yet). 12784 */ 12785 ah = (ah_t *)whereptr; 12786 nexthdr = ah->ah_nexthdr; 12787 ehdrlen = (ah->ah_length << 2) + 8; 12788 12789 if (nexthdr == IPPROTO_DSTOPTS) { 12790 if (whereptr + ehdrlen >= mp->b_wptr) { 12791 /* 12792 * The destination options header 12793 * is not part of the first mblk. 12794 */ 12795 whereptr = mp->b_cont->b_rptr; 12796 } else { 12797 whereptr += ehdrlen; 12798 } 12799 12800 dsthdr = (ip6_dest_t *)whereptr; 12801 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12802 size += ehdrlen; 12803 } 12804 return (size); 12805 } 12806 whereptr += ehdrlen; 12807 size += ehdrlen; 12808 } 12809 } 12810 12811 /* 12812 * Utility routine that checks if `v6srcp' is a valid address on underlying 12813 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 12814 * associated with `v6srcp' on success. NOTE: if this is not called from 12815 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 12816 * group during or after this lookup. 
12817 */ 12818 static boolean_t 12819 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 12820 { 12821 ipif_t *ipif; 12822 12823 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 12824 if (ipif != NULL) { 12825 if (ipifp != NULL) 12826 *ipifp = ipif; 12827 else 12828 ipif_refrele(ipif); 12829 return (B_TRUE); 12830 } 12831 12832 if (ip_debug > 2) { 12833 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 12834 "src %s\n", AF_INET6, v6srcp); 12835 } 12836 return (B_FALSE); 12837 } 12838