1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/sctp/sctp_impl.h> 83 #include <inet/ipp_common.h> 84 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_rts.h> 89 #include <inet/ip_ndp.h> 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 #include <inet/sadb.h> 93 #include <inet/ipsec_impl.h> 94 #include <inet/iptun/iptun_impl.h> 95 #include <inet/sctp_ip.h> 96 #include <sys/pattr.h> 97 #include <inet/ipclassifier.h> 98 #include <inet/ipsecah.h> 99 #include <inet/rawip_impl.h> 100 #include <inet/rts_impl.h> 101 #include <sys/squeue_impl.h> 102 #include <sys/squeue.h> 103 104 #include <sys/tsol/label.h> 
#include <sys/tsol/tnet.h>

#include <rpc/pmap_prot.h>

/* Temporary; for CR 6451644 work-around */
#include <sys/ethernet.h>

extern int ip_squeue_flag;

/*
 * Naming conventions:
 *      These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */

/*
 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
 * from IANA. This mechanism will remain in effect until an official
 * number is obtained.
136 */ 137 uchar_t ip6opt_ls; 138 139 const in6_addr_t ipv6_all_ones = 140 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 141 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 142 143 #ifdef _BIG_ENDIAN 144 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 145 #else /* _BIG_ENDIAN */ 146 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 147 #endif /* _BIG_ENDIAN */ 148 149 #ifdef _BIG_ENDIAN 150 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 151 #else /* _BIG_ENDIAN */ 152 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 153 #endif /* _BIG_ENDIAN */ 154 155 #ifdef _BIG_ENDIAN 156 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 157 #else /* _BIG_ENDIAN */ 158 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 159 #endif /* _BIG_ENDIAN */ 160 161 #ifdef _BIG_ENDIAN 162 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 163 #else /* _BIG_ENDIAN */ 164 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 165 #endif /* _BIG_ENDIAN */ 166 167 #ifdef _BIG_ENDIAN 168 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 169 #else /* _BIG_ENDIAN */ 170 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 171 #endif /* _BIG_ENDIAN */ 172 173 #ifdef _BIG_ENDIAN 174 const in6_addr_t ipv6_solicited_node_mcast = 175 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 176 #else /* _BIG_ENDIAN */ 177 const in6_addr_t ipv6_solicited_node_mcast = 178 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 179 #endif /* _BIG_ENDIAN */ 180 181 /* Leave room for ip_newroute to tack on the src and target addresses */ 182 #define OK_RESOLVER_MP_V6(mp) \ 183 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 184 185 #define IP6_MBLK_OK 0 186 #define IP6_MBLK_HDR_ERR 1 187 #define IP6_MBLK_LEN_ERR 2 188 189 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *, ill_t *, 190 
boolean_t, zoneid_t); 191 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 192 const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 193 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 194 static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *, 195 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 196 boolean_t, boolean_t, cred_t *); 197 static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *, 198 iulp_t *, ip_stack_t *); 199 static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t, 200 const in6_addr_t *, uint16_t, boolean_t); 201 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 202 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 203 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 204 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 205 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 206 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 207 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 208 uint8_t *, uint_t, uint8_t, ip_stack_t *); 209 static mblk_t *ip_rput_frag_v6(ill_t *, ill_t *, mblk_t *, ip6_t *, 210 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 211 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 212 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 213 conn_t *, int, int, zoneid_t); 214 static boolean_t ipif_lookup_testaddr_v6(ill_t *, const in6_addr_t *, 215 ipif_t **); 216 217 /* 218 * A template for an IPv6 AR_ENTRY_QUERY 219 */ 220 static areq_t ipv6_areq_template = { 221 AR_ENTRY_QUERY, /* cmd */ 222 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 223 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 224 ETHERTYPE_IPV6, /* protocol, from arps perspective */ 225 sizeof (areq_t), /* target addr offset */ 226 IPV6_ADDR_LEN, /* target addr_length */ 227 0, /* flags */ 228 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 229 
IPV6_ADDR_LEN, /* sender addr length */ 230 6, /* xmit_count */ 231 1000, /* (re)xmit_interval in milliseconds */ 232 4 /* max # of requests to buffer */ 233 /* anything else filled in by the code */ 234 }; 235 236 /* 237 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 238 * The message has already been checksummed and if needed, 239 * a copy has been made to be sent any interested ICMP client (conn) 240 * Note that this is different than icmp_inbound() which does the fanout 241 * to conn's as well as local processing of the ICMP packets. 242 * 243 * All error messages are passed to the matching transport stream. 244 * 245 * Zones notes: 246 * The packet is only processed in the context of the specified zone: typically 247 * only this zone will reply to an echo request. This means that the caller must 248 * call icmp_inbound_v6() for each relevant zone. 249 */ 250 static void 251 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 252 uint_t hdr_length, boolean_t mctl_present, uint_t flags, zoneid_t zoneid, 253 mblk_t *dl_mp) 254 { 255 icmp6_t *icmp6; 256 ip6_t *ip6h; 257 boolean_t interested; 258 in6_addr_t origsrc; 259 mblk_t *first_mp; 260 ipsec_in_t *ii; 261 ip_stack_t *ipst = ill->ill_ipst; 262 263 ASSERT(ill != NULL); 264 first_mp = mp; 265 if (mctl_present) { 266 mp = first_mp->b_cont; 267 ASSERT(mp != NULL); 268 269 ii = (ipsec_in_t *)first_mp->b_rptr; 270 ASSERT(ii->ipsec_in_type == IPSEC_IN); 271 } 272 273 ip6h = (ip6_t *)mp->b_rptr; 274 275 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 276 277 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 278 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 279 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 280 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 281 freemsg(first_mp); 282 return; 283 } 284 ip6h = (ip6_t *)mp->b_rptr; 285 } 286 if (ipst->ips_icmp_accept_clear_messages == 0) { 287 first_mp = ipsec_check_global_policy(first_mp, NULL, 288 NULL, ip6h, 
mctl_present, ipst->ips_netstack); 289 if (first_mp == NULL) 290 return; 291 } 292 293 /* 294 * On a labeled system, we have to check whether the zone itself is 295 * permitted to receive raw traffic. 296 */ 297 if (is_system_labeled()) { 298 if (zoneid == ALL_ZONES) 299 zoneid = tsol_packet_to_zoneid(mp); 300 if (!tsol_can_accept_raw(mp, B_FALSE)) { 301 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 302 zoneid)); 303 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 304 freemsg(first_mp); 305 return; 306 } 307 } 308 309 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 310 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 311 icmp6->icmp6_code)); 312 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 313 314 /* Initiate IPPF processing here */ 315 if (IP6_IN_IPP(flags, ipst)) { 316 317 /* 318 * If the ifindex changes due to SIOCSLIFINDEX 319 * packet may return to IP on the wrong ill. 320 */ 321 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 322 if (mp == NULL) { 323 if (mctl_present) { 324 freeb(first_mp); 325 } 326 return; 327 } 328 } 329 330 switch (icmp6->icmp6_type) { 331 case ICMP6_DST_UNREACH: 332 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 333 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 334 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 335 break; 336 337 case ICMP6_TIME_EXCEEDED: 338 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 339 break; 340 341 case ICMP6_PARAM_PROB: 342 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 343 break; 344 345 case ICMP6_PACKET_TOO_BIG: 346 icmp_inbound_too_big_v6(q, first_mp, ill, inill, mctl_present, 347 zoneid); 348 return; 349 case ICMP6_ECHO_REQUEST: 350 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 351 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 352 !ipst->ips_ipv6_resp_echo_mcast) 353 break; 354 355 /* 356 * We must have exclusive use of the mblk to convert it to 357 * a response. 358 * If not, we copy it. 
359 */ 360 if (mp->b_datap->db_ref > 1) { 361 mblk_t *mp1; 362 363 mp1 = copymsg(mp); 364 freemsg(mp); 365 if (mp1 == NULL) { 366 BUMP_MIB(ill->ill_icmp6_mib, 367 ipv6IfIcmpInErrors); 368 if (mctl_present) 369 freeb(first_mp); 370 return; 371 } 372 mp = mp1; 373 ip6h = (ip6_t *)mp->b_rptr; 374 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 375 if (mctl_present) 376 first_mp->b_cont = mp; 377 else 378 first_mp = mp; 379 } 380 381 /* 382 * Turn the echo into an echo reply. 383 * Remove any extension headers (do not reverse a source route) 384 * and clear the flow id (keep traffic class for now). 385 */ 386 if (hdr_length != IPV6_HDR_LEN) { 387 int i; 388 389 for (i = 0; i < IPV6_HDR_LEN; i++) 390 mp->b_rptr[hdr_length - i - 1] = 391 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 392 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 393 ip6h = (ip6_t *)mp->b_rptr; 394 ip6h->ip6_nxt = IPPROTO_ICMPV6; 395 hdr_length = IPV6_HDR_LEN; 396 } 397 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 398 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 399 400 ip6h->ip6_plen = 401 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 402 origsrc = ip6h->ip6_src; 403 /* 404 * Reverse the source and destination addresses. 405 * If the return address is a multicast, zero out the source 406 * (ip_wput_v6 will set an address). 407 */ 408 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 409 ip6h->ip6_src = ipv6_all_zeros; 410 ip6h->ip6_dst = origsrc; 411 } else { 412 ip6h->ip6_src = ip6h->ip6_dst; 413 ip6h->ip6_dst = origsrc; 414 } 415 416 /* set the hop limit */ 417 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 418 419 /* 420 * Prepare for checksum by putting icmp length in the icmp 421 * checksum field. The checksum is calculated in ip_wput_v6. 422 */ 423 icmp6->icmp6_cksum = ip6h->ip6_plen; 424 425 if (!mctl_present) { 426 /* 427 * This packet should go out the same way as it 428 * came in i.e in clear. 
To make sure that global 429 * policy will not be applied to this in ip_wput, 430 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 431 */ 432 ASSERT(first_mp == mp); 433 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 434 if (first_mp == NULL) { 435 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 436 freemsg(mp); 437 return; 438 } 439 ii = (ipsec_in_t *)first_mp->b_rptr; 440 441 /* This is not a secure packet */ 442 ii->ipsec_in_secure = B_FALSE; 443 first_mp->b_cont = mp; 444 } 445 ii->ipsec_in_zoneid = zoneid; 446 ASSERT(zoneid != ALL_ZONES); 447 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 448 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 449 return; 450 } 451 put(WR(q), first_mp); 452 return; 453 454 case ICMP6_ECHO_REPLY: 455 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 456 break; 457 458 case ND_ROUTER_SOLICIT: 459 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 460 break; 461 462 case ND_ROUTER_ADVERT: 463 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 464 break; 465 466 case ND_NEIGHBOR_SOLICIT: 467 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 468 if (mctl_present) 469 freeb(first_mp); 470 /* XXX may wish to pass first_mp up to ndp_input someday. */ 471 ndp_input(inill, mp, dl_mp); 472 return; 473 474 case ND_NEIGHBOR_ADVERT: 475 BUMP_MIB(ill->ill_icmp6_mib, 476 ipv6IfIcmpInNeighborAdvertisements); 477 if (mctl_present) 478 freeb(first_mp); 479 /* XXX may wish to pass first_mp up to ndp_input someday. */ 480 ndp_input(inill, mp, dl_mp); 481 return; 482 483 case ND_REDIRECT: { 484 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 485 486 if (ipst->ips_ipv6_ignore_redirect) 487 break; 488 489 /* 490 * As there is no upper client to deliver, we don't 491 * need the first_mp any more. 
492 */ 493 if (mctl_present) 494 freeb(first_mp); 495 if (!pullupmsg(mp, -1)) { 496 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 497 break; 498 } 499 icmp_redirect_v6(q, mp, ill); 500 return; 501 } 502 503 /* 504 * The next three icmp messages will be handled by MLD. 505 * Pass all valid MLD packets up to any process(es) 506 * listening on a raw ICMP socket. MLD messages are 507 * freed by mld_input function. 508 */ 509 case MLD_LISTENER_QUERY: 510 case MLD_LISTENER_REPORT: 511 case MLD_LISTENER_REDUCTION: 512 if (mctl_present) 513 freeb(first_mp); 514 mld_input(q, mp, ill); 515 return; 516 default: 517 break; 518 } 519 if (interested) { 520 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 521 inill, mctl_present, zoneid); 522 } else { 523 freemsg(first_mp); 524 } 525 } 526 527 /* 528 * Process received IPv6 ICMP Packet too big. 529 * After updating any IRE it does the fanout to any matching transport streams. 530 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 531 */ 532 /* ARGSUSED */ 533 static void 534 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 535 boolean_t mctl_present, zoneid_t zoneid) 536 { 537 ip6_t *ip6h; 538 ip6_t *inner_ip6h; 539 icmp6_t *icmp6; 540 uint16_t hdr_length; 541 uint32_t mtu; 542 ire_t *ire, *first_ire; 543 mblk_t *first_mp; 544 ip_stack_t *ipst = ill->ill_ipst; 545 546 first_mp = mp; 547 if (mctl_present) 548 mp = first_mp->b_cont; 549 /* 550 * We must have exclusive use of the mblk to update the MTU 551 * in the packet. 552 * If not, we copy it. 553 * 554 * If there's an M_CTL present, we know that allocated first_mp 555 * earlier in this function, so we know first_mp has refcnt of one. 
556 */ 557 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 558 if (mp->b_datap->db_ref > 1) { 559 mblk_t *mp1; 560 561 mp1 = copymsg(mp); 562 freemsg(mp); 563 if (mp1 == NULL) { 564 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 565 if (mctl_present) 566 freeb(first_mp); 567 return; 568 } 569 mp = mp1; 570 if (mctl_present) 571 first_mp->b_cont = mp; 572 else 573 first_mp = mp; 574 } 575 ip6h = (ip6_t *)mp->b_rptr; 576 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 577 hdr_length = ip_hdr_length_v6(mp, ip6h); 578 else 579 hdr_length = IPV6_HDR_LEN; 580 581 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 582 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 583 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 584 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 585 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 586 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 587 freemsg(first_mp); 588 return; 589 } 590 ip6h = (ip6_t *)mp->b_rptr; 591 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 592 inner_ip6h = (ip6_t *)&icmp6[1]; 593 } 594 595 /* 596 * For link local destinations matching simply on IRE type is not 597 * sufficient. Same link local addresses for different ILL's is 598 * possible. 
599 */ 600 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 601 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 602 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 603 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 604 605 if (first_ire == NULL) { 606 if (ip_debug > 2) { 607 /* ip1dbg */ 608 pr_addr_dbg("icmp_inbound_too_big_v6:" 609 "no ire for dst %s\n", AF_INET6, 610 &inner_ip6h->ip6_dst); 611 } 612 freemsg(first_mp); 613 return; 614 } 615 616 mtu = ntohl(icmp6->icmp6_mtu); 617 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 618 for (ire = first_ire; ire != NULL && 619 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 620 ire = ire->ire_next) { 621 mutex_enter(&ire->ire_lock); 622 if (mtu < IPV6_MIN_MTU) { 623 ip1dbg(("Received mtu less than IPv6 " 624 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 625 mtu = IPV6_MIN_MTU; 626 /* 627 * If an mtu less than IPv6 min mtu is received, 628 * we must include a fragment header in 629 * subsequent packets. 630 */ 631 ire->ire_frag_flag |= IPH_FRAG_HDR; 632 } 633 ip1dbg(("Received mtu from router: %d\n", mtu)); 634 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 635 if (ire->ire_max_frag == mtu) { 636 /* Decreased it */ 637 ire->ire_marks |= IRE_MARK_PMTU; 638 } 639 /* Record the new max frag size for the ULP. */ 640 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 641 /* 642 * If we need a fragment header in every packet 643 * (above case or multirouting), make sure the 644 * ULP takes it into account when computing the 645 * payload size. 
646 */ 647 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 648 sizeof (ip6_frag_t)); 649 } else { 650 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 651 } 652 mutex_exit(&ire->ire_lock); 653 } 654 rw_exit(&first_ire->ire_bucket->irb_lock); 655 ire_refrele(first_ire); 656 } else { 657 irb_t *irb = NULL; 658 /* 659 * for non-link local destinations we match only on the IRE type 660 */ 661 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 662 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 663 ipst); 664 if (ire == NULL) { 665 if (ip_debug > 2) { 666 /* ip1dbg */ 667 pr_addr_dbg("icmp_inbound_too_big_v6:" 668 "no ire for dst %s\n", 669 AF_INET6, &inner_ip6h->ip6_dst); 670 } 671 freemsg(first_mp); 672 return; 673 } 674 irb = ire->ire_bucket; 675 ire_refrele(ire); 676 rw_enter(&irb->irb_lock, RW_READER); 677 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 678 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 679 &inner_ip6h->ip6_dst)) { 680 mtu = ntohl(icmp6->icmp6_mtu); 681 mutex_enter(&ire->ire_lock); 682 if (mtu < IPV6_MIN_MTU) { 683 ip1dbg(("Received mtu less than IPv6" 684 "min mtu %d: %d\n", 685 IPV6_MIN_MTU, mtu)); 686 mtu = IPV6_MIN_MTU; 687 /* 688 * If an mtu less than IPv6 min mtu is 689 * received, we must include a fragment 690 * header in subsequent packets. 691 */ 692 ire->ire_frag_flag |= IPH_FRAG_HDR; 693 } 694 695 ip1dbg(("Received mtu from router: %d\n", mtu)); 696 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 697 if (ire->ire_max_frag == mtu) { 698 /* Decreased it */ 699 ire->ire_marks |= IRE_MARK_PMTU; 700 } 701 /* Record the new max frag size for the ULP. */ 702 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 703 /* 704 * If we need a fragment header in 705 * every packet (above case or 706 * multirouting), make sure the ULP 707 * takes it into account when computing 708 * the payload size. 
709 */ 710 icmp6->icmp6_mtu = 711 htonl(ire->ire_max_frag - 712 sizeof (ip6_frag_t)); 713 } else { 714 icmp6->icmp6_mtu = 715 htonl(ire->ire_max_frag); 716 } 717 mutex_exit(&ire->ire_lock); 718 } 719 } 720 rw_exit(&irb->irb_lock); 721 } 722 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, inill, 723 mctl_present, zoneid); 724 } 725 726 /* 727 * Fanout for ICMPv6 errors containing IP-in-IPv6 packets. Returns B_TRUE if a 728 * tunnel consumed the message, and B_FALSE otherwise. 729 */ 730 static boolean_t 731 icmp_inbound_iptun_fanout_v6(mblk_t *first_mp, ip6_t *rip6h, ill_t *ill, 732 ip_stack_t *ipst) 733 { 734 conn_t *connp; 735 736 if ((connp = ipcl_iptun_classify_v6(&rip6h->ip6_src, &rip6h->ip6_dst, 737 ipst)) == NULL) 738 return (B_FALSE); 739 740 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 741 connp->conn_recv(connp, first_mp, NULL); 742 CONN_DEC_REF(connp); 743 return (B_TRUE); 744 } 745 746 /* 747 * Fanout received ICMPv6 error packets to the transports. 748 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
749 */ 750 void 751 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 752 icmp6_t *icmp6, ill_t *ill, ill_t *inill, boolean_t mctl_present, 753 zoneid_t zoneid) 754 { 755 uint16_t *up; /* Pointer to ports in ULP header */ 756 uint32_t ports; /* reversed ports for fanout */ 757 ip6_t rip6h; /* With reversed addresses */ 758 uint16_t hdr_length; 759 uint8_t *nexthdrp; 760 uint8_t nexthdr; 761 mblk_t *first_mp; 762 ipsec_in_t *ii; 763 tcpha_t *tcpha; 764 conn_t *connp; 765 ip_stack_t *ipst = ill->ill_ipst; 766 767 first_mp = mp; 768 if (mctl_present) { 769 mp = first_mp->b_cont; 770 ASSERT(mp != NULL); 771 772 ii = (ipsec_in_t *)first_mp->b_rptr; 773 ASSERT(ii->ipsec_in_type == IPSEC_IN); 774 } else { 775 ii = NULL; 776 } 777 778 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 779 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 780 781 /* 782 * Need to pullup everything in order to use 783 * ip_hdr_length_nexthdr_v6() 784 */ 785 if (mp->b_cont != NULL) { 786 if (!pullupmsg(mp, -1)) { 787 ip1dbg(("icmp_inbound_error_fanout_v6: " 788 "pullupmsg failed\n")); 789 goto drop_pkt; 790 } 791 ip6h = (ip6_t *)mp->b_rptr; 792 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 793 } 794 795 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 796 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 797 goto drop_pkt; 798 799 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 800 goto drop_pkt; 801 nexthdr = *nexthdrp; 802 803 /* Set message type, must be done after pullups */ 804 mp->b_datap->db_type = M_CTL; 805 806 /* 807 * We need a separate IP header with the source and destination 808 * addresses reversed to do fanout/classification because the ip6h in 809 * the ICMPv6 error is in the form we sent it out. 
810 */ 811 rip6h.ip6_src = ip6h->ip6_dst; 812 rip6h.ip6_dst = ip6h->ip6_src; 813 rip6h.ip6_nxt = nexthdr; 814 815 /* Try to pass the ICMP message to clients who need it */ 816 switch (nexthdr) { 817 case IPPROTO_UDP: { 818 /* 819 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 820 * UDP header to get the port information. 821 */ 822 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 823 mp->b_wptr) { 824 break; 825 } 826 /* Attempt to find a client stream based on port. */ 827 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 828 ((uint16_t *)&ports)[0] = up[1]; 829 ((uint16_t *)&ports)[1] = up[0]; 830 831 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, inill, 832 IP6_NO_IPPOLICY, mctl_present, zoneid); 833 return; 834 } 835 case IPPROTO_TCP: { 836 /* 837 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 838 * the TCP header to get the port information. 839 */ 840 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 841 mp->b_wptr) { 842 break; 843 } 844 845 /* 846 * Attempt to find a client stream based on port. 847 * Note that we do a reverse lookup since the header is 848 * in the form we sent it out. 849 */ 850 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 851 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 852 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 853 if (connp == NULL) { 854 goto drop_pkt; 855 } 856 857 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp, 858 SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR); 859 return; 860 861 } 862 case IPPROTO_SCTP: 863 /* 864 * Verify we have at least ICMP_MIN_SCTP_HDR_LEN bytes of 865 * transport header to get the port information. 
866 */ 867 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_SCTP_HDR_LEN > 868 mp->b_wptr) { 869 if (!pullupmsg(mp, (uchar_t *)ip6h + hdr_length + 870 ICMP_MIN_SCTP_HDR_LEN - mp->b_rptr)) { 871 goto drop_pkt; 872 } 873 } 874 875 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 876 ((uint16_t *)&ports)[0] = up[1]; 877 ((uint16_t *)&ports)[1] = up[0]; 878 ip_fanout_sctp(first_mp, inill, (ipha_t *)ip6h, ports, 0, 879 mctl_present, IP6_NO_IPPOLICY, zoneid); 880 return; 881 case IPPROTO_ESP: 882 case IPPROTO_AH: { 883 int ipsec_rc; 884 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 885 886 /* 887 * We need a IPSEC_IN in the front to fanout to AH/ESP. 888 * We will re-use the IPSEC_IN if it is already present as 889 * AH/ESP will not affect any fields in the IPSEC_IN for 890 * ICMP errors. If there is no IPSEC_IN, allocate a new 891 * one and attach it in the front. 892 */ 893 if (ii != NULL) { 894 /* 895 * ip_fanout_proto_again converts the ICMP errors 896 * that come back from AH/ESP to M_DATA so that 897 * if it is non-AH/ESP and we do a pullupmsg in 898 * this function, it would work. Convert it back 899 * to M_CTL before we send up as this is a ICMP 900 * error. This could have been generated locally or 901 * by some router. Validate the inner IPSEC 902 * headers. 903 * 904 * NOTE : ill_index is used by ip_fanout_proto_again 905 * to locate the ill. 906 */ 907 ASSERT(ill != NULL); 908 ii->ipsec_in_ill_index = 909 ill->ill_phyint->phyint_ifindex; 910 ii->ipsec_in_rill_index = 911 inill->ill_phyint->phyint_ifindex; 912 first_mp->b_cont->b_datap->db_type = M_CTL; 913 } else { 914 /* 915 * IPSEC_IN is not present. We attach a ipsec_in 916 * message and send up to IPSEC for validating 917 * and removing the IPSEC headers. Clear 918 * ipsec_in_secure so that when we return 919 * from IPSEC, we don't mistakenly think that this 920 * is a secure packet came from the network. 921 * 922 * NOTE : ill_index is used by ip_fanout_proto_again 923 * to locate the ill. 
924 */ 925 ASSERT(first_mp == mp); 926 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 927 ASSERT(ill != NULL); 928 if (first_mp == NULL) { 929 freemsg(mp); 930 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 931 return; 932 } 933 ii = (ipsec_in_t *)first_mp->b_rptr; 934 935 /* This is not a secure packet */ 936 ii->ipsec_in_secure = B_FALSE; 937 first_mp->b_cont = mp; 938 mp->b_datap->db_type = M_CTL; 939 ii->ipsec_in_ill_index = 940 ill->ill_phyint->phyint_ifindex; 941 ii->ipsec_in_rill_index = 942 inill->ill_phyint->phyint_ifindex; 943 } 944 945 if (!ipsec_loaded(ipss)) { 946 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 947 return; 948 } 949 950 if (nexthdr == IPPROTO_ESP) 951 ipsec_rc = ipsecesp_icmp_error(first_mp); 952 else 953 ipsec_rc = ipsecah_icmp_error(first_mp); 954 if (ipsec_rc == IPSEC_STATUS_FAILED) 955 return; 956 957 ip_fanout_proto_again(first_mp, ill, inill, NULL); 958 return; 959 } 960 case IPPROTO_ENCAP: 961 case IPPROTO_IPV6: 962 if ((uint8_t *)ip6h + hdr_length + 963 (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) : 964 sizeof (ip6_t)) > mp->b_wptr) { 965 goto drop_pkt; 966 } 967 968 if (nexthdr == IPPROTO_ENCAP || 969 !IN6_ARE_ADDR_EQUAL( 970 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 971 &ip6h->ip6_src) || 972 !IN6_ARE_ADDR_EQUAL( 973 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 974 &ip6h->ip6_dst)) { 975 /* 976 * For tunnels that have used IPsec protection, 977 * we need to adjust the MTU to take into account 978 * the IPsec overhead. 979 */ 980 if (ii != NULL) { 981 icmp6->icmp6_mtu = htonl( 982 ntohl(icmp6->icmp6_mtu) - 983 ipsec_in_extra_length(first_mp)); 984 } 985 } else { 986 /* 987 * Self-encapsulated case. As in the ipv4 case, 988 * we need to strip the 2nd IP header. Since mp 989 * is already pulled-up, we can simply bcopy 990 * the 3rd header + data over the 2nd header. 
991 */ 992 uint16_t unused_len; 993 ip6_t *inner_ip6h = (ip6_t *) 994 ((uchar_t *)ip6h + hdr_length); 995 996 /* 997 * Make sure we don't do recursion more than once. 998 */ 999 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 1000 &unused_len, &nexthdrp) || 1001 *nexthdrp == IPPROTO_IPV6) { 1002 goto drop_pkt; 1003 } 1004 1005 /* 1006 * We are about to modify the packet. Make a copy if 1007 * someone else has a reference to it. 1008 */ 1009 if (DB_REF(mp) > 1) { 1010 mblk_t *mp1; 1011 uint16_t icmp6_offset; 1012 1013 mp1 = copymsg(mp); 1014 if (mp1 == NULL) { 1015 goto drop_pkt; 1016 } 1017 icmp6_offset = (uint16_t) 1018 ((uchar_t *)icmp6 - mp->b_rptr); 1019 freemsg(mp); 1020 mp = mp1; 1021 1022 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1023 ip6h = (ip6_t *)&icmp6[1]; 1024 inner_ip6h = (ip6_t *) 1025 ((uchar_t *)ip6h + hdr_length); 1026 1027 if (mctl_present) 1028 first_mp->b_cont = mp; 1029 else 1030 first_mp = mp; 1031 } 1032 1033 /* 1034 * Need to set db_type back to M_DATA before 1035 * refeeding mp into this function. 1036 */ 1037 DB_TYPE(mp) = M_DATA; 1038 1039 /* 1040 * Copy the 3rd header + remaining data on top 1041 * of the 2nd header. 1042 */ 1043 bcopy(inner_ip6h, ip6h, 1044 mp->b_wptr - (uchar_t *)inner_ip6h); 1045 1046 /* 1047 * Subtract length of the 2nd header. 1048 */ 1049 mp->b_wptr -= hdr_length; 1050 1051 /* 1052 * Now recurse, and see what I _really_ should be 1053 * doing here. 1054 */ 1055 icmp_inbound_error_fanout_v6(q, first_mp, 1056 (ip6_t *)mp->b_rptr, icmp6, ill, inill, 1057 mctl_present, zoneid); 1058 return; 1059 } 1060 if (icmp_inbound_iptun_fanout_v6(first_mp, &rip6h, ill, ipst)) 1061 return; 1062 /* 1063 * No IP tunnel is associated with this error. Perhaps a raw 1064 * socket will want it. 
1065 */ 1066 /* FALLTHRU */ 1067 default: 1068 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, inill, nexthdr, 0, 1069 IP6_NO_IPPOLICY, mctl_present, zoneid); 1070 return; 1071 } 1072 /* NOTREACHED */ 1073 drop_pkt: 1074 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1075 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1076 freemsg(first_mp); 1077 } 1078 1079 /* 1080 * Process received IPv6 ICMP Redirect messages. 1081 */ 1082 /* ARGSUSED */ 1083 static void 1084 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1085 { 1086 ip6_t *ip6h; 1087 uint16_t hdr_length; 1088 nd_redirect_t *rd; 1089 ire_t *ire; 1090 ire_t *prev_ire; 1091 ire_t *redir_ire; 1092 in6_addr_t *src, *dst, *gateway; 1093 nd_opt_hdr_t *opt; 1094 nce_t *nce; 1095 int nce_flags = 0; 1096 int err = 0; 1097 boolean_t redirect_to_router = B_FALSE; 1098 int len; 1099 int optlen; 1100 iulp_t ulp_info = { 0 }; 1101 ill_t *prev_ire_ill; 1102 ipif_t *ipif; 1103 ip_stack_t *ipst = ill->ill_ipst; 1104 1105 ip6h = (ip6_t *)mp->b_rptr; 1106 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1107 hdr_length = ip_hdr_length_v6(mp, ip6h); 1108 else 1109 hdr_length = IPV6_HDR_LEN; 1110 1111 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1112 len = mp->b_wptr - mp->b_rptr - hdr_length; 1113 src = &ip6h->ip6_src; 1114 dst = &rd->nd_rd_dst; 1115 gateway = &rd->nd_rd_target; 1116 1117 /* Verify if it is a valid redirect */ 1118 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1119 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1120 (rd->nd_rd_code != 0) || 1121 (len < sizeof (nd_redirect_t)) || 1122 (IN6_IS_ADDR_V4MAPPED(dst)) || 1123 (IN6_IS_ADDR_MULTICAST(dst))) { 1124 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1125 freemsg(mp); 1126 return; 1127 } 1128 1129 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1130 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1131 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1132 freemsg(mp); 1133 return; 1134 } 1135 1136 if (len > sizeof (nd_redirect_t)) { 1137 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 
1138 len - sizeof (nd_redirect_t))) { 1139 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1140 freemsg(mp); 1141 return; 1142 } 1143 } 1144 1145 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1146 redirect_to_router = B_TRUE; 1147 nce_flags |= NCE_F_ISROUTER; 1148 } 1149 1150 /* ipif will be refreleased afterwards */ 1151 ipif = ipif_get_next_ipif(NULL, ill); 1152 if (ipif == NULL) { 1153 freemsg(mp); 1154 return; 1155 } 1156 1157 /* 1158 * Verify that the IP source address of the redirect is 1159 * the same as the current first-hop router for the specified 1160 * ICMP destination address. 1161 * Also, Make sure we had a route for the dest in question and 1162 * that route was pointing to the old gateway (the source of the 1163 * redirect packet.) 1164 */ 1165 1166 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, ALL_ZONES, 1167 NULL, MATCH_IRE_GW | MATCH_IRE_ILL | MATCH_IRE_DEFAULT, ipst); 1168 1169 /* 1170 * Check that 1171 * the redirect was not from ourselves 1172 * old gateway is still directly reachable 1173 */ 1174 if (prev_ire == NULL || 1175 prev_ire->ire_type == IRE_LOCAL) { 1176 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1177 ipif_refrele(ipif); 1178 goto fail_redirect; 1179 } 1180 prev_ire_ill = ire_to_ill(prev_ire); 1181 ASSERT(prev_ire_ill != NULL); 1182 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1183 nce_flags |= NCE_F_NONUD; 1184 1185 /* 1186 * Should we use the old ULP info to create the new gateway? From 1187 * a user's perspective, we should inherit the info so that it 1188 * is a "smooth" transition. If we do not do that, then new 1189 * connections going thru the new gateway will have no route metrics, 1190 * which is counter-intuitive to user. From a network point of 1191 * view, this may or may not make sense even though the new gateway 1192 * is still directly connected to us so the route metrics should not 1193 * change much. 
1194 * 1195 * But if the old ire_uinfo is not initialized, we do another 1196 * recursive lookup on the dest using the new gateway. There may 1197 * be a route to that. If so, use it to initialize the redirect 1198 * route. 1199 */ 1200 if (prev_ire->ire_uinfo.iulp_set) { 1201 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1202 } else if (redirect_to_router) { 1203 /* 1204 * Only do the following if the redirection is really to 1205 * a router. 1206 */ 1207 ire_t *tmp_ire; 1208 ire_t *sire; 1209 1210 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1211 ALL_ZONES, 0, NULL, 1212 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1213 ipst); 1214 if (sire != NULL) { 1215 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1216 ASSERT(tmp_ire != NULL); 1217 ire_refrele(tmp_ire); 1218 ire_refrele(sire); 1219 } else if (tmp_ire != NULL) { 1220 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1221 sizeof (iulp_t)); 1222 ire_refrele(tmp_ire); 1223 } 1224 } 1225 1226 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1227 opt = (nd_opt_hdr_t *)&rd[1]; 1228 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1229 if (opt != NULL) { 1230 err = ndp_lookup_then_add_v6(ill, 1231 B_FALSE, /* don't match across illgrp */ 1232 (uchar_t *)&opt[1], /* Link layer address */ 1233 gateway, 1234 &ipv6_all_ones, /* prefix mask */ 1235 &ipv6_all_zeros, /* Mapping mask */ 1236 0, 1237 nce_flags, 1238 ND_STALE, 1239 &nce); 1240 switch (err) { 1241 case 0: 1242 NCE_REFRELE(nce); 1243 break; 1244 case EEXIST: 1245 /* 1246 * Check to see if link layer address has changed and 1247 * process the nce_state accordingly. 
1248 */ 1249 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1250 NCE_REFRELE(nce); 1251 break; 1252 default: 1253 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1254 err)); 1255 ipif_refrele(ipif); 1256 goto fail_redirect; 1257 } 1258 } 1259 if (redirect_to_router) { 1260 /* icmp_redirect_ok_v6() must have already verified this */ 1261 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1262 1263 /* 1264 * Create a Route Association. This will allow us to remember 1265 * a router told us to use the particular gateway. 1266 */ 1267 ire = ire_create_v6( 1268 dst, 1269 &ipv6_all_ones, /* mask */ 1270 &prev_ire->ire_src_addr_v6, /* source addr */ 1271 gateway, /* gateway addr */ 1272 &prev_ire->ire_max_frag, /* max frag */ 1273 NULL, /* no src nce */ 1274 NULL, /* no rfq */ 1275 NULL, /* no stq */ 1276 IRE_HOST, 1277 prev_ire->ire_ipif, 1278 NULL, 1279 0, 1280 0, 1281 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1282 &ulp_info, 1283 NULL, 1284 NULL, 1285 ipst); 1286 } else { 1287 queue_t *stq; 1288 1289 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1290 ? ipif->ipif_rq : ipif->ipif_wq; 1291 1292 /* 1293 * Just create an on link entry, i.e. interface route. 
1294 */ 1295 ire = ire_create_v6( 1296 dst, /* gateway == dst */ 1297 &ipv6_all_ones, /* mask */ 1298 &prev_ire->ire_src_addr_v6, /* source addr */ 1299 &ipv6_all_zeros, /* gateway addr */ 1300 &prev_ire->ire_max_frag, /* max frag */ 1301 NULL, /* no src nce */ 1302 NULL, /* ire rfq */ 1303 stq, /* ire stq */ 1304 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1305 prev_ire->ire_ipif, 1306 &ipv6_all_ones, 1307 0, 1308 0, 1309 (RTF_DYNAMIC | RTF_HOST), 1310 &ulp_info, 1311 NULL, 1312 NULL, 1313 ipst); 1314 } 1315 1316 /* Release reference from earlier ipif_get_next_ipif() */ 1317 ipif_refrele(ipif); 1318 1319 if (ire == NULL) 1320 goto fail_redirect; 1321 1322 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1323 1324 /* tell routing sockets that we received a redirect */ 1325 ip_rts_change_v6(RTM_REDIRECT, 1326 &rd->nd_rd_dst, 1327 &rd->nd_rd_target, 1328 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1329 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1330 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1331 1332 /* 1333 * Delete any existing IRE_HOST type ires for this destination. 1334 * This together with the added IRE has the effect of 1335 * modifying an existing redirect. 
1336 */ 1337 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1338 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1339 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), ipst); 1340 1341 ire_refrele(ire); /* Held in ire_add_v6 */ 1342 1343 if (redir_ire != NULL) { 1344 if (redir_ire->ire_flags & RTF_DYNAMIC) 1345 ire_delete(redir_ire); 1346 ire_refrele(redir_ire); 1347 } 1348 } 1349 1350 if (prev_ire->ire_type == IRE_CACHE) 1351 ire_delete(prev_ire); 1352 ire_refrele(prev_ire); 1353 prev_ire = NULL; 1354 1355 fail_redirect: 1356 if (prev_ire != NULL) 1357 ire_refrele(prev_ire); 1358 freemsg(mp); 1359 } 1360 1361 static ill_t * 1362 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1363 { 1364 ill_t *ill; 1365 1366 ASSERT(WR(q) == q); 1367 1368 if (q->q_next != NULL) { 1369 ill = (ill_t *)q->q_ptr; 1370 if (ILL_CAN_LOOKUP(ill)) 1371 ill_refhold(ill); 1372 else 1373 ill = NULL; 1374 } else { 1375 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1376 NULL, NULL, NULL, NULL, NULL, ipst); 1377 } 1378 if (ill == NULL) 1379 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1380 return (ill); 1381 } 1382 1383 /* 1384 * Assigns an appropriate source address to the packet. 1385 * If origdst is one of our IP addresses that use it as the source. 1386 * If the queue is an ill queue then select a source from that ill. 1387 * Otherwise pick a source based on a route lookup back to the origsrc. 1388 * 1389 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1390 */ 1391 static in6_addr_t * 1392 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1393 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1394 { 1395 ill_t *ill; 1396 ire_t *ire; 1397 ipif_t *ipif; 1398 1399 ASSERT(!(wq->q_flag & QREADR)); 1400 if (wq->q_next != NULL) { 1401 ill = (ill_t *)wq->q_ptr; 1402 } else { 1403 ill = NULL; 1404 } 1405 1406 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1407 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1408 ipst); 1409 if (ire != NULL) { 1410 /* Destined to one of our addresses */ 1411 *src = *origdst; 1412 ire_refrele(ire); 1413 return (src); 1414 } 1415 if (ire != NULL) { 1416 ire_refrele(ire); 1417 ire = NULL; 1418 } 1419 if (ill == NULL) { 1420 /* What is the route back to the original source? */ 1421 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1422 NULL, NULL, zoneid, NULL, 1423 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1424 if (ire == NULL) { 1425 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1426 return (NULL); 1427 } 1428 ASSERT(ire->ire_ipif != NULL); 1429 ill = ire->ire_ipif->ipif_ill; 1430 ire_refrele(ire); 1431 } 1432 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1433 IPV6_PREFER_SRC_DEFAULT, zoneid); 1434 if (ipif != NULL) { 1435 *src = ipif->ipif_v6src_addr; 1436 ipif_refrele(ipif); 1437 return (src); 1438 } 1439 /* 1440 * Unusual case - can't find a usable source address to reach the 1441 * original source. Use what in the route to the source. 
1442 */ 1443 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1444 NULL, NULL, zoneid, NULL, 1445 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1446 if (ire == NULL) { 1447 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1448 return (NULL); 1449 } 1450 ASSERT(ire != NULL); 1451 *src = ire->ire_src_addr_v6; 1452 ire_refrele(ire); 1453 return (src); 1454 } 1455 1456 /* 1457 * Build and ship an IPv6 ICMP message using the packet data in mp, 1458 * and the ICMP header pointed to by "stuff". (May be called as 1459 * writer.) 1460 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1461 * verify that an icmp error packet can be sent. 1462 * 1463 * If q is an ill write side queue (which is the case when packets 1464 * arrive from ip_rput) then ip_wput code will ensure that packets to 1465 * link-local destinations are sent out that ill. 1466 * 1467 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1468 * source address (see above function). 1469 */ 1470 static void 1471 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1472 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1473 ip_stack_t *ipst) 1474 { 1475 ip6_t *ip6h; 1476 in6_addr_t v6dst; 1477 size_t len_needed; 1478 size_t msg_len; 1479 mblk_t *mp1; 1480 icmp6_t *icmp6; 1481 ill_t *ill; 1482 in6_addr_t v6src; 1483 mblk_t *ipsec_mp; 1484 ipsec_out_t *io; 1485 1486 ill = ip_queue_to_ill_v6(q, ipst); 1487 if (ill == NULL) { 1488 freemsg(mp); 1489 return; 1490 } 1491 1492 if (mctl_present) { 1493 /* 1494 * If it is : 1495 * 1496 * 1) a IPSEC_OUT, then this is caused by outbound 1497 * datagram originating on this host. IPSEC processing 1498 * may or may not have been done. Refer to comments above 1499 * icmp_inbound_error_fanout for details. 1500 * 1501 * 2) a IPSEC_IN if we are generating a icmp_message 1502 * for an incoming datagram destined for us i.e called 1503 * from ip_fanout_send_icmp. 
1504 */ 1505 ipsec_info_t *in; 1506 1507 ipsec_mp = mp; 1508 mp = ipsec_mp->b_cont; 1509 1510 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1511 ip6h = (ip6_t *)mp->b_rptr; 1512 1513 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1514 in->ipsec_info_type == IPSEC_IN); 1515 1516 if (in->ipsec_info_type == IPSEC_IN) { 1517 /* 1518 * Convert the IPSEC_IN to IPSEC_OUT. 1519 */ 1520 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1521 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1522 ill_refrele(ill); 1523 return; 1524 } 1525 } else { 1526 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1527 io = (ipsec_out_t *)in; 1528 /* 1529 * Clear out ipsec_out_proc_begin, so we do a fresh 1530 * ire lookup. 1531 */ 1532 io->ipsec_out_proc_begin = B_FALSE; 1533 } 1534 } else { 1535 /* 1536 * This is in clear. The icmp message we are building 1537 * here should go out in clear. 1538 */ 1539 ipsec_in_t *ii; 1540 ASSERT(mp->b_datap->db_type == M_DATA); 1541 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1542 if (ipsec_mp == NULL) { 1543 freemsg(mp); 1544 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1545 ill_refrele(ill); 1546 return; 1547 } 1548 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1549 1550 /* This is not a secure packet */ 1551 ii->ipsec_in_secure = B_FALSE; 1552 /* 1553 * For trusted extensions using a shared IP address we can 1554 * send using any zoneid. 1555 */ 1556 if (zoneid == ALL_ZONES) 1557 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1558 else 1559 ii->ipsec_in_zoneid = zoneid; 1560 ipsec_mp->b_cont = mp; 1561 ip6h = (ip6_t *)mp->b_rptr; 1562 /* 1563 * Convert the IPSEC_IN to IPSEC_OUT. 
1564 */ 1565 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1566 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1567 ill_refrele(ill); 1568 return; 1569 } 1570 } 1571 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1572 1573 if (v6src_ptr != NULL) { 1574 v6src = *v6src_ptr; 1575 } else { 1576 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1577 &v6src, zoneid, ipst) == NULL) { 1578 freemsg(ipsec_mp); 1579 ill_refrele(ill); 1580 return; 1581 } 1582 } 1583 v6dst = ip6h->ip6_src; 1584 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1585 msg_len = msgdsize(mp); 1586 if (msg_len > len_needed) { 1587 if (!adjmsg(mp, len_needed - msg_len)) { 1588 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1589 freemsg(ipsec_mp); 1590 ill_refrele(ill); 1591 return; 1592 } 1593 msg_len = len_needed; 1594 } 1595 mp1 = allocb_tmpl(IPV6_HDR_LEN + len, mp); 1596 if (mp1 == NULL) { 1597 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1598 freemsg(ipsec_mp); 1599 ill_refrele(ill); 1600 return; 1601 } 1602 ill_refrele(ill); 1603 mp1->b_cont = mp; 1604 mp = mp1; 1605 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1606 io->ipsec_out_type == IPSEC_OUT); 1607 ipsec_mp->b_cont = mp; 1608 1609 /* 1610 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1611 * node generates be accepted in peace by all on-host destinations. 1612 * If we do NOT assume that all on-host destinations trust 1613 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1614 * (Look for ipsec_out_icmp_loopback). 
1615 */ 1616 io->ipsec_out_icmp_loopback = B_TRUE; 1617 1618 ip6h = (ip6_t *)mp->b_rptr; 1619 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1620 1621 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1622 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1623 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1624 ip6h->ip6_dst = v6dst; 1625 ip6h->ip6_src = v6src; 1626 msg_len += IPV6_HDR_LEN + len; 1627 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1628 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1629 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1630 } 1631 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1632 icmp6 = (icmp6_t *)&ip6h[1]; 1633 bcopy(stuff, (char *)icmp6, len); 1634 /* 1635 * Prepare for checksum by putting icmp length in the icmp 1636 * checksum field. The checksum is calculated in ip_wput_v6. 1637 */ 1638 icmp6->icmp6_cksum = ip6h->ip6_plen; 1639 if (icmp6->icmp6_type == ND_REDIRECT) { 1640 ip6h->ip6_hops = IPV6_MAX_HOPS; 1641 } 1642 /* Send to V6 writeside put routine */ 1643 put(q, ipsec_mp); 1644 } 1645 1646 /* 1647 * Update the output mib when ICMPv6 packets are sent. 
1648 */ 1649 static void 1650 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1651 { 1652 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1653 1654 switch (icmp6->icmp6_type) { 1655 case ICMP6_DST_UNREACH: 1656 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1657 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1658 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1659 break; 1660 1661 case ICMP6_TIME_EXCEEDED: 1662 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1663 break; 1664 1665 case ICMP6_PARAM_PROB: 1666 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1667 break; 1668 1669 case ICMP6_PACKET_TOO_BIG: 1670 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1671 break; 1672 1673 case ICMP6_ECHO_REQUEST: 1674 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1675 break; 1676 1677 case ICMP6_ECHO_REPLY: 1678 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1679 break; 1680 1681 case ND_ROUTER_SOLICIT: 1682 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1683 break; 1684 1685 case ND_ROUTER_ADVERT: 1686 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1687 break; 1688 1689 case ND_NEIGHBOR_SOLICIT: 1690 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1691 break; 1692 1693 case ND_NEIGHBOR_ADVERT: 1694 BUMP_MIB(ill->ill_icmp6_mib, 1695 ipv6IfIcmpOutNeighborAdvertisements); 1696 break; 1697 1698 case ND_REDIRECT: 1699 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1700 break; 1701 1702 case MLD_LISTENER_QUERY: 1703 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1704 break; 1705 1706 case MLD_LISTENER_REPORT: 1707 case MLD_V2_LISTENER_REPORT: 1708 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1709 break; 1710 1711 case MLD_LISTENER_REDUCTION: 1712 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1713 break; 1714 } 1715 } 1716 1717 /* 1718 * Check if it is ok to send an ICMPv6 error packet in 1719 * response to the IP packet in 
mp. 1720 * Free the message and return null if no 1721 * ICMP error packet should be sent. 1722 */ 1723 static mblk_t * 1724 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1725 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1726 { 1727 ip6_t *ip6h; 1728 1729 if (!mp) 1730 return (NULL); 1731 1732 ip6h = (ip6_t *)mp->b_rptr; 1733 1734 /* Check if source address uniquely identifies the host */ 1735 1736 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1737 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1738 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1739 freemsg(mp); 1740 return (NULL); 1741 } 1742 1743 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1744 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1745 icmp6_t *icmp6; 1746 1747 if (mp->b_wptr - mp->b_rptr < len_needed) { 1748 if (!pullupmsg(mp, len_needed)) { 1749 ill_t *ill; 1750 1751 ill = ip_queue_to_ill_v6(q, ipst); 1752 if (ill == NULL) { 1753 BUMP_MIB(&ipst->ips_icmp6_mib, 1754 ipv6IfIcmpInErrors); 1755 } else { 1756 BUMP_MIB(ill->ill_icmp6_mib, 1757 ipv6IfIcmpInErrors); 1758 ill_refrele(ill); 1759 } 1760 freemsg(mp); 1761 return (NULL); 1762 } 1763 ip6h = (ip6_t *)mp->b_rptr; 1764 } 1765 icmp6 = (icmp6_t *)&ip6h[1]; 1766 /* Explicitly do not generate errors in response to redirects */ 1767 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1768 icmp6->icmp6_type == ND_REDIRECT) { 1769 freemsg(mp); 1770 return (NULL); 1771 } 1772 } 1773 /* 1774 * Check that the destination is not multicast and that the packet 1775 * was not sent on link layer broadcast or multicast. (Exception 1776 * is Packet too big message as per the draft - when mcast_ok is set.) 1777 */ 1778 if (!mcast_ok && 1779 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1780 freemsg(mp); 1781 return (NULL); 1782 } 1783 if (icmp_err_rate_limit(ipst)) { 1784 /* 1785 * Only send ICMP error packets every so often. 1786 * This should be done on a per port/source basis, 1787 * but for now this will suffice. 
1788 */ 1789 freemsg(mp); 1790 return (NULL); 1791 } 1792 return (mp); 1793 } 1794 1795 /* 1796 * Generate an ICMPv6 redirect message. 1797 * Include target link layer address option if it exits. 1798 * Always include redirect header. 1799 */ 1800 static void 1801 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1802 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1803 { 1804 nd_redirect_t *rd; 1805 nd_opt_rd_hdr_t *rdh; 1806 uchar_t *buf; 1807 nce_t *nce = NULL; 1808 nd_opt_hdr_t *opt; 1809 int len; 1810 int ll_opt_len = 0; 1811 int max_redir_hdr_data_len; 1812 int pkt_len; 1813 in6_addr_t *srcp; 1814 ip_stack_t *ipst = ill->ill_ipst; 1815 1816 /* 1817 * We are called from ip_rput where we could 1818 * not have attached an IPSEC_IN. 1819 */ 1820 ASSERT(mp->b_datap->db_type == M_DATA); 1821 1822 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1823 if (mp == NULL) 1824 return; 1825 nce = ndp_lookup_v6(ill, B_TRUE, targetp, B_FALSE); 1826 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1827 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1828 ill->ill_phys_addr_length + 7)/8 * 8; 1829 } 1830 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1831 ASSERT(len % 4 == 0); 1832 buf = kmem_alloc(len, KM_NOSLEEP); 1833 if (buf == NULL) { 1834 if (nce != NULL) 1835 NCE_REFRELE(nce); 1836 freemsg(mp); 1837 return; 1838 } 1839 1840 rd = (nd_redirect_t *)buf; 1841 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1842 rd->nd_rd_code = 0; 1843 rd->nd_rd_reserved = 0; 1844 rd->nd_rd_target = *targetp; 1845 rd->nd_rd_dst = *dest; 1846 1847 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1848 if (nce != NULL && ll_opt_len != 0) { 1849 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1850 opt->nd_opt_len = ll_opt_len/8; 1851 bcopy((char *)nce->nce_res_mp->b_rptr + 1852 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1853 ill->ill_phys_addr_length); 1854 } 1855 if (nce != NULL) 1856 NCE_REFRELE(nce); 1857 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + 
ll_opt_len); 1858 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1859 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1860 max_redir_hdr_data_len = 1861 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1862 pkt_len = msgdsize(mp); 1863 /* Make sure mp is 8 byte aligned */ 1864 if (pkt_len > max_redir_hdr_data_len) { 1865 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1866 sizeof (nd_opt_rd_hdr_t))/8; 1867 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1868 } else { 1869 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1870 (void) adjmsg(mp, -(pkt_len % 8)); 1871 } 1872 rdh->nd_opt_rh_reserved1 = 0; 1873 rdh->nd_opt_rh_reserved2 = 0; 1874 /* ipif_v6src_addr contains the link-local source address */ 1875 srcp = &ill->ill_ipif->ipif_v6src_addr; 1876 1877 /* Redirects sent by router, and router is global zone */ 1878 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1879 kmem_free(buf, len); 1880 } 1881 1882 1883 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1884 void 1885 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1886 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1887 ip_stack_t *ipst) 1888 { 1889 icmp6_t icmp6; 1890 boolean_t mctl_present; 1891 mblk_t *first_mp; 1892 1893 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1894 1895 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1896 if (mp == NULL) { 1897 if (mctl_present) 1898 freeb(first_mp); 1899 return; 1900 } 1901 bzero(&icmp6, sizeof (icmp6_t)); 1902 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1903 icmp6.icmp6_code = code; 1904 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1905 zoneid, ipst); 1906 } 1907 1908 /* 1909 * Generate an ICMP unreachable message. 
1910 */ 1911 void 1912 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1913 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1914 ip_stack_t *ipst) 1915 { 1916 icmp6_t icmp6; 1917 boolean_t mctl_present; 1918 mblk_t *first_mp; 1919 1920 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1921 1922 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1923 if (mp == NULL) { 1924 if (mctl_present) 1925 freeb(first_mp); 1926 return; 1927 } 1928 bzero(&icmp6, sizeof (icmp6_t)); 1929 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1930 icmp6.icmp6_code = code; 1931 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1932 zoneid, ipst); 1933 } 1934 1935 /* 1936 * Generate an ICMP pkt too big message. 1937 */ 1938 static void 1939 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1940 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1941 { 1942 icmp6_t icmp6; 1943 mblk_t *first_mp; 1944 boolean_t mctl_present; 1945 1946 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1947 1948 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1949 if (mp == NULL) { 1950 if (mctl_present) 1951 freeb(first_mp); 1952 return; 1953 } 1954 bzero(&icmp6, sizeof (icmp6_t)); 1955 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1956 icmp6.icmp6_code = 0; 1957 icmp6.icmp6_mtu = htonl(mtu); 1958 1959 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1960 zoneid, ipst); 1961 } 1962 1963 /* 1964 * Generate an ICMP parameter problem message. (May be called as writer.) 1965 * 'offset' is the offset from the beginning of the packet in error. 
1966 */ 1967 static void 1968 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 1969 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1970 ip_stack_t *ipst) 1971 { 1972 icmp6_t icmp6; 1973 boolean_t mctl_present; 1974 mblk_t *first_mp; 1975 1976 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1977 1978 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1979 if (mp == NULL) { 1980 if (mctl_present) 1981 freeb(first_mp); 1982 return; 1983 } 1984 bzero((char *)&icmp6, sizeof (icmp6_t)); 1985 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1986 icmp6.icmp6_code = code; 1987 icmp6.icmp6_pptr = htonl(offset); 1988 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1989 zoneid, ipst); 1990 } 1991 1992 /* 1993 * This code will need to take into account the possibility of binding 1994 * to a link local address on a multi-homed host, in which case the 1995 * outgoing interface (from the conn) will need to be used when getting 1996 * an ire for the dst. Going through proper outgoing interface and 1997 * choosing the source address corresponding to the outgoing interface 1998 * is necessary when the destination address is a link-local address and 1999 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 2000 * This can happen when active connection is setup; thus ipp pointer 2001 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 2002 * pointer is passed as ipp pointer. 
2003 */ 2004 mblk_t * 2005 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 2006 { 2007 ssize_t len; 2008 int protocol; 2009 struct T_bind_req *tbr; 2010 sin6_t *sin6; 2011 ipa6_conn_t *ac6; 2012 in6_addr_t *v6srcp; 2013 in6_addr_t *v6dstp; 2014 uint16_t lport; 2015 uint16_t fport; 2016 uchar_t *ucp; 2017 int error = 0; 2018 boolean_t local_bind; 2019 ipa6_conn_x_t *acx6; 2020 boolean_t verify_dst; 2021 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2022 cred_t *cr; 2023 2024 /* 2025 * All Solaris components should pass a db_credp 2026 * for this TPI message, hence we ASSERT. 2027 * But in case there is some other M_PROTO that looks 2028 * like a TPI message sent by some other kernel 2029 * component, we check and return an error. 2030 */ 2031 cr = msg_getcred(mp, NULL); 2032 ASSERT(cr != NULL); 2033 if (cr == NULL) { 2034 error = EINVAL; 2035 goto bad_addr; 2036 } 2037 2038 ASSERT(connp->conn_af_isv6); 2039 len = mp->b_wptr - mp->b_rptr; 2040 if (len < (sizeof (*tbr) + 1)) { 2041 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2042 "ip_bind_v6: bogus msg, len %ld", len); 2043 goto bad_addr; 2044 } 2045 /* Back up and extract the protocol identifier. */ 2046 mp->b_wptr--; 2047 tbr = (struct T_bind_req *)mp->b_rptr; 2048 /* Reset the message type in preparation for shipping it back. */ 2049 mp->b_datap->db_type = M_PCPROTO; 2050 2051 protocol = *mp->b_wptr & 0xFF; 2052 connp->conn_ulp = (uint8_t)protocol; 2053 2054 /* 2055 * Check for a zero length address. This is from a protocol that 2056 * wants to register to receive all packets of its type. 2057 */ 2058 if (tbr->ADDR_length == 0) { 2059 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2060 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2061 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2062 NULL) { 2063 /* 2064 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2065 * Do not allow others to bind to these. 
2066 */ 2067 goto bad_addr; 2068 } 2069 2070 /* 2071 * 2072 * The udp module never sends down a zero-length address, 2073 * and allowing this on a labeled system will break MLP 2074 * functionality. 2075 */ 2076 if (is_system_labeled() && protocol == IPPROTO_UDP) 2077 goto bad_addr; 2078 2079 /* Allow ipsec plumbing */ 2080 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2081 protocol != IPPROTO_ESP) 2082 goto bad_addr; 2083 2084 connp->conn_srcv6 = ipv6_all_zeros; 2085 ipcl_proto_insert_v6(connp, protocol); 2086 2087 tbr->PRIM_type = T_BIND_ACK; 2088 return (mp); 2089 } 2090 2091 /* Extract the address pointer from the message. */ 2092 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2093 tbr->ADDR_length); 2094 if (ucp == NULL) { 2095 ip1dbg(("ip_bind_v6: no address\n")); 2096 goto bad_addr; 2097 } 2098 if (!OK_32PTR(ucp)) { 2099 ip1dbg(("ip_bind_v6: unaligned address\n")); 2100 goto bad_addr; 2101 } 2102 2103 switch (tbr->ADDR_length) { 2104 default: 2105 ip1dbg(("ip_bind_v6: bad address length %d\n", 2106 (int)tbr->ADDR_length)); 2107 goto bad_addr; 2108 2109 case IPV6_ADDR_LEN: 2110 /* Verification of local address only */ 2111 v6srcp = (in6_addr_t *)ucp; 2112 lport = 0; 2113 local_bind = B_TRUE; 2114 break; 2115 2116 case sizeof (sin6_t): 2117 sin6 = (sin6_t *)ucp; 2118 v6srcp = &sin6->sin6_addr; 2119 lport = sin6->sin6_port; 2120 local_bind = B_TRUE; 2121 break; 2122 2123 case sizeof (ipa6_conn_t): 2124 /* 2125 * Verify that both the source and destination addresses 2126 * are valid. 2127 */ 2128 ac6 = (ipa6_conn_t *)ucp; 2129 v6srcp = &ac6->ac6_laddr; 2130 v6dstp = &ac6->ac6_faddr; 2131 fport = ac6->ac6_fport; 2132 /* For raw socket, the local port is not set. */ 2133 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2134 connp->conn_lport; 2135 local_bind = B_FALSE; 2136 /* Always verify destination reachability. 
*/ 2137 verify_dst = B_TRUE; 2138 break; 2139 2140 case sizeof (ipa6_conn_x_t): 2141 /* 2142 * Verify that the source address is valid. 2143 */ 2144 acx6 = (ipa6_conn_x_t *)ucp; 2145 ac6 = &acx6->ac6x_conn; 2146 v6srcp = &ac6->ac6_laddr; 2147 v6dstp = &ac6->ac6_faddr; 2148 fport = ac6->ac6_fport; 2149 lport = ac6->ac6_lport; 2150 local_bind = B_FALSE; 2151 /* 2152 * Client that passed ipa6_conn_x_t to us specifies whether to 2153 * verify destination reachability. 2154 */ 2155 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2156 break; 2157 } 2158 if (local_bind) { 2159 error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol, 2160 v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN); 2161 } else { 2162 error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol, 2163 v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2164 } 2165 2166 if (error == 0) { 2167 /* Send it home. */ 2168 mp->b_datap->db_type = M_PCPROTO; 2169 tbr->PRIM_type = T_BIND_ACK; 2170 return (mp); 2171 } 2172 2173 bad_addr: 2174 ASSERT(error != EINPROGRESS); 2175 if (error > 0) 2176 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2177 else 2178 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2179 return (mp); 2180 } 2181 2182 /* 2183 * Here address is verified to be a valid local address. 2184 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2185 * address is also considered a valid local address. 2186 * In the case of a multicast address, however, the 2187 * upper protocol is expected to reset the src address 2188 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2189 * no packets are emitted with multicast address as 2190 * source address. 2191 * The addresses valid for bind are: 2192 * (1) - in6addr_any 2193 * (2) - IP address of an UP interface 2194 * (3) - IP address of a DOWN interface 2195 * (4) - a multicast address. In this case 2196 * the conn will only receive packets destined to 2197 * the specified multicast address. 
Note: the 2198 * application still has to issue an 2199 * IPV6_JOIN_GROUP socket option. 2200 * 2201 * In all the above cases, the bound address must be valid in the current zone. 2202 * When the address is loopback or multicast, there might be many matching IREs 2203 * so bind has to look up based on the zone. 2204 */ 2205 /* 2206 * Verify the local IP address. Does not change the conn_t except 2207 * conn_fully_bound and conn_policy_cached. 2208 */ 2209 static int 2210 ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2211 const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert) 2212 { 2213 int error = 0; 2214 ire_t *src_ire = NULL; 2215 zoneid_t zoneid; 2216 mblk_t *mp = NULL; 2217 boolean_t ire_requested; 2218 boolean_t ipsec_policy_set; 2219 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2220 2221 if (mpp) 2222 mp = *mpp; 2223 2224 ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2225 ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET); 2226 2227 /* 2228 * If it was previously connected, conn_fully_bound would have 2229 * been set. 2230 */ 2231 connp->conn_fully_bound = B_FALSE; 2232 2233 zoneid = IPCL_ZONEID(connp); 2234 2235 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2236 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2237 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2238 /* 2239 * If an address other than in6addr_any is requested, 2240 * we verify that it is a valid address for bind 2241 * Note: Following code is in if-else-if form for 2242 * readability compared to a condition check. 2243 */ 2244 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2245 /* LINTED - statement has no consequent */ 2246 if (IRE_IS_LOCAL(src_ire)) { 2247 /* 2248 * (2) Bind to address of local UP interface 2249 */ 2250 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2251 ipif_t *multi_ipif = NULL; 2252 ire_t *save_ire; 2253 /* 2254 * (4) bind to multicast address. 
2255 * Fake out the IRE returned to upper 2256 * layer to be a broadcast IRE in 2257 * ip_bind_insert_ire_v6(). 2258 * Pass other information that matches 2259 * the ipif (e.g. the source address). 2260 * conn_multicast_ill is only used for 2261 * IPv6 packets 2262 */ 2263 mutex_enter(&connp->conn_lock); 2264 if (connp->conn_multicast_ill != NULL) { 2265 (void) ipif_lookup_zoneid( 2266 connp->conn_multicast_ill, zoneid, 0, 2267 &multi_ipif); 2268 } else { 2269 /* 2270 * Look for default like 2271 * ip_wput_v6 2272 */ 2273 multi_ipif = ipif_lookup_group_v6( 2274 &ipv6_unspecified_group, zoneid, ipst); 2275 } 2276 mutex_exit(&connp->conn_lock); 2277 save_ire = src_ire; 2278 src_ire = NULL; 2279 if (multi_ipif == NULL || !ire_requested || 2280 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2281 src_ire = save_ire; 2282 error = EADDRNOTAVAIL; 2283 } else { 2284 ASSERT(src_ire != NULL); 2285 if (save_ire != NULL) 2286 ire_refrele(save_ire); 2287 } 2288 if (multi_ipif != NULL) 2289 ipif_refrele(multi_ipif); 2290 } else { 2291 if (!ip_addr_exists_v6(v6src, zoneid, ipst)) { 2292 /* 2293 * Not a valid address for bind 2294 */ 2295 error = EADDRNOTAVAIL; 2296 } 2297 } 2298 2299 if (error != 0) { 2300 /* Red Alert! Attempting to be a bogon! */ 2301 if (ip_debug > 2) { 2302 /* ip1dbg */ 2303 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2304 " address %s\n", AF_INET6, v6src); 2305 } 2306 goto bad_addr; 2307 } 2308 } 2309 2310 /* 2311 * Allow setting new policies. For example, disconnects come 2312 * down as ipa_t bind. As we would have set conn_policy_cached 2313 * to B_TRUE before, we should set it to B_FALSE, so that policy 2314 * can change after the disconnect. 2315 */ 2316 connp->conn_policy_cached = B_FALSE; 2317 2318 /* If not fanout_insert this was just an address verification */ 2319 if (fanout_insert) { 2320 /* 2321 * The addresses have been verified. Time to insert in 2322 * the correct fanout list. 
2323 */ 2324 connp->conn_srcv6 = *v6src; 2325 connp->conn_remv6 = ipv6_all_zeros; 2326 connp->conn_lport = lport; 2327 connp->conn_fport = 0; 2328 error = ipcl_bind_insert_v6(connp, protocol, v6src, lport); 2329 } 2330 if (error == 0) { 2331 if (ire_requested) { 2332 if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL, 2333 ipst)) { 2334 error = -1; 2335 goto bad_addr; 2336 } 2337 mp = *mpp; 2338 } else if (ipsec_policy_set) { 2339 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2340 error = -1; 2341 goto bad_addr; 2342 } 2343 } 2344 } 2345 bad_addr: 2346 if (error != 0) { 2347 if (connp->conn_anon_port) { 2348 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2349 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2350 B_FALSE); 2351 } 2352 connp->conn_mlp_type = mlptSingle; 2353 } 2354 2355 if (src_ire != NULL) 2356 ire_refrele(src_ire); 2357 2358 if (ipsec_policy_set) { 2359 ASSERT(mp != NULL); 2360 freeb(mp); 2361 /* 2362 * As of now assume that nothing else accompanies 2363 * IPSEC_POLICY_SET. 
2364 */ 2365 *mpp = NULL; 2366 } 2367 2368 return (error); 2369 } 2370 int 2371 ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2372 const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert) 2373 { 2374 int error; 2375 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2376 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2377 2378 ASSERT(connp->conn_af_isv6); 2379 connp->conn_ulp = protocol; 2380 2381 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2382 /* Bind to IPv4 address */ 2383 ipaddr_t v4src; 2384 2385 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2386 2387 error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport, 2388 fanout_insert); 2389 if (error != 0) 2390 goto bad_addr; 2391 connp->conn_pkt_isv6 = B_FALSE; 2392 } else { 2393 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2394 error = 0; 2395 goto bad_addr; 2396 } 2397 error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp, 2398 lport, fanout_insert); 2399 if (error != 0) 2400 goto bad_addr; 2401 connp->conn_pkt_isv6 = B_TRUE; 2402 } 2403 2404 if (orig_pkt_isv6 != connp->conn_pkt_isv6) 2405 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2406 return (0); 2407 2408 bad_addr: 2409 if (error < 0) 2410 error = -TBADADDR; 2411 return (error); 2412 } 2413 2414 /* 2415 * Verify that both the source and destination addresses 2416 * are valid. If verify_dst, then destination address must also be reachable, 2417 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2418 * It takes ip6_pkt_t * as one of the arguments to determine correct 2419 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2420 * destination address. Note that parameter ipp is only useful for TCP connect 2421 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2422 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2423 * 2424 */ 2425 int 2426 ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2427 in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst, 2428 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2429 boolean_t verify_dst, cred_t *cr) 2430 { 2431 ire_t *src_ire; 2432 ire_t *dst_ire; 2433 int error = 0; 2434 ire_t *sire = NULL; 2435 ire_t *md_dst_ire = NULL; 2436 ill_t *md_ill = NULL; 2437 ill_t *dst_ill = NULL; 2438 ipif_t *src_ipif = NULL; 2439 zoneid_t zoneid; 2440 boolean_t ill_held = B_FALSE; 2441 mblk_t *mp = NULL; 2442 boolean_t ire_requested = B_FALSE; 2443 boolean_t ipsec_policy_set = B_FALSE; 2444 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2445 ts_label_t *tsl = NULL; 2446 cred_t *effective_cred = NULL; 2447 2448 if (mpp) 2449 mp = *mpp; 2450 2451 if (mp != NULL) { 2452 ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2453 ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET); 2454 } 2455 2456 src_ire = dst_ire = NULL; 2457 /* 2458 * If we never got a disconnect before, clear it now. 2459 */ 2460 connp->conn_fully_bound = B_FALSE; 2461 2462 zoneid = connp->conn_zoneid; 2463 2464 /* 2465 * Check whether Trusted Solaris policy allows communication with this 2466 * host, and pretend that the destination is unreachable if not. 2467 * 2468 * This is never a problem for TCP, since that transport is known to 2469 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2470 * handling. If the remote is unreachable, it will be detected at that 2471 * point, so there's no reason to check it here. 2472 * 2473 * Note that for sendto (and other datagram-oriented friends), this 2474 * check is done as part of the data path label computation instead. 2475 * The check here is just to make non-TCP connect() report the right 2476 * error. 
2477 */ 2478 if (is_system_labeled() && !IPCL_IS_TCP(connp)) { 2479 if ((error = tsol_check_dest(cr, v6dst, IPV6_VERSION, 2480 connp->conn_mac_exempt, &effective_cred)) != 0) { 2481 if (ip_debug > 2) { 2482 pr_addr_dbg( 2483 "ip_bind_connected: no label for dst %s\n", 2484 AF_INET6, v6dst); 2485 } 2486 goto bad_addr; 2487 } 2488 2489 /* 2490 * tsol_check_dest() may have created a new cred with 2491 * a modified security label. Use that cred if it exists 2492 * for ire lookups. 2493 */ 2494 if (effective_cred == NULL) { 2495 tsl = crgetlabel(cr); 2496 } else { 2497 tsl = crgetlabel(effective_cred); 2498 } 2499 } 2500 2501 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2502 ipif_t *ipif; 2503 2504 /* 2505 * Use an "emulated" IRE_BROADCAST to tell the transport it 2506 * is a multicast. 2507 * Pass other information that matches 2508 * the ipif (e.g. the source address). 2509 * 2510 * conn_multicast_ill is only used for IPv6 packets 2511 */ 2512 mutex_enter(&connp->conn_lock); 2513 if (connp->conn_multicast_ill != NULL) { 2514 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2515 zoneid, 0, &ipif); 2516 } else { 2517 /* Look for default like ip_wput_v6 */ 2518 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2519 } 2520 mutex_exit(&connp->conn_lock); 2521 if (ipif == NULL || ire_requested || 2522 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2523 if (ipif != NULL) 2524 ipif_refrele(ipif); 2525 if (ip_debug > 2) { 2526 /* ip1dbg */ 2527 pr_addr_dbg("ip_bind_connected_v6: bad " 2528 "connected multicast %s\n", AF_INET6, 2529 v6dst); 2530 } 2531 error = ENETUNREACH; 2532 goto bad_addr; 2533 } 2534 if (ipif != NULL) 2535 ipif_refrele(ipif); 2536 } else { 2537 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2538 NULL, &sire, zoneid, tsl, 2539 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2540 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2541 ipst); 2542 /* 2543 * We also prevent ire's with src address INADDR_ANY to 2544 * be used, which are created temporarily for 
2545 * sending out packets from endpoints that have 2546 * conn_unspec_src set. 2547 */ 2548 if (dst_ire == NULL || 2549 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2550 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2551 /* 2552 * When verifying destination reachability, we always 2553 * complain. 2554 * 2555 * When not verifying destination reachability but we 2556 * found an IRE, i.e. the destination is reachable, 2557 * then the other tests still apply and we complain. 2558 */ 2559 if (verify_dst || (dst_ire != NULL)) { 2560 if (ip_debug > 2) { 2561 /* ip1dbg */ 2562 pr_addr_dbg("ip_bind_connected_v6: bad" 2563 " connected dst %s\n", AF_INET6, 2564 v6dst); 2565 } 2566 if (dst_ire == NULL || 2567 !(dst_ire->ire_type & IRE_HOST)) { 2568 error = ENETUNREACH; 2569 } else { 2570 error = EHOSTUNREACH; 2571 } 2572 goto bad_addr; 2573 } 2574 } 2575 } 2576 2577 /* 2578 * If the app does a connect(), it means that it will most likely 2579 * send more than 1 packet to the destination. It makes sense 2580 * to clear the temporary flag. 2581 */ 2582 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2583 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2584 irb_t *irb = dst_ire->ire_bucket; 2585 2586 rw_enter(&irb->irb_lock, RW_WRITER); 2587 /* 2588 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2589 * the lock in order to guarantee irb_tmp_ire_cnt. 2590 */ 2591 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2592 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2593 irb->irb_tmp_ire_cnt--; 2594 } 2595 rw_exit(&irb->irb_lock); 2596 } 2597 2598 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2599 2600 /* 2601 * See if we should notify ULP about MDT; we do this whether or not 2602 * ire_requested is TRUE, in order to handle active connects; MDT 2603 * eligibility tests for passive connects are handled separately 2604 * through tcp_adapt_ire(). 
We do this before the source address 2605 * selection, because dst_ire may change after a call to 2606 * ipif_select_source_v6(). This is a best-effort check, as the 2607 * packet for this connection may not actually go through 2608 * dst_ire->ire_stq, and the exact IRE can only be known after 2609 * calling ip_newroute_v6(). This is why we further check on the 2610 * IRE during Multidata packet transmission in tcp_multisend(). 2611 */ 2612 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2613 dst_ire != NULL && 2614 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2615 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2616 ILL_MDT_CAPABLE(md_ill)) { 2617 md_dst_ire = dst_ire; 2618 IRE_REFHOLD(md_dst_ire); 2619 } 2620 2621 if (dst_ire != NULL && 2622 dst_ire->ire_type == IRE_LOCAL && 2623 dst_ire->ire_zoneid != zoneid && 2624 dst_ire->ire_zoneid != ALL_ZONES) { 2625 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2626 zoneid, 0, NULL, 2627 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2628 MATCH_IRE_RJ_BHOLE, ipst); 2629 if (src_ire == NULL) { 2630 error = EHOSTUNREACH; 2631 goto bad_addr; 2632 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2633 if (!(src_ire->ire_type & IRE_HOST)) 2634 error = ENETUNREACH; 2635 else 2636 error = EHOSTUNREACH; 2637 goto bad_addr; 2638 } 2639 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2640 src_ipif = src_ire->ire_ipif; 2641 ipif_refhold(src_ipif); 2642 *v6src = src_ipif->ipif_v6lcl_addr; 2643 } 2644 ire_refrele(src_ire); 2645 src_ire = NULL; 2646 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2647 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2648 *v6src = sire->ire_src_addr_v6; 2649 ire_refrele(dst_ire); 2650 dst_ire = sire; 2651 sire = NULL; 2652 } else if (dst_ire->ire_type == IRE_CACHE && 2653 (dst_ire->ire_flags & RTF_SETSRC)) { 2654 ASSERT(dst_ire->ire_zoneid == zoneid || 2655 dst_ire->ire_zoneid == ALL_ZONES); 2656 *v6src = dst_ire->ire_src_addr_v6; 
2657 } else { 2658 /* 2659 * Pick a source address so that a proper inbound load 2660 * spreading would happen. Use dst_ill specified by the 2661 * app. when socket option or scopeid is set. 2662 */ 2663 int err; 2664 2665 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2666 uint_t if_index; 2667 2668 /* 2669 * Scope id or IPV6_PKTINFO 2670 */ 2671 2672 if_index = ipp->ipp_ifindex; 2673 dst_ill = ill_lookup_on_ifindex( 2674 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2675 ipst); 2676 if (dst_ill == NULL) { 2677 ip1dbg(("ip_bind_connected_v6:" 2678 " bad ifindex %d\n", if_index)); 2679 error = EADDRNOTAVAIL; 2680 goto bad_addr; 2681 } 2682 ill_held = B_TRUE; 2683 } else if (connp->conn_outgoing_ill != NULL) { 2684 /* 2685 * For IPV6_BOUND_IF socket option, 2686 * conn_outgoing_ill should be set 2687 * already in TCP or UDP/ICMP. 2688 */ 2689 dst_ill = conn_get_held_ill(connp, 2690 &connp->conn_outgoing_ill, &err); 2691 if (err == ILL_LOOKUP_FAILED) { 2692 ip1dbg(("ip_bind_connected_v6:" 2693 "no ill for bound_if\n")); 2694 error = EADDRNOTAVAIL; 2695 goto bad_addr; 2696 } 2697 ill_held = B_TRUE; 2698 } else if (dst_ire->ire_stq != NULL) { 2699 /* No need to hold ill here */ 2700 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2701 } else { 2702 /* No need to hold ill here */ 2703 dst_ill = dst_ire->ire_ipif->ipif_ill; 2704 } 2705 if (ip6_asp_can_lookup(ipst)) { 2706 src_ipif = ipif_select_source_v6(dst_ill, 2707 v6dst, B_FALSE, connp->conn_src_preferences, 2708 zoneid); 2709 ip6_asp_table_refrele(ipst); 2710 if (src_ipif == NULL) { 2711 pr_addr_dbg("ip_bind_connected_v6: " 2712 "no usable source address for " 2713 "connection to %s\n", 2714 AF_INET6, v6dst); 2715 error = EADDRNOTAVAIL; 2716 goto bad_addr; 2717 } 2718 *v6src = src_ipif->ipif_v6lcl_addr; 2719 } else { 2720 error = EADDRNOTAVAIL; 2721 goto bad_addr; 2722 } 2723 } 2724 } 2725 2726 /* 2727 * We do ire_route_lookup_v6() here (and not an interface lookup) 2728 * as we assert that v6src should only come from an 2729 
* UP interface for hard binding. 2730 */ 2731 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2732 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2733 2734 /* src_ire must be a local|loopback */ 2735 if (!IRE_IS_LOCAL(src_ire)) { 2736 if (ip_debug > 2) { 2737 /* ip1dbg */ 2738 pr_addr_dbg("ip_bind_connected_v6: bad " 2739 "connected src %s\n", AF_INET6, v6src); 2740 } 2741 error = EADDRNOTAVAIL; 2742 goto bad_addr; 2743 } 2744 2745 /* 2746 * If the source address is a loopback address, the 2747 * destination had best be local or multicast. 2748 * The transports that can't handle multicast will reject 2749 * those addresses. 2750 */ 2751 if (src_ire->ire_type == IRE_LOOPBACK && 2752 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2753 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2754 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2755 error = -1; 2756 goto bad_addr; 2757 } 2758 /* 2759 * Allow setting new policies. For example, disconnects come 2760 * down as ipa_t bind. As we would have set conn_policy_cached 2761 * to B_TRUE before, we should set it to B_FALSE, so that policy 2762 * can change after the disconnect. 2763 */ 2764 connp->conn_policy_cached = B_FALSE; 2765 2766 /* 2767 * The addresses have been verified. Initialize the conn 2768 * before calling the policy as they expect the conns 2769 * initialized. 2770 */ 2771 connp->conn_srcv6 = *v6src; 2772 connp->conn_remv6 = *v6dst; 2773 connp->conn_lport = lport; 2774 connp->conn_fport = fport; 2775 2776 ASSERT(!(ipsec_policy_set && ire_requested)); 2777 if (ire_requested) { 2778 iulp_t *ulp_info = NULL; 2779 2780 /* 2781 * Note that sire will not be NULL if this is an off-link 2782 * connection and there is not cache for that dest yet. 2783 * 2784 * XXX Because of an existing bug, if there are multiple 2785 * default routes, the IRE returned now may not be the actual 2786 * default route used (default routes are chosen in a 2787 * round robin fashion). 
So if the metrics for different 2788 * default routes are different, we may return the wrong 2789 * metrics. This will not be a problem if the existing 2790 * bug is fixed. 2791 */ 2792 if (sire != NULL) 2793 ulp_info = &(sire->ire_uinfo); 2794 2795 if (!ip_bind_get_ire_v6(mpp, dst_ire, v6dst, ulp_info, 2796 ipst)) { 2797 error = -1; 2798 goto bad_addr; 2799 } 2800 } else if (ipsec_policy_set) { 2801 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2802 error = -1; 2803 goto bad_addr; 2804 } 2805 } 2806 2807 /* 2808 * Cache IPsec policy in this conn. If we have per-socket policy, 2809 * we'll cache that. If we don't, we'll inherit global policy. 2810 * 2811 * We can't insert until the conn reflects the policy. Note that 2812 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2813 * connections where we don't have a policy. This is to prevent 2814 * global policy lookups in the inbound path. 2815 * 2816 * If we insert before we set conn_policy_cached, 2817 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2818 * because global policy cound be non-empty. We normally call 2819 * ipsec_check_policy() for conn_policy_cached connections only if 2820 * conn_in_enforce_policy is set. But in this case, 2821 * conn_policy_cached can get set anytime since we made the 2822 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2823 * is called, which will make the above assumption false. Thus, we 2824 * need to insert after we set conn_policy_cached. 2825 */ 2826 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2827 goto bad_addr; 2828 2829 /* If not fanout_insert this was just an address verification */ 2830 if (fanout_insert) { 2831 /* 2832 * The addresses have been verified. Time to insert in 2833 * the correct fanout list. 2834 */ 2835 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2836 connp->conn_ports, 2837 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2838 } 2839 if (error == 0) { 2840 connp->conn_fully_bound = B_TRUE; 2841 /* 2842 * Our initial checks for MDT have passed; the IRE is not 2843 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2844 * be supporting MDT. Pass the IRE, IPC and ILL into 2845 * ip_mdinfo_return(), which performs further checks 2846 * against them and upon success, returns the MDT info 2847 * mblk which we will attach to the bind acknowledgment. 2848 */ 2849 if (md_dst_ire != NULL) { 2850 mblk_t *mdinfo_mp; 2851 2852 ASSERT(md_ill != NULL); 2853 ASSERT(md_ill->ill_mdt_capab != NULL); 2854 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2855 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) { 2856 if (mp == NULL) { 2857 *mpp = mdinfo_mp; 2858 } else { 2859 linkb(mp, mdinfo_mp); 2860 } 2861 } 2862 } 2863 } 2864 bad_addr: 2865 if (ipsec_policy_set) { 2866 ASSERT(mp != NULL); 2867 freeb(mp); 2868 /* 2869 * As of now assume that nothing else accompanies 2870 * IPSEC_POLICY_SET. 
2871 */ 2872 *mpp = NULL; 2873 } 2874 refrele_and_quit: 2875 if (src_ire != NULL) 2876 IRE_REFRELE(src_ire); 2877 if (dst_ire != NULL) 2878 IRE_REFRELE(dst_ire); 2879 if (sire != NULL) 2880 IRE_REFRELE(sire); 2881 if (src_ipif != NULL) 2882 ipif_refrele(src_ipif); 2883 if (md_dst_ire != NULL) 2884 IRE_REFRELE(md_dst_ire); 2885 if (ill_held && dst_ill != NULL) 2886 ill_refrele(dst_ill); 2887 if (effective_cred != NULL) 2888 crfree(effective_cred); 2889 return (error); 2890 } 2891 2892 /* ARGSUSED */ 2893 int 2894 ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2895 in6_addr_t *v6srcp, uint16_t lport, const in6_addr_t *v6dstp, 2896 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2897 boolean_t verify_dst, cred_t *cr) 2898 { 2899 int error = 0; 2900 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2901 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2902 2903 ASSERT(connp->conn_af_isv6); 2904 connp->conn_ulp = protocol; 2905 2906 /* For raw socket, the local port is not set. */ 2907 lport = lport != 0 ? lport : connp->conn_lport; 2908 2909 /* 2910 * Bind to local and remote address. Local might be 2911 * unspecified in which case it will be extracted from 2912 * ire_src_addr_v6 2913 */ 2914 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2915 /* Connect to IPv4 address */ 2916 ipaddr_t v4src; 2917 ipaddr_t v4dst; 2918 2919 /* Is the source unspecified or mapped? */ 2920 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2921 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2922 ip1dbg(("ip_proto_bind_connected_v6: " 2923 "dst is mapped, but not the src\n")); 2924 goto bad_addr; 2925 } 2926 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2927 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2928 2929 /* Always verify destination reachability. 
*/ 2930 error = ip_bind_connected_v4(connp, mpp, protocol, &v4src, 2931 lport, v4dst, fport, B_TRUE, B_TRUE, cr); 2932 if (error != 0) 2933 goto bad_addr; 2934 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2935 connp->conn_pkt_isv6 = B_FALSE; 2936 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2937 ip1dbg(("ip_proto_bind_connected_v6: " 2938 "src is mapped, but not the dst\n")); 2939 goto bad_addr; 2940 } else { 2941 error = ip_bind_connected_v6(connp, mpp, protocol, v6srcp, 2942 lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2943 if (error != 0) 2944 goto bad_addr; 2945 connp->conn_pkt_isv6 = B_TRUE; 2946 } 2947 2948 if (orig_pkt_isv6 != connp->conn_pkt_isv6) 2949 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2950 2951 /* Send it home. */ 2952 return (0); 2953 2954 bad_addr: 2955 if (error == 0) 2956 error = -TBADADDR; 2957 return (error); 2958 } 2959 2960 /* 2961 * Get the ire in *mpp. Returns false if it fails (due to lack of space). 2962 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 2963 */ 2964 /* ARGSUSED4 */ 2965 static boolean_t 2966 ip_bind_get_ire_v6(mblk_t **mpp, ire_t *ire, const in6_addr_t *dst, 2967 iulp_t *ulp_info, ip_stack_t *ipst) 2968 { 2969 mblk_t *mp = *mpp; 2970 ire_t *ret_ire; 2971 2972 ASSERT(mp != NULL); 2973 2974 if (ire != NULL) { 2975 /* 2976 * mp initialized above to IRE_DB_REQ_TYPE 2977 * appended mblk. Its <upper protocol>'s 2978 * job to make sure there is room. 
2979 */ 2980 if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t)) 2981 return (B_FALSE); 2982 2983 mp->b_datap->db_type = IRE_DB_TYPE; 2984 mp->b_wptr = mp->b_rptr + sizeof (ire_t); 2985 bcopy(ire, mp->b_rptr, sizeof (ire_t)); 2986 ret_ire = (ire_t *)mp->b_rptr; 2987 if (IN6_IS_ADDR_MULTICAST(dst) || 2988 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 2989 ret_ire->ire_type = IRE_BROADCAST; 2990 ret_ire->ire_addr_v6 = *dst; 2991 } 2992 if (ulp_info != NULL) { 2993 bcopy(ulp_info, &(ret_ire->ire_uinfo), 2994 sizeof (iulp_t)); 2995 } 2996 ret_ire->ire_mp = mp; 2997 } else { 2998 /* 2999 * No IRE was found. Remove IRE mblk. 3000 */ 3001 *mpp = mp->b_cont; 3002 freeb(mp); 3003 } 3004 return (B_TRUE); 3005 } 3006 3007 /* 3008 * Add an ip6i_t header to the front of the mblk. 3009 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3010 * Returns NULL if allocation fails (and frees original message). 3011 * Used in outgoing path when going through ip_newroute_*v6(). 3012 * Used in incoming path to pass ifindex to transports. 3013 */ 3014 mblk_t * 3015 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3016 { 3017 mblk_t *mp1; 3018 ip6i_t *ip6i; 3019 ip6_t *ip6h; 3020 3021 ip6h = (ip6_t *)mp->b_rptr; 3022 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3023 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3024 mp->b_datap->db_ref > 1) { 3025 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3026 if (mp1 == NULL) { 3027 freemsg(mp); 3028 return (NULL); 3029 } 3030 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3031 mp1->b_cont = mp; 3032 mp = mp1; 3033 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3034 } 3035 mp->b_rptr = (uchar_t *)ip6i; 3036 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3037 ip6i->ip6i_nxt = IPPROTO_RAW; 3038 if (ill != NULL) { 3039 ip6i->ip6i_flags = IP6I_IFINDEX; 3040 /* 3041 * If `ill' is in an IPMP group, make sure we use the IPMP 3042 * interface index so that e.g. 
IPV6_RECVPKTINFO will get the 3043 * IPMP interface index and not an underlying interface index. 3044 */ 3045 if (IS_UNDER_IPMP(ill)) 3046 ip6i->ip6i_ifindex = ipmp_ill_get_ipmp_ifindex(ill); 3047 else 3048 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3049 } else { 3050 ip6i->ip6i_flags = 0; 3051 } 3052 ip6i->ip6i_nexthop = *dst; 3053 return (mp); 3054 } 3055 3056 /* 3057 * Handle protocols with which IP is less intimate. There 3058 * can be more than one stream bound to a particular 3059 * protocol. When this is the case, normally each one gets a copy 3060 * of any incoming packets. 3061 * 3062 * Zones notes: 3063 * Packets will be distributed to streams in all zones. This is really only 3064 * useful for ICMPv6 as only applications in the global zone can create raw 3065 * sockets for other protocols. 3066 */ 3067 static void 3068 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3069 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3070 boolean_t mctl_present, zoneid_t zoneid) 3071 { 3072 queue_t *rq; 3073 mblk_t *mp1, *first_mp1; 3074 in6_addr_t dst = ip6h->ip6_dst; 3075 in6_addr_t src = ip6h->ip6_src; 3076 mblk_t *first_mp = mp; 3077 boolean_t secure, shared_addr; 3078 conn_t *connp, *first_connp, *next_connp; 3079 connf_t *connfp; 3080 ip_stack_t *ipst = inill->ill_ipst; 3081 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3082 3083 if (mctl_present) { 3084 mp = first_mp->b_cont; 3085 secure = ipsec_in_is_secure(first_mp); 3086 ASSERT(mp != NULL); 3087 } else { 3088 secure = B_FALSE; 3089 } 3090 3091 shared_addr = (zoneid == ALL_ZONES); 3092 if (shared_addr) { 3093 /* 3094 * We don't allow multilevel ports for raw IP, so no need to 3095 * check for that here. 
3096 */ 3097 zoneid = tsol_packet_to_zoneid(mp); 3098 } 3099 3100 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3101 mutex_enter(&connfp->connf_lock); 3102 connp = connfp->connf_head; 3103 for (connp = connfp->connf_head; connp != NULL; 3104 connp = connp->conn_next) { 3105 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3106 zoneid) && 3107 (!is_system_labeled() || 3108 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3109 connp))) 3110 break; 3111 } 3112 3113 if (connp == NULL) { 3114 /* 3115 * No one bound to this port. Is 3116 * there a client that wants all 3117 * unclaimed datagrams? 3118 */ 3119 mutex_exit(&connfp->connf_lock); 3120 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3121 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3122 nexthdr_offset, mctl_present, zoneid, ipst)) { 3123 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3124 } 3125 3126 return; 3127 } 3128 3129 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL); 3130 3131 CONN_INC_REF(connp); 3132 first_connp = connp; 3133 3134 /* 3135 * XXX: Fix the multiple protocol listeners case. We should not 3136 * be walking the conn->next list here. 3137 */ 3138 connp = connp->conn_next; 3139 for (;;) { 3140 while (connp != NULL) { 3141 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3142 flags, zoneid) && 3143 (!is_system_labeled() || 3144 tsol_receive_local(mp, &dst, IPV6_VERSION, 3145 shared_addr, connp))) 3146 break; 3147 connp = connp->conn_next; 3148 } 3149 3150 /* 3151 * Just copy the data part alone. The mctl part is 3152 * needed just for verifying policy and it is never 3153 * sent up. 3154 */ 3155 if (connp == NULL || 3156 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3157 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3158 /* 3159 * No more intested clients or memory 3160 * allocation failed 3161 */ 3162 connp = first_connp; 3163 break; 3164 } 3165 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 3166 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3167 CONN_INC_REF(connp); 3168 mutex_exit(&connfp->connf_lock); 3169 rq = connp->conn_rq; 3170 /* 3171 * For link-local always add ifindex so that transport can set 3172 * sin6_scope_id. Avoid it for ICMP error fanout. 3173 */ 3174 if ((connp->conn_ip_recvpktinfo || 3175 IN6_IS_ADDR_LINKLOCAL(&src)) && 3176 (flags & IP_FF_IPINFO)) { 3177 /* Add header */ 3178 mp1 = ip_add_info_v6(mp1, inill, &dst); 3179 } 3180 if (mp1 == NULL) { 3181 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3182 } else if ( 3183 (IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3184 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3185 if (flags & IP_FF_RAWIP) { 3186 BUMP_MIB(ill->ill_ip_mib, 3187 rawipIfStatsInOverflows); 3188 } else { 3189 BUMP_MIB(ill->ill_icmp6_mib, 3190 ipv6IfIcmpInOverflows); 3191 } 3192 3193 freemsg(mp1); 3194 } else { 3195 ASSERT(!IPCL_IS_IPTUN(connp)); 3196 3197 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3198 secure) { 3199 first_mp1 = ipsec_check_inbound_policy( 3200 first_mp1, connp, NULL, ip6h, mctl_present); 3201 } 3202 if (first_mp1 != NULL) { 3203 if (mctl_present) 3204 freeb(first_mp1); 3205 BUMP_MIB(ill->ill_ip_mib, 3206 ipIfStatsHCInDelivers); 3207 (connp->conn_recv)(connp, mp1, NULL); 3208 } 3209 } 3210 mutex_enter(&connfp->connf_lock); 3211 /* Follow the next pointer before releasing the conn. */ 3212 next_connp = connp->conn_next; 3213 CONN_DEC_REF(connp); 3214 connp = next_connp; 3215 } 3216 3217 /* Last one. Send it upstream. 
*/ 3218 mutex_exit(&connfp->connf_lock); 3219 3220 /* Initiate IPPF processing */ 3221 if (IP6_IN_IPP(flags, ipst)) { 3222 uint_t ifindex; 3223 3224 mutex_enter(&ill->ill_lock); 3225 ifindex = ill->ill_phyint->phyint_ifindex; 3226 mutex_exit(&ill->ill_lock); 3227 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3228 if (mp == NULL) { 3229 CONN_DEC_REF(connp); 3230 if (mctl_present) 3231 freeb(first_mp); 3232 return; 3233 } 3234 } 3235 3236 /* 3237 * For link-local always add ifindex so that transport can set 3238 * sin6_scope_id. Avoid it for ICMP error fanout. 3239 */ 3240 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3241 (flags & IP_FF_IPINFO)) { 3242 /* Add header */ 3243 mp = ip_add_info_v6(mp, inill, &dst); 3244 if (mp == NULL) { 3245 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3246 CONN_DEC_REF(connp); 3247 if (mctl_present) 3248 freeb(first_mp); 3249 return; 3250 } else if (mctl_present) { 3251 first_mp->b_cont = mp; 3252 } else { 3253 first_mp = mp; 3254 } 3255 } 3256 3257 rq = connp->conn_rq; 3258 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3259 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3260 3261 if (flags & IP_FF_RAWIP) { 3262 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3263 } else { 3264 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3265 } 3266 3267 freemsg(first_mp); 3268 } else { 3269 ASSERT(!IPCL_IS_IPTUN(connp)); 3270 3271 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure) { 3272 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3273 NULL, ip6h, mctl_present); 3274 if (first_mp == NULL) { 3275 CONN_DEC_REF(connp); 3276 return; 3277 } 3278 } 3279 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3280 (connp->conn_recv)(connp, mp, NULL); 3281 if (mctl_present) 3282 freeb(first_mp); 3283 } 3284 CONN_DEC_REF(connp); 3285 } 3286 3287 /* 3288 * Send an ICMP error after patching up the packet appropriately. Returns 3289 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3290 */ 3291 int 3292 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3293 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3294 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3295 { 3296 ip6_t *ip6h; 3297 mblk_t *first_mp; 3298 boolean_t secure; 3299 unsigned char db_type; 3300 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3301 3302 first_mp = mp; 3303 if (mctl_present) { 3304 mp = mp->b_cont; 3305 secure = ipsec_in_is_secure(first_mp); 3306 ASSERT(mp != NULL); 3307 } else { 3308 /* 3309 * If this is an ICMP error being reported - which goes 3310 * up as M_CTLs, we need to convert them to M_DATA till 3311 * we finish checking with global policy because 3312 * ipsec_check_global_policy() assumes M_DATA as clear 3313 * and M_CTL as secure. 3314 */ 3315 db_type = mp->b_datap->db_type; 3316 mp->b_datap->db_type = M_DATA; 3317 secure = B_FALSE; 3318 } 3319 /* 3320 * We are generating an icmp error for some inbound packet. 3321 * Called from all ip_fanout_(udp, tcp, proto) functions. 3322 * Before we generate an error, check with global policy 3323 * to see whether this is allowed to enter the system. As 3324 * there is no "conn", we are checking with global policy. 
3325 */ 3326 ip6h = (ip6_t *)mp->b_rptr; 3327 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3328 first_mp = ipsec_check_global_policy(first_mp, NULL, 3329 NULL, ip6h, mctl_present, ipst->ips_netstack); 3330 if (first_mp == NULL) 3331 return (0); 3332 } 3333 3334 if (!mctl_present) 3335 mp->b_datap->db_type = db_type; 3336 3337 if (flags & IP_FF_SEND_ICMP) { 3338 if (flags & IP_FF_HDR_COMPLETE) { 3339 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3340 freemsg(first_mp); 3341 return (1); 3342 } 3343 } 3344 switch (icmp_type) { 3345 case ICMP6_DST_UNREACH: 3346 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3347 B_FALSE, B_FALSE, zoneid, ipst); 3348 break; 3349 case ICMP6_PARAM_PROB: 3350 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3351 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3352 break; 3353 default: 3354 #ifdef DEBUG 3355 panic("ip_fanout_send_icmp_v6: wrong type"); 3356 /*NOTREACHED*/ 3357 #else 3358 freemsg(first_mp); 3359 break; 3360 #endif 3361 } 3362 } else { 3363 freemsg(first_mp); 3364 return (0); 3365 } 3366 3367 return (1); 3368 } 3369 3370 /* 3371 * Fanout for TCP packets 3372 * The caller puts <fport, lport> in the ports parameter. 3373 */ 3374 static void 3375 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3376 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3377 { 3378 mblk_t *first_mp; 3379 boolean_t secure; 3380 conn_t *connp; 3381 tcph_t *tcph; 3382 boolean_t syn_present = B_FALSE; 3383 ip_stack_t *ipst = inill->ill_ipst; 3384 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3385 3386 first_mp = mp; 3387 if (mctl_present) { 3388 mp = first_mp->b_cont; 3389 secure = ipsec_in_is_secure(first_mp); 3390 ASSERT(mp != NULL); 3391 } else { 3392 secure = B_FALSE; 3393 } 3394 3395 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3396 3397 if (connp == NULL || 3398 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3399 /* 3400 * No hard-bound match. 
Send Reset. 3401 */ 3402 dblk_t *dp = mp->b_datap; 3403 uint32_t ill_index; 3404 3405 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3406 3407 /* Initiate IPPf processing, if needed. */ 3408 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3409 (flags & IP6_NO_IPPOLICY)) { 3410 ill_index = ill->ill_phyint->phyint_ifindex; 3411 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3412 if (first_mp == NULL) { 3413 if (connp != NULL) 3414 CONN_DEC_REF(connp); 3415 return; 3416 } 3417 } 3418 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3419 if (connp != NULL) { 3420 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3421 ipst->ips_netstack->netstack_tcp, connp); 3422 CONN_DEC_REF(connp); 3423 } else { 3424 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3425 ipst->ips_netstack->netstack_tcp, NULL); 3426 } 3427 3428 return; 3429 } 3430 3431 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3432 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3433 if (IPCL_IS_TCP(connp)) { 3434 squeue_t *sqp; 3435 3436 /* 3437 * If the queue belongs to a conn, and fused tcp 3438 * loopback is enabled, assign the eager's squeue 3439 * to be that of the active connect's. 3440 */ 3441 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3442 CONN_Q(q) && IPCL_IS_TCP(Q_TO_CONN(q)) && 3443 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3444 !secure && 3445 !IP6_IN_IPP(flags, ipst)) { 3446 ASSERT(Q_TO_CONN(q)->conn_sqp != NULL); 3447 sqp = Q_TO_CONN(q)->conn_sqp; 3448 } else { 3449 sqp = IP_SQUEUE_GET(lbolt); 3450 } 3451 3452 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3453 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3454 3455 /* 3456 * db_cksumstuff is unused in the incoming 3457 * path; Thus store the ifindex here. It will 3458 * be cleared in tcp_conn_create_v6(). 
3459 */ 3460 DB_CKSUMSTUFF(mp) = 3461 (intptr_t)ill->ill_phyint->phyint_ifindex; 3462 syn_present = B_TRUE; 3463 } 3464 } 3465 3466 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3467 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3468 if ((flags & TH_RST) || (flags & TH_URG)) { 3469 CONN_DEC_REF(connp); 3470 freemsg(first_mp); 3471 return; 3472 } 3473 if (flags & TH_ACK) { 3474 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3475 ipst->ips_netstack->netstack_tcp, connp); 3476 CONN_DEC_REF(connp); 3477 return; 3478 } 3479 3480 CONN_DEC_REF(connp); 3481 freemsg(first_mp); 3482 return; 3483 } 3484 3485 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3486 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3487 NULL, ip6h, mctl_present); 3488 if (first_mp == NULL) { 3489 CONN_DEC_REF(connp); 3490 return; 3491 } 3492 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3493 ASSERT(syn_present); 3494 if (mctl_present) { 3495 ASSERT(first_mp != mp); 3496 first_mp->b_datap->db_struioflag |= 3497 STRUIO_POLICY; 3498 } else { 3499 ASSERT(first_mp == mp); 3500 mp->b_datap->db_struioflag &= 3501 ~STRUIO_EAGER; 3502 mp->b_datap->db_struioflag |= 3503 STRUIO_POLICY; 3504 } 3505 } else { 3506 /* 3507 * Discard first_mp early since we're dealing with a 3508 * fully-connected conn_t and tcp doesn't do policy in 3509 * this case. Also, if someone is bound to IPPROTO_TCP 3510 * over raw IP, they don't expect to see a M_CTL. 
3511 */ 3512 if (mctl_present) { 3513 freeb(first_mp); 3514 mctl_present = B_FALSE; 3515 } 3516 first_mp = mp; 3517 } 3518 } 3519 3520 /* Initiate IPPF processing */ 3521 if (IP6_IN_IPP(flags, ipst)) { 3522 uint_t ifindex; 3523 3524 mutex_enter(&ill->ill_lock); 3525 ifindex = ill->ill_phyint->phyint_ifindex; 3526 mutex_exit(&ill->ill_lock); 3527 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3528 if (mp == NULL) { 3529 CONN_DEC_REF(connp); 3530 if (mctl_present) { 3531 freeb(first_mp); 3532 } 3533 return; 3534 } else if (mctl_present) { 3535 /* 3536 * ip_add_info_v6 might return a new mp. 3537 */ 3538 ASSERT(first_mp != mp); 3539 first_mp->b_cont = mp; 3540 } else { 3541 first_mp = mp; 3542 } 3543 } 3544 3545 /* 3546 * For link-local always add ifindex so that TCP can bind to that 3547 * interface. Avoid it for ICMP error fanout. 3548 */ 3549 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3550 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3551 (flags & IP_FF_IPINFO))) { 3552 /* Add header */ 3553 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3554 if (mp == NULL) { 3555 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3556 CONN_DEC_REF(connp); 3557 if (mctl_present) 3558 freeb(first_mp); 3559 return; 3560 } else if (mctl_present) { 3561 ASSERT(first_mp != mp); 3562 first_mp->b_cont = mp; 3563 } else { 3564 first_mp = mp; 3565 } 3566 } 3567 3568 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3569 if (IPCL_IS_TCP(connp)) { 3570 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, connp->conn_recv, 3571 connp, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 3572 } else { 3573 /* SOCK_RAW, IPPROTO_TCP case */ 3574 (connp->conn_recv)(connp, first_mp, NULL); 3575 CONN_DEC_REF(connp); 3576 } 3577 } 3578 3579 /* 3580 * Fanout for UDP packets. 3581 * The caller puts <fport, lport> in the ports parameter. 3582 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3583 * 3584 * If SO_REUSEADDR is set all multicast and broadcast packets 3585 * will be delivered to all streams bound to the same port. 3586 * 3587 * Zones notes: 3588 * Multicast packets will be distributed to streams in all zones. 3589 */ 3590 static void 3591 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3592 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3593 zoneid_t zoneid) 3594 { 3595 uint32_t dstport, srcport; 3596 in6_addr_t dst; 3597 mblk_t *first_mp; 3598 boolean_t secure; 3599 conn_t *connp; 3600 connf_t *connfp; 3601 conn_t *first_conn; 3602 conn_t *next_conn; 3603 mblk_t *mp1, *first_mp1; 3604 in6_addr_t src; 3605 boolean_t shared_addr; 3606 ip_stack_t *ipst = inill->ill_ipst; 3607 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3608 3609 first_mp = mp; 3610 if (mctl_present) { 3611 mp = first_mp->b_cont; 3612 secure = ipsec_in_is_secure(first_mp); 3613 ASSERT(mp != NULL); 3614 } else { 3615 secure = B_FALSE; 3616 } 3617 3618 /* Extract ports in net byte order */ 3619 dstport = htons(ntohl(ports) & 0xFFFF); 3620 srcport = htons(ntohl(ports) >> 16); 3621 dst = ip6h->ip6_dst; 3622 src = ip6h->ip6_src; 3623 3624 shared_addr = (zoneid == ALL_ZONES); 3625 if (shared_addr) { 3626 /* 3627 * No need to handle exclusive-stack zones since ALL_ZONES 3628 * only applies to the shared stack. 3629 */ 3630 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3631 /* 3632 * If no shared MLP is found, tsol_mlp_findzone returns 3633 * ALL_ZONES. In that case, we assume it's SLP, and 3634 * search for the zone based on the packet label. 3635 * That will also return ALL_ZONES on failure, but 3636 * we never allow conn_zoneid to be set to ALL_ZONES. 3637 */ 3638 if (zoneid == ALL_ZONES) 3639 zoneid = tsol_packet_to_zoneid(mp); 3640 } 3641 3642 /* Attempt to find a client stream based on destination port. 
*/ 3643 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3644 mutex_enter(&connfp->connf_lock); 3645 connp = connfp->connf_head; 3646 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3647 /* 3648 * Not multicast. Send to the one (first) client we find. 3649 */ 3650 while (connp != NULL) { 3651 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3652 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3653 conn_wantpacket_v6(connp, ill, ip6h, 3654 flags, zoneid)) { 3655 break; 3656 } 3657 connp = connp->conn_next; 3658 } 3659 if (connp == NULL || connp->conn_upq == NULL) 3660 goto notfound; 3661 3662 if (is_system_labeled() && 3663 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3664 connp)) 3665 goto notfound; 3666 3667 /* Found a client */ 3668 CONN_INC_REF(connp); 3669 mutex_exit(&connfp->connf_lock); 3670 3671 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3672 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3673 freemsg(first_mp); 3674 CONN_DEC_REF(connp); 3675 return; 3676 } 3677 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3678 first_mp = ipsec_check_inbound_policy(first_mp, 3679 connp, NULL, ip6h, mctl_present); 3680 if (first_mp == NULL) { 3681 CONN_DEC_REF(connp); 3682 return; 3683 } 3684 } 3685 /* Initiate IPPF processing */ 3686 if (IP6_IN_IPP(flags, ipst)) { 3687 uint_t ifindex; 3688 3689 mutex_enter(&ill->ill_lock); 3690 ifindex = ill->ill_phyint->phyint_ifindex; 3691 mutex_exit(&ill->ill_lock); 3692 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3693 if (mp == NULL) { 3694 CONN_DEC_REF(connp); 3695 if (mctl_present) 3696 freeb(first_mp); 3697 return; 3698 } 3699 } 3700 /* 3701 * For link-local always add ifindex so that 3702 * transport can set sin6_scope_id. Avoid it for 3703 * ICMP error fanout. 
3704 */ 3705 if ((connp->conn_ip_recvpktinfo || 3706 IN6_IS_ADDR_LINKLOCAL(&src)) && 3707 (flags & IP_FF_IPINFO)) { 3708 /* Add header */ 3709 mp = ip_add_info_v6(mp, inill, &dst); 3710 if (mp == NULL) { 3711 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3712 CONN_DEC_REF(connp); 3713 if (mctl_present) 3714 freeb(first_mp); 3715 return; 3716 } else if (mctl_present) { 3717 first_mp->b_cont = mp; 3718 } else { 3719 first_mp = mp; 3720 } 3721 } 3722 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3723 3724 /* Send it upstream */ 3725 (connp->conn_recv)(connp, mp, NULL); 3726 3727 IP6_STAT(ipst, ip6_udp_fannorm); 3728 CONN_DEC_REF(connp); 3729 if (mctl_present) 3730 freeb(first_mp); 3731 return; 3732 } 3733 3734 while (connp != NULL) { 3735 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3736 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3737 (!is_system_labeled() || 3738 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3739 connp))) 3740 break; 3741 connp = connp->conn_next; 3742 } 3743 3744 if (connp == NULL || connp->conn_upq == NULL) 3745 goto notfound; 3746 3747 first_conn = connp; 3748 3749 CONN_INC_REF(connp); 3750 connp = connp->conn_next; 3751 for (;;) { 3752 while (connp != NULL) { 3753 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3754 src) && conn_wantpacket_v6(connp, ill, ip6h, 3755 flags, zoneid) && 3756 (!is_system_labeled() || 3757 tsol_receive_local(mp, &dst, IPV6_VERSION, 3758 shared_addr, connp))) 3759 break; 3760 connp = connp->conn_next; 3761 } 3762 /* 3763 * Just copy the data part alone. The mctl part is 3764 * needed just for verifying policy and it is never 3765 * sent up. 3766 */ 3767 if (connp == NULL || 3768 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3769 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3770 /* 3771 * No more interested clients or memory 3772 * allocation failed 3773 */ 3774 connp = first_conn; 3775 break; 3776 } 3777 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3778 CONN_INC_REF(connp); 3779 mutex_exit(&connfp->connf_lock); 3780 /* 3781 * For link-local always add ifindex so that transport 3782 * can set sin6_scope_id. Avoid it for ICMP error 3783 * fanout. 3784 */ 3785 if ((connp->conn_ip_recvpktinfo || 3786 IN6_IS_ADDR_LINKLOCAL(&src)) && 3787 (flags & IP_FF_IPINFO)) { 3788 /* Add header */ 3789 mp1 = ip_add_info_v6(mp1, inill, &dst); 3790 } 3791 /* mp1 could have changed */ 3792 if (mctl_present) 3793 first_mp1->b_cont = mp1; 3794 else 3795 first_mp1 = mp1; 3796 if (mp1 == NULL) { 3797 if (mctl_present) 3798 freeb(first_mp1); 3799 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3800 goto next_one; 3801 } 3802 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3803 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3804 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3805 freemsg(first_mp1); 3806 goto next_one; 3807 } 3808 3809 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3810 first_mp1 = ipsec_check_inbound_policy 3811 (first_mp1, connp, NULL, ip6h, 3812 mctl_present); 3813 } 3814 if (first_mp1 != NULL) { 3815 if (mctl_present) 3816 freeb(first_mp1); 3817 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3818 3819 /* Send it upstream */ 3820 (connp->conn_recv)(connp, mp1, NULL); 3821 } 3822 next_one: 3823 mutex_enter(&connfp->connf_lock); 3824 /* Follow the next pointer before releasing the conn. */ 3825 next_conn = connp->conn_next; 3826 IP6_STAT(ipst, ip6_udp_fanmb); 3827 CONN_DEC_REF(connp); 3828 connp = next_conn; 3829 } 3830 3831 /* Last one. Send it upstream. 
*/ 3832 mutex_exit(&connfp->connf_lock); 3833 3834 /* Initiate IPPF processing */ 3835 if (IP6_IN_IPP(flags, ipst)) { 3836 uint_t ifindex; 3837 3838 mutex_enter(&ill->ill_lock); 3839 ifindex = ill->ill_phyint->phyint_ifindex; 3840 mutex_exit(&ill->ill_lock); 3841 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3842 if (mp == NULL) { 3843 CONN_DEC_REF(connp); 3844 if (mctl_present) { 3845 freeb(first_mp); 3846 } 3847 return; 3848 } 3849 } 3850 3851 /* 3852 * For link-local always add ifindex so that transport can set 3853 * sin6_scope_id. Avoid it for ICMP error fanout. 3854 */ 3855 if ((connp->conn_ip_recvpktinfo || 3856 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3857 /* Add header */ 3858 mp = ip_add_info_v6(mp, inill, &dst); 3859 if (mp == NULL) { 3860 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3861 CONN_DEC_REF(connp); 3862 if (mctl_present) 3863 freeb(first_mp); 3864 return; 3865 } else if (mctl_present) { 3866 first_mp->b_cont = mp; 3867 } else { 3868 first_mp = mp; 3869 } 3870 } 3871 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3872 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3873 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3874 freemsg(mp); 3875 } else { 3876 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3877 first_mp = ipsec_check_inbound_policy(first_mp, 3878 connp, NULL, ip6h, mctl_present); 3879 if (first_mp == NULL) { 3880 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3881 CONN_DEC_REF(connp); 3882 return; 3883 } 3884 } 3885 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3886 3887 /* Send it upstream */ 3888 (connp->conn_recv)(connp, mp, NULL); 3889 } 3890 IP6_STAT(ipst, ip6_udp_fanmb); 3891 CONN_DEC_REF(connp); 3892 if (mctl_present) 3893 freeb(first_mp); 3894 return; 3895 3896 notfound: 3897 mutex_exit(&connfp->connf_lock); 3898 /* 3899 * No one bound to this port. Is 3900 * there a client that wants all 3901 * unclaimed datagrams? 
3902 */ 3903 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3904 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3905 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3906 zoneid); 3907 } else { 3908 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3909 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3910 mctl_present, zoneid, ipst)) { 3911 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3912 } 3913 } 3914 } 3915 3916 /* 3917 * int ip_find_hdr_v6() 3918 * 3919 * This routine is used by the upper layer protocols and the IP tunnel 3920 * module to: 3921 * - Set extension header pointers to appropriate locations 3922 * - Determine IPv6 header length and return it 3923 * - Return a pointer to the last nexthdr value 3924 * 3925 * The caller must initialize ipp_fields. 3926 * 3927 * NOTE: If multiple extension headers of the same type are present, 3928 * ip_find_hdr_v6() will set the respective extension header pointers 3929 * to the first one that it encounters in the IPv6 header. It also 3930 * skips fragment headers. This routine deals with malformed packets 3931 * of various sorts in which case the returned length is up to the 3932 * malformed part. 3933 */ 3934 int 3935 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3936 { 3937 uint_t length, ehdrlen; 3938 uint8_t nexthdr; 3939 uint8_t *whereptr, *endptr; 3940 ip6_dest_t *tmpdstopts; 3941 ip6_rthdr_t *tmprthdr; 3942 ip6_hbh_t *tmphopopts; 3943 ip6_frag_t *tmpfraghdr; 3944 3945 length = IPV6_HDR_LEN; 3946 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3947 endptr = mp->b_wptr; 3948 3949 nexthdr = ip6h->ip6_nxt; 3950 while (whereptr < endptr) { 3951 /* Is there enough left for len + nexthdr? 
*/ 3952 if (whereptr + MIN_EHDR_LEN > endptr) 3953 goto done; 3954 3955 switch (nexthdr) { 3956 case IPPROTO_HOPOPTS: 3957 tmphopopts = (ip6_hbh_t *)whereptr; 3958 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 3959 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 3960 goto done; 3961 nexthdr = tmphopopts->ip6h_nxt; 3962 /* return only 1st hbh */ 3963 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 3964 ipp->ipp_fields |= IPPF_HOPOPTS; 3965 ipp->ipp_hopopts = tmphopopts; 3966 ipp->ipp_hopoptslen = ehdrlen; 3967 } 3968 break; 3969 case IPPROTO_DSTOPTS: 3970 tmpdstopts = (ip6_dest_t *)whereptr; 3971 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 3972 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 3973 goto done; 3974 nexthdr = tmpdstopts->ip6d_nxt; 3975 /* 3976 * ipp_dstopts is set to the destination header after a 3977 * routing header. 3978 * Assume it is a post-rthdr destination header 3979 * and adjust when we find an rthdr. 3980 */ 3981 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 3982 ipp->ipp_fields |= IPPF_DSTOPTS; 3983 ipp->ipp_dstopts = tmpdstopts; 3984 ipp->ipp_dstoptslen = ehdrlen; 3985 } 3986 break; 3987 case IPPROTO_ROUTING: 3988 tmprthdr = (ip6_rthdr_t *)whereptr; 3989 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 3990 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 3991 goto done; 3992 nexthdr = tmprthdr->ip6r_nxt; 3993 /* return only 1st rthdr */ 3994 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 3995 ipp->ipp_fields |= IPPF_RTHDR; 3996 ipp->ipp_rthdr = tmprthdr; 3997 ipp->ipp_rthdrlen = ehdrlen; 3998 } 3999 /* 4000 * Make any destination header we've seen be a 4001 * pre-rthdr destination header. 
4002 */ 4003 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4004 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4005 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4006 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4007 ipp->ipp_dstopts = NULL; 4008 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4009 ipp->ipp_dstoptslen = 0; 4010 } 4011 break; 4012 case IPPROTO_FRAGMENT: 4013 tmpfraghdr = (ip6_frag_t *)whereptr; 4014 ehdrlen = sizeof (ip6_frag_t); 4015 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4016 goto done; 4017 nexthdr = tmpfraghdr->ip6f_nxt; 4018 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4019 ipp->ipp_fields |= IPPF_FRAGHDR; 4020 ipp->ipp_fraghdr = tmpfraghdr; 4021 ipp->ipp_fraghdrlen = ehdrlen; 4022 } 4023 break; 4024 case IPPROTO_NONE: 4025 default: 4026 goto done; 4027 } 4028 length += ehdrlen; 4029 whereptr += ehdrlen; 4030 } 4031 done: 4032 if (nexthdrp != NULL) 4033 *nexthdrp = nexthdr; 4034 return (length); 4035 } 4036 4037 int 4038 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4039 { 4040 ire_t *ire; 4041 4042 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4043 ire = ire_lookup_local_v6(zoneid, ipst); 4044 if (ire == NULL) { 4045 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4046 return (1); 4047 } 4048 ip6h->ip6_src = ire->ire_addr_v6; 4049 ire_refrele(ire); 4050 } 4051 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4052 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4053 return (0); 4054 } 4055 4056 /* 4057 * Try to determine where and what are the IPv6 header length and 4058 * pointer to nexthdr value for the upper layer protocol (or an 4059 * unknown next hdr). 4060 * 4061 * Parameters returns a pointer to the nexthdr value; 4062 * Must handle malformed packets of various sorts. 4063 * Function returns failure for malformed cases. 
4064 */ 4065 boolean_t 4066 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4067 uint8_t **nexthdrpp) 4068 { 4069 uint16_t length; 4070 uint_t ehdrlen; 4071 uint8_t *nexthdrp; 4072 uint8_t *whereptr; 4073 uint8_t *endptr; 4074 ip6_dest_t *desthdr; 4075 ip6_rthdr_t *rthdr; 4076 ip6_frag_t *fraghdr; 4077 4078 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4079 length = IPV6_HDR_LEN; 4080 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4081 endptr = mp->b_wptr; 4082 4083 nexthdrp = &ip6h->ip6_nxt; 4084 while (whereptr < endptr) { 4085 /* Is there enough left for len + nexthdr? */ 4086 if (whereptr + MIN_EHDR_LEN > endptr) 4087 break; 4088 4089 switch (*nexthdrp) { 4090 case IPPROTO_HOPOPTS: 4091 case IPPROTO_DSTOPTS: 4092 /* Assumes the headers are identical for hbh and dst */ 4093 desthdr = (ip6_dest_t *)whereptr; 4094 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4095 if ((uchar_t *)desthdr + ehdrlen > endptr) 4096 return (B_FALSE); 4097 nexthdrp = &desthdr->ip6d_nxt; 4098 break; 4099 case IPPROTO_ROUTING: 4100 rthdr = (ip6_rthdr_t *)whereptr; 4101 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4102 if ((uchar_t *)rthdr + ehdrlen > endptr) 4103 return (B_FALSE); 4104 nexthdrp = &rthdr->ip6r_nxt; 4105 break; 4106 case IPPROTO_FRAGMENT: 4107 fraghdr = (ip6_frag_t *)whereptr; 4108 ehdrlen = sizeof (ip6_frag_t); 4109 if ((uchar_t *)&fraghdr[1] > endptr) 4110 return (B_FALSE); 4111 nexthdrp = &fraghdr->ip6f_nxt; 4112 break; 4113 case IPPROTO_NONE: 4114 /* No next header means we're finished */ 4115 default: 4116 *hdr_length_ptr = length; 4117 *nexthdrpp = nexthdrp; 4118 return (B_TRUE); 4119 } 4120 length += ehdrlen; 4121 whereptr += ehdrlen; 4122 *hdr_length_ptr = length; 4123 *nexthdrpp = nexthdrp; 4124 } 4125 switch (*nexthdrp) { 4126 case IPPROTO_HOPOPTS: 4127 case IPPROTO_DSTOPTS: 4128 case IPPROTO_ROUTING: 4129 case IPPROTO_FRAGMENT: 4130 /* 4131 * If any know extension headers are still to be processed, 4132 * the 
packet's malformed (or at least all the IP header(s) are 4133 * not in the same mblk - and that should never happen. 4134 */ 4135 return (B_FALSE); 4136 4137 default: 4138 /* 4139 * If we get here, we know that all of the IP headers were in 4140 * the same mblk, even if the ULP header is in the next mblk. 4141 */ 4142 *hdr_length_ptr = length; 4143 *nexthdrpp = nexthdrp; 4144 return (B_TRUE); 4145 } 4146 } 4147 4148 /* 4149 * Return the length of the IPv6 related headers (including extension headers) 4150 * Returns a length even if the packet is malformed. 4151 */ 4152 int 4153 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4154 { 4155 uint16_t hdr_len; 4156 uint8_t *nexthdrp; 4157 4158 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4159 return (hdr_len); 4160 } 4161 4162 /* 4163 * IPv6 - 4164 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4165 * to send out a packet to a destination address for which we do not have 4166 * specific routing information. 4167 * 4168 * Handle non-multicast packets. If ill is non-NULL the match is done 4169 * for that ill. 4170 * 4171 * When a specific ill is specified (using IPV6_PKTINFO, 4172 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4173 * on routing entries (ftable and ctable) that have a matching 4174 * ire->ire_ipif->ipif_ill. Thus this can only be used 4175 * for destinations that are on-link for the specific ill 4176 * and that can appear on multiple links. Thus it is useful 4177 * for multicast destinations, link-local destinations, and 4178 * at some point perhaps for site-local destinations (if the 4179 * node sits at a site boundary). 4180 * We create the cache entries in the regular ctable since 4181 * it can not "confuse" things for other destinations. 4182 * 4183 * NOTE : These are the scopes of some of the variables that point at IRE, 4184 * which needs to be followed while making any future modifications 4185 * to avoid memory leaks. 
4186 * 4187 * - ire and sire are the entries looked up initially by 4188 * ire_ftable_lookup_v6. 4189 * - ipif_ire is used to hold the interface ire associated with 4190 * the new cache ire. But it's scope is limited, so we always REFRELE 4191 * it before branching out to error paths. 4192 * - save_ire is initialized before ire_create, so that ire returned 4193 * by ire_create will not over-write the ire. We REFRELE save_ire 4194 * before breaking out of the switch. 4195 * 4196 * Thus on failures, we have to REFRELE only ire and sire, if they 4197 * are not NULL. 4198 */ 4199 /* ARGSUSED */ 4200 void 4201 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4202 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4203 { 4204 in6_addr_t v6gw; 4205 in6_addr_t dst; 4206 ire_t *ire = NULL; 4207 ipif_t *src_ipif = NULL; 4208 ill_t *dst_ill = NULL; 4209 ire_t *sire = NULL; 4210 ire_t *save_ire; 4211 ip6_t *ip6h; 4212 int err = 0; 4213 mblk_t *first_mp; 4214 ipsec_out_t *io; 4215 ushort_t ire_marks = 0; 4216 int match_flags; 4217 ire_t *first_sire = NULL; 4218 mblk_t *copy_mp = NULL; 4219 mblk_t *xmit_mp = NULL; 4220 in6_addr_t save_dst; 4221 uint32_t multirt_flags = 4222 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4223 boolean_t multirt_is_resolvable; 4224 boolean_t multirt_resolve_next; 4225 boolean_t need_rele = B_FALSE; 4226 boolean_t ip6_asp_table_held = B_FALSE; 4227 tsol_ire_gw_secattr_t *attrp = NULL; 4228 tsol_gcgrp_t *gcgrp = NULL; 4229 tsol_gcgrp_addr_t ga; 4230 4231 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4232 4233 first_mp = mp; 4234 if (mp->b_datap->db_type == M_CTL) { 4235 mp = mp->b_cont; 4236 io = (ipsec_out_t *)first_mp->b_rptr; 4237 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4238 } else { 4239 io = NULL; 4240 } 4241 4242 ip6h = (ip6_t *)mp->b_rptr; 4243 4244 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4245 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4246 goto icmp_err_ret; 4247 } else if 
(IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4248 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4249 goto icmp_err_ret; 4250 } 4251 4252 /* 4253 * If this IRE is created for forwarding or it is not for 4254 * TCP traffic, mark it as temporary. 4255 * 4256 * Is it sufficient just to check the next header?? 4257 */ 4258 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4259 ire_marks |= IRE_MARK_TEMPORARY; 4260 4261 /* 4262 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4263 * chain until it gets the most specific information available. 4264 * For example, we know that there is no IRE_CACHE for this dest, 4265 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4266 * ire_ftable_lookup_v6 will look up the gateway, etc. 4267 */ 4268 4269 if (ill == NULL) { 4270 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4271 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4272 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4273 NULL, &sire, zoneid, 0, msg_getlabel(mp), 4274 match_flags, ipst); 4275 } else { 4276 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4277 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4278 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4279 4280 /* 4281 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4282 * tied to an underlying interface, IS_UNDER_IPMP() may be 4283 * true even when building IREs that will be used for data 4284 * traffic. As such, use the packet's source address to 4285 * determine whether the traffic is test traffic, and set 4286 * MATCH_IRE_MARK_TESTHIDDEN if so. 
4287 */ 4288 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 4289 if (ipif_lookup_testaddr_v6(ill, v6srcp, NULL)) 4290 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 4291 } 4292 4293 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4294 &sire, zoneid, 0, msg_getlabel(mp), match_flags, ipst); 4295 } 4296 4297 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4298 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4299 4300 /* 4301 * We enter a loop that will be run only once in most cases. 4302 * The loop is re-entered in the case where the destination 4303 * can be reached through multiple RTF_MULTIRT-flagged routes. 4304 * The intention is to compute multiple routes to a single 4305 * destination in a single ip_newroute_v6 call. 4306 * The information is contained in sire->ire_flags. 4307 */ 4308 do { 4309 multirt_resolve_next = B_FALSE; 4310 4311 if (dst_ill != NULL) { 4312 ill_refrele(dst_ill); 4313 dst_ill = NULL; 4314 } 4315 if (src_ipif != NULL) { 4316 ipif_refrele(src_ipif); 4317 src_ipif = NULL; 4318 } 4319 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4320 ip3dbg(("ip_newroute_v6: starting new resolution " 4321 "with first_mp %p, tag %d\n", 4322 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4323 4324 /* 4325 * We check if there are trailing unresolved routes for 4326 * the destination contained in sire. 4327 */ 4328 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4329 &sire, multirt_flags, msg_getlabel(mp), ipst); 4330 4331 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4332 "ire %p, sire %p\n", 4333 multirt_is_resolvable, (void *)ire, (void *)sire)); 4334 4335 if (!multirt_is_resolvable) { 4336 /* 4337 * No more multirt routes to resolve; give up 4338 * (all routes resolved or no more resolvable 4339 * routes). 
4340 */ 4341 if (ire != NULL) { 4342 ire_refrele(ire); 4343 ire = NULL; 4344 } 4345 } else { 4346 ASSERT(sire != NULL); 4347 ASSERT(ire != NULL); 4348 /* 4349 * We simply use first_sire as a flag that 4350 * indicates if a resolvable multirt route has 4351 * already been found during the preceding 4352 * loops. If it is not the case, we may have 4353 * to send an ICMP error to report that the 4354 * destination is unreachable. We do not 4355 * IRE_REFHOLD first_sire. 4356 */ 4357 if (first_sire == NULL) { 4358 first_sire = sire; 4359 } 4360 } 4361 } 4362 if ((ire == NULL) || (ire == sire)) { 4363 /* 4364 * either ire == NULL (the destination cannot be 4365 * resolved) or ire == sire (the gateway cannot be 4366 * resolved). At this point, there are no more routes 4367 * to resolve for the destination, thus we exit. 4368 */ 4369 if (ip_debug > 3) { 4370 /* ip2dbg */ 4371 pr_addr_dbg("ip_newroute_v6: " 4372 "can't resolve %s\n", AF_INET6, v6dstp); 4373 } 4374 ip3dbg(("ip_newroute_v6: " 4375 "ire %p, sire %p, first_sire %p\n", 4376 (void *)ire, (void *)sire, (void *)first_sire)); 4377 4378 if (sire != NULL) { 4379 ire_refrele(sire); 4380 sire = NULL; 4381 } 4382 4383 if (first_sire != NULL) { 4384 /* 4385 * At least one multirt route has been found 4386 * in the same ip_newroute() call; there is no 4387 * need to report an ICMP error. 4388 * first_sire was not IRE_REFHOLDed. 4389 */ 4390 MULTIRT_DEBUG_UNTAG(first_mp); 4391 freemsg(first_mp); 4392 return; 4393 } 4394 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4395 RTA_DST, ipst); 4396 goto icmp_err_ret; 4397 } 4398 4399 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4400 4401 /* 4402 * Verify that the returned IRE does not have either the 4403 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4404 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 
4405 */ 4406 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4407 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4408 goto icmp_err_ret; 4409 4410 /* 4411 * Increment the ire_ob_pkt_count field for ire if it is an 4412 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4413 * increment the same for the parent IRE, sire, if it is some 4414 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4415 */ 4416 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4417 UPDATE_OB_PKT_COUNT(ire); 4418 ire->ire_last_used_time = lbolt; 4419 } 4420 4421 if (sire != NULL) { 4422 mutex_enter(&sire->ire_lock); 4423 v6gw = sire->ire_gateway_addr_v6; 4424 mutex_exit(&sire->ire_lock); 4425 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4426 IRE_INTERFACE)) == 0); 4427 UPDATE_OB_PKT_COUNT(sire); 4428 sire->ire_last_used_time = lbolt; 4429 } else { 4430 v6gw = ipv6_all_zeros; 4431 } 4432 4433 /* 4434 * We have a route to reach the destination. Find the 4435 * appropriate ill, then get a source address that matches the 4436 * right scope via ipif_select_source_v6(). 4437 * 4438 * If we are here trying to create an IRE_CACHE for an offlink 4439 * destination and have an IRE_CACHE entry for VNI, then use 4440 * ire_stq instead since VNI's queue is a black hole. 4441 * 4442 * Note: While we pick a dst_ill we are really only interested 4443 * in the ill for load spreading. The source ipif is 4444 * determined by source address selection below. 
4445 */ 4446 if ((ire->ire_type == IRE_CACHE) && 4447 IS_VNI(ire->ire_ipif->ipif_ill)) { 4448 dst_ill = ire->ire_stq->q_ptr; 4449 ill_refhold(dst_ill); 4450 } else { 4451 ill_t *ill = ire->ire_ipif->ipif_ill; 4452 4453 if (IS_IPMP(ill)) { 4454 dst_ill = 4455 ipmp_illgrp_hold_next_ill(ill->ill_grp); 4456 } else { 4457 dst_ill = ill; 4458 ill_refhold(dst_ill); 4459 } 4460 } 4461 4462 if (dst_ill == NULL) { 4463 if (ip_debug > 2) { 4464 pr_addr_dbg("ip_newroute_v6 : no dst " 4465 "ill for dst %s\n", AF_INET6, v6dstp); 4466 } 4467 goto icmp_err_ret; 4468 } 4469 4470 if (ill != NULL && dst_ill != ill && 4471 !IS_IN_SAME_ILLGRP(dst_ill, ill)) { 4472 /* 4473 * We should have found a route matching "ill" 4474 * as we called ire_ftable_lookup_v6 with 4475 * MATCH_IRE_ILL. Rather than asserting when 4476 * there is a mismatch, we just drop the packet. 4477 */ 4478 ip0dbg(("ip_newroute_v6: BOUND_IF failed: " 4479 "dst_ill %s ill %s\n", dst_ill->ill_name, 4480 ill->ill_name)); 4481 goto icmp_err_ret; 4482 } 4483 4484 /* 4485 * Pick a source address which matches the scope of the 4486 * destination address. 4487 * For RTF_SETSRC routes, the source address is imposed by the 4488 * parent ire (sire). 4489 */ 4490 ASSERT(src_ipif == NULL); 4491 4492 /* 4493 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4494 * tied to the underlying interface, IS_UNDER_IPMP() may be 4495 * true even when building IREs that will be used for data 4496 * traffic. As such, see if the packet's source address is a 4497 * test address, and if so use that test address's ipif for 4498 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 4499 * ire_add_v6() can work properly. 
4500 */ 4501 if (ill != NULL && IS_UNDER_IPMP(ill)) 4502 (void) ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 4503 4504 if (src_ipif == NULL && ire->ire_type == IRE_IF_RESOLVER && 4505 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4506 ip6_asp_can_lookup(ipst)) { 4507 /* 4508 * The ire cache entry we're adding is for the 4509 * gateway itself. The source address in this case 4510 * is relative to the gateway's address. 4511 */ 4512 ip6_asp_table_held = B_TRUE; 4513 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4514 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4515 if (src_ipif != NULL) 4516 ire_marks |= IRE_MARK_USESRC_CHECK; 4517 } else if (src_ipif == NULL) { 4518 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4519 /* 4520 * Check that the ipif matching the requested 4521 * source address still exists. 4522 */ 4523 src_ipif = ipif_lookup_addr_v6( 4524 &sire->ire_src_addr_v6, NULL, zoneid, 4525 NULL, NULL, NULL, NULL, ipst); 4526 } 4527 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4528 ip6_asp_table_held = B_TRUE; 4529 src_ipif = ipif_select_source_v6(dst_ill, 4530 v6dstp, B_FALSE, 4531 IPV6_PREFER_SRC_DEFAULT, zoneid); 4532 if (src_ipif != NULL) 4533 ire_marks |= IRE_MARK_USESRC_CHECK; 4534 } 4535 } 4536 4537 if (src_ipif == NULL) { 4538 if (ip_debug > 2) { 4539 /* ip1dbg */ 4540 pr_addr_dbg("ip_newroute_v6: no src for " 4541 "dst %s\n", AF_INET6, v6dstp); 4542 printf("ip_newroute_v6: interface name %s\n", 4543 dst_ill->ill_name); 4544 } 4545 goto icmp_err_ret; 4546 } 4547 4548 if (ip_debug > 3) { 4549 /* ip2dbg */ 4550 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4551 AF_INET6, &v6gw); 4552 } 4553 ip2dbg(("\tire type %s (%d)\n", 4554 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4555 4556 /* 4557 * At this point in ip_newroute_v6(), ire is either the 4558 * IRE_CACHE of the next-hop gateway for an off-subnet 4559 * destination or an IRE_INTERFACE type that should be used 4560 * to resolve an on-subnet destination or an on-subnet 4561 * 
next-hop gateway. 4562 * 4563 * In the IRE_CACHE case, we have the following : 4564 * 4565 * 1) src_ipif - used for getting a source address. 4566 * 4567 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4568 * means packets using this IRE_CACHE will go out on dst_ill. 4569 * 4570 * 3) The IRE sire will point to the prefix that is the longest 4571 * matching route for the destination. These prefix types 4572 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4573 * 4574 * The newly created IRE_CACHE entry for the off-subnet 4575 * destination is tied to both the prefix route and the 4576 * interface route used to resolve the next-hop gateway 4577 * via the ire_phandle and ire_ihandle fields, respectively. 4578 * 4579 * In the IRE_INTERFACE case, we have the following : 4580 * 4581 * 1) src_ipif - used for getting a source address. 4582 * 4583 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4584 * means packets using the IRE_CACHE that we will build 4585 * here will go out on dst_ill. 4586 * 4587 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4588 * to be created will only be tied to the IRE_INTERFACE that 4589 * was derived from the ire_ihandle field. 4590 * 4591 * If sire is non-NULL, it means the destination is off-link 4592 * and we will first create the IRE_CACHE for the gateway. 4593 * Next time through ip_newroute_v6, we will create the 4594 * IRE_CACHE for the final destination as described above. 4595 */ 4596 save_ire = ire; 4597 switch (ire->ire_type) { 4598 case IRE_CACHE: { 4599 ire_t *ipif_ire; 4600 4601 ASSERT(sire != NULL); 4602 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4603 mutex_enter(&ire->ire_lock); 4604 v6gw = ire->ire_gateway_addr_v6; 4605 mutex_exit(&ire->ire_lock); 4606 } 4607 /* 4608 * We need 3 ire's to create a new cache ire for an 4609 * off-link destination from the cache ire of the 4610 * gateway. 4611 * 4612 * 1. The prefix ire 'sire' 4613 * 2. The cache ire of the gateway 'ire' 4614 * 3. 
The interface ire 'ipif_ire' 4615 * 4616 * We have (1) and (2). We lookup (3) below. 4617 * 4618 * If there is no interface route to the gateway, 4619 * it is a race condition, where we found the cache 4620 * but the interface route has been deleted. 4621 */ 4622 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4623 if (ipif_ire == NULL) { 4624 ip1dbg(("ip_newroute_v6:" 4625 "ire_ihandle_lookup_offlink_v6 failed\n")); 4626 goto icmp_err_ret; 4627 } 4628 4629 /* 4630 * Note: the new ire inherits RTF_SETSRC 4631 * and RTF_MULTIRT to propagate these flags from prefix 4632 * to cache. 4633 */ 4634 4635 /* 4636 * Check cached gateway IRE for any security 4637 * attributes; if found, associate the gateway 4638 * credentials group to the destination IRE. 4639 */ 4640 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4641 mutex_enter(&attrp->igsa_lock); 4642 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4643 GCGRP_REFHOLD(gcgrp); 4644 mutex_exit(&attrp->igsa_lock); 4645 } 4646 4647 ire = ire_create_v6( 4648 v6dstp, /* dest address */ 4649 &ipv6_all_ones, /* mask */ 4650 &src_ipif->ipif_v6src_addr, /* source address */ 4651 &v6gw, /* gateway address */ 4652 &save_ire->ire_max_frag, 4653 NULL, /* src nce */ 4654 dst_ill->ill_rq, /* recv-from queue */ 4655 dst_ill->ill_wq, /* send-to queue */ 4656 IRE_CACHE, 4657 src_ipif, 4658 &sire->ire_mask_v6, /* Parent mask */ 4659 sire->ire_phandle, /* Parent handle */ 4660 ipif_ire->ire_ihandle, /* Interface handle */ 4661 sire->ire_flags & /* flags if any */ 4662 (RTF_SETSRC | RTF_MULTIRT), 4663 &(sire->ire_uinfo), 4664 NULL, 4665 gcgrp, 4666 ipst); 4667 4668 if (ire == NULL) { 4669 if (gcgrp != NULL) { 4670 GCGRP_REFRELE(gcgrp); 4671 gcgrp = NULL; 4672 } 4673 ire_refrele(save_ire); 4674 ire_refrele(ipif_ire); 4675 break; 4676 } 4677 4678 /* reference now held by IRE */ 4679 gcgrp = NULL; 4680 4681 ire->ire_marks |= ire_marks; 4682 4683 /* 4684 * Prevent sire and ipif_ire from getting deleted. 
The 4685 * newly created ire is tied to both of them via the 4686 * phandle and ihandle respectively. 4687 */ 4688 IRB_REFHOLD(sire->ire_bucket); 4689 /* Has it been removed already ? */ 4690 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4691 IRB_REFRELE(sire->ire_bucket); 4692 ire_refrele(ipif_ire); 4693 ire_refrele(save_ire); 4694 break; 4695 } 4696 4697 IRB_REFHOLD(ipif_ire->ire_bucket); 4698 /* Has it been removed already ? */ 4699 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4700 IRB_REFRELE(ipif_ire->ire_bucket); 4701 IRB_REFRELE(sire->ire_bucket); 4702 ire_refrele(ipif_ire); 4703 ire_refrele(save_ire); 4704 break; 4705 } 4706 4707 xmit_mp = first_mp; 4708 if (ire->ire_flags & RTF_MULTIRT) { 4709 copy_mp = copymsg(first_mp); 4710 if (copy_mp != NULL) { 4711 xmit_mp = copy_mp; 4712 MULTIRT_DEBUG_TAG(first_mp); 4713 } 4714 } 4715 ire_add_then_send(q, ire, xmit_mp); 4716 if (ip6_asp_table_held) { 4717 ip6_asp_table_refrele(ipst); 4718 ip6_asp_table_held = B_FALSE; 4719 } 4720 ire_refrele(save_ire); 4721 4722 /* Assert that sire is not deleted yet. */ 4723 ASSERT(sire->ire_ptpn != NULL); 4724 IRB_REFRELE(sire->ire_bucket); 4725 4726 /* Assert that ipif_ire is not deleted yet. */ 4727 ASSERT(ipif_ire->ire_ptpn != NULL); 4728 IRB_REFRELE(ipif_ire->ire_bucket); 4729 ire_refrele(ipif_ire); 4730 4731 if (copy_mp != NULL) { 4732 /* 4733 * Search for the next unresolved 4734 * multirt route. 4735 */ 4736 copy_mp = NULL; 4737 ipif_ire = NULL; 4738 ire = NULL; 4739 /* re-enter the loop */ 4740 multirt_resolve_next = B_TRUE; 4741 continue; 4742 } 4743 ire_refrele(sire); 4744 ill_refrele(dst_ill); 4745 ipif_refrele(src_ipif); 4746 return; 4747 } 4748 case IRE_IF_NORESOLVER: 4749 /* 4750 * We have what we need to build an IRE_CACHE. 4751 * 4752 * handle the Gated case, where we create 4753 * a NORESOLVER route for loopback. 
4754 */ 4755 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4756 break; 4757 /* 4758 * TSol note: We are creating the ire cache for the 4759 * destination 'dst'. If 'dst' is offlink, going 4760 * through the first hop 'gw', the security attributes 4761 * of 'dst' must be set to point to the gateway 4762 * credentials of gateway 'gw'. If 'dst' is onlink, it 4763 * is possible that 'dst' is a potential gateway that is 4764 * referenced by some route that has some security 4765 * attributes. Thus in the former case, we need to do a 4766 * gcgrp_lookup of 'gw' while in the latter case we 4767 * need to do gcgrp_lookup of 'dst' itself. 4768 */ 4769 ga.ga_af = AF_INET6; 4770 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4771 ga.ga_addr = v6gw; 4772 else 4773 ga.ga_addr = *v6dstp; 4774 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4775 4776 /* 4777 * Note: the new ire inherits sire flags RTF_SETSRC 4778 * and RTF_MULTIRT to propagate those rules from prefix 4779 * to cache. 4780 */ 4781 ire = ire_create_v6( 4782 v6dstp, /* dest address */ 4783 &ipv6_all_ones, /* mask */ 4784 &src_ipif->ipif_v6src_addr, /* source address */ 4785 &v6gw, /* gateway address */ 4786 &save_ire->ire_max_frag, 4787 NULL, /* no src nce */ 4788 dst_ill->ill_rq, /* recv-from queue */ 4789 dst_ill->ill_wq, /* send-to queue */ 4790 IRE_CACHE, 4791 src_ipif, 4792 &save_ire->ire_mask_v6, /* Parent mask */ 4793 (sire != NULL) ? /* Parent handle */ 4794 sire->ire_phandle : 0, 4795 save_ire->ire_ihandle, /* Interface handle */ 4796 (sire != NULL) ? 
/* flags if any */ 4797 sire->ire_flags & 4798 (RTF_SETSRC | RTF_MULTIRT) : 0, 4799 &(save_ire->ire_uinfo), 4800 NULL, 4801 gcgrp, 4802 ipst); 4803 4804 if (ire == NULL) { 4805 if (gcgrp != NULL) { 4806 GCGRP_REFRELE(gcgrp); 4807 gcgrp = NULL; 4808 } 4809 ire_refrele(save_ire); 4810 break; 4811 } 4812 4813 /* reference now held by IRE */ 4814 gcgrp = NULL; 4815 4816 ire->ire_marks |= ire_marks; 4817 4818 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4819 dst = v6gw; 4820 else 4821 dst = *v6dstp; 4822 err = ndp_noresolver(dst_ill, &dst); 4823 if (err != 0) { 4824 ire_refrele(save_ire); 4825 break; 4826 } 4827 4828 /* Prevent save_ire from getting deleted */ 4829 IRB_REFHOLD(save_ire->ire_bucket); 4830 /* Has it been removed already ? */ 4831 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4832 IRB_REFRELE(save_ire->ire_bucket); 4833 ire_refrele(save_ire); 4834 break; 4835 } 4836 4837 xmit_mp = first_mp; 4838 /* 4839 * In case of MULTIRT, a copy of the current packet 4840 * to send is made to further re-enter the 4841 * loop and attempt another route resolution 4842 */ 4843 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4844 copy_mp = copymsg(first_mp); 4845 if (copy_mp != NULL) { 4846 xmit_mp = copy_mp; 4847 MULTIRT_DEBUG_TAG(first_mp); 4848 } 4849 } 4850 ire_add_then_send(q, ire, xmit_mp); 4851 if (ip6_asp_table_held) { 4852 ip6_asp_table_refrele(ipst); 4853 ip6_asp_table_held = B_FALSE; 4854 } 4855 4856 /* Assert that it is not deleted yet. */ 4857 ASSERT(save_ire->ire_ptpn != NULL); 4858 IRB_REFRELE(save_ire->ire_bucket); 4859 ire_refrele(save_ire); 4860 4861 if (copy_mp != NULL) { 4862 /* 4863 * If we found a (no)resolver, we ignore any 4864 * trailing top priority IRE_CACHE in 4865 * further loops. This ensures that we do not 4866 * omit any (no)resolver despite the priority 4867 * in this call. 4868 * IRE_CACHE, if any, will be processed 4869 * by another thread entering ip_newroute(), 4870 * (on resolver response, for example). 
4871 * We use this to force multiple parallel 4872 * resolution as soon as a packet needs to be 4873 * sent. The result is, after one packet 4874 * emission all reachable routes are generally 4875 * resolved. 4876 * Otherwise, complete resolution of MULTIRT 4877 * routes would require several emissions as 4878 * side effect. 4879 */ 4880 multirt_flags &= ~MULTIRT_CACHEGW; 4881 4882 /* 4883 * Search for the next unresolved multirt 4884 * route. 4885 */ 4886 copy_mp = NULL; 4887 save_ire = NULL; 4888 ire = NULL; 4889 /* re-enter the loop */ 4890 multirt_resolve_next = B_TRUE; 4891 continue; 4892 } 4893 4894 /* Don't need sire anymore */ 4895 if (sire != NULL) 4896 ire_refrele(sire); 4897 ill_refrele(dst_ill); 4898 ipif_refrele(src_ipif); 4899 return; 4900 4901 case IRE_IF_RESOLVER: 4902 /* 4903 * We can't build an IRE_CACHE yet, but at least we 4904 * found a resolver that can help. 4905 */ 4906 dst = *v6dstp; 4907 4908 /* 4909 * To be at this point in the code with a non-zero gw 4910 * means that dst is reachable through a gateway that 4911 * we have never resolved. By changing dst to the gw 4912 * addr we resolve the gateway first. When 4913 * ire_add_then_send() tries to put the IP dg to dst, 4914 * it will reenter ip_newroute() at which time we will 4915 * find the IRE_CACHE for the gw and create another 4916 * IRE_CACHE above (for dst itself). 4917 */ 4918 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4919 save_dst = dst; 4920 dst = v6gw; 4921 v6gw = ipv6_all_zeros; 4922 } 4923 if (dst_ill->ill_flags & ILLF_XRESOLV) { 4924 /* 4925 * Ask the external resolver to do its thing. 
Make an mblk chain in the following form: 4927 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 4928 */ 4929 mblk_t *ire_mp; 4930 mblk_t *areq_mp; 4931 areq_t *areq; 4932 in6_addr_t *addrp; 4933 4934 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 4935 if (ip6_asp_table_held) { 4936 ip6_asp_table_refrele(ipst); 4937 ip6_asp_table_held = B_FALSE; 4938 } 4939 ire = ire_create_mp_v6( 4940 &dst, /* dest address */ 4941 &ipv6_all_ones, /* mask */ 4942 &src_ipif->ipif_v6src_addr, 4943 /* source address */ 4944 &v6gw, /* gateway address */ 4945 NULL, /* no src nce */ 4946 dst_ill->ill_rq, /* recv-from queue */ 4947 dst_ill->ill_wq, /* send-to queue */ 4948 IRE_CACHE, 4949 src_ipif, 4950 &save_ire->ire_mask_v6, /* Parent mask */ 4951 0, 4952 save_ire->ire_ihandle, 4953 /* Interface handle */ 4954 0, /* flags if any */ 4955 &(save_ire->ire_uinfo), 4956 NULL, 4957 NULL, 4958 ipst); 4959 4960 ire_refrele(save_ire); 4961 if (ire == NULL) { 4962 ip1dbg(("ip_newroute_v6:" 4963 "ire is NULL\n")); 4964 break; 4965 } 4966 4967 if ((sire != NULL) && 4968 (sire->ire_flags & RTF_MULTIRT)) { 4969 /* 4970 * processing a copy of the packet to 4971 * send for further resolution loops 4972 */ 4973 copy_mp = copymsg(first_mp); 4974 if (copy_mp != NULL) 4975 MULTIRT_DEBUG_TAG(copy_mp); 4976 } 4977 ire->ire_marks |= ire_marks; 4978 ire_mp = ire->ire_mp; 4979 /* 4980 * Now create or find an nce for this interface. 4981 * The hw addr will need to be set from 4982 * the reply to the AR_ENTRY_QUERY that 4983 * we're about to send. This will be done in 4984 * ire_add_v6(). 4985 */ 4986 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 4987 switch (err) { 4988 case 0: 4989 /* 4990 * New cache entry created. 4991 * Break, then ask the external 4992 * resolver. 4993 */ 4994 break; 4995 case EINPROGRESS: 4996 /* 4997 * Resolution in progress; 4998 * packet has been queued by 4999 * ndp_resolver(). 5000 */ 5001 ire_delete(ire); 5002 ire = NULL; 5003 /* 5004 * Check if another multirt 5005 * route must be resolved. 
5006 */ 5007 if (copy_mp != NULL) { 5008 /* 5009 * If we found a resolver, we 5010 * ignore any trailing top 5011 * priority IRE_CACHE in 5012 * further loops. The reason is 5013 * the same as for noresolver. 5014 */ 5015 multirt_flags &= 5016 ~MULTIRT_CACHEGW; 5017 /* 5018 * Search for the next 5019 * unresolved multirt route. 5020 */ 5021 first_mp = copy_mp; 5022 copy_mp = NULL; 5023 mp = first_mp; 5024 if (mp->b_datap->db_type == 5025 M_CTL) { 5026 mp = mp->b_cont; 5027 } 5028 ASSERT(sire != NULL); 5029 dst = save_dst; 5030 /* 5031 * re-enter the loop 5032 */ 5033 multirt_resolve_next = 5034 B_TRUE; 5035 continue; 5036 } 5037 5038 if (sire != NULL) 5039 ire_refrele(sire); 5040 ill_refrele(dst_ill); 5041 ipif_refrele(src_ipif); 5042 return; 5043 default: 5044 /* 5045 * Transient error; packet will be 5046 * freed. 5047 */ 5048 ire_delete(ire); 5049 ire = NULL; 5050 break; 5051 } 5052 if (err != 0) 5053 break; 5054 /* 5055 * Now set up the AR_ENTRY_QUERY and send it. 5056 */ 5057 areq_mp = ill_arp_alloc(dst_ill, 5058 (uchar_t *)&ipv6_areq_template, 5059 (caddr_t)&dst); 5060 if (areq_mp == NULL) { 5061 ip1dbg(("ip_newroute_v6:" 5062 "areq_mp is NULL\n")); 5063 freemsg(ire_mp); 5064 break; 5065 } 5066 areq = (areq_t *)areq_mp->b_rptr; 5067 addrp = (in6_addr_t *)((char *)areq + 5068 areq->areq_target_addr_offset); 5069 *addrp = dst; 5070 addrp = (in6_addr_t *)((char *)areq + 5071 areq->areq_sender_addr_offset); 5072 *addrp = src_ipif->ipif_v6src_addr; 5073 /* 5074 * link the chain, then send up to the resolver. 5075 */ 5076 linkb(areq_mp, ire_mp); 5077 linkb(areq_mp, mp); 5078 ip1dbg(("ip_newroute_v6:" 5079 "putnext to resolver\n")); 5080 putnext(dst_ill->ill_rq, areq_mp); 5081 /* 5082 * Check if another multirt route 5083 * must be resolved. 5084 */ 5085 ire = NULL; 5086 if (copy_mp != NULL) { 5087 /* 5088 * If we find a resolver, we ignore any 5089 * trailing top priority IRE_CACHE in 5090 * further loops. The reason is the 5091 * same as for noresolver. 
5092 */ 5093 multirt_flags &= ~MULTIRT_CACHEGW; 5094 /* 5095 * Search for the next unresolved 5096 * multirt route. 5097 */ 5098 first_mp = copy_mp; 5099 copy_mp = NULL; 5100 mp = first_mp; 5101 if (mp->b_datap->db_type == M_CTL) { 5102 mp = mp->b_cont; 5103 } 5104 ASSERT(sire != NULL); 5105 dst = save_dst; 5106 /* 5107 * re-enter the loop 5108 */ 5109 multirt_resolve_next = B_TRUE; 5110 continue; 5111 } 5112 5113 if (sire != NULL) 5114 ire_refrele(sire); 5115 ill_refrele(dst_ill); 5116 ipif_refrele(src_ipif); 5117 return; 5118 } 5119 /* 5120 * Non-external resolver case. 5121 * 5122 * TSol note: Please see the note above the 5123 * IRE_IF_NORESOLVER case. 5124 */ 5125 ga.ga_af = AF_INET6; 5126 ga.ga_addr = dst; 5127 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5128 5129 ire = ire_create_v6( 5130 &dst, /* dest address */ 5131 &ipv6_all_ones, /* mask */ 5132 &src_ipif->ipif_v6src_addr, /* source address */ 5133 &v6gw, /* gateway address */ 5134 &save_ire->ire_max_frag, 5135 NULL, /* no src nce */ 5136 dst_ill->ill_rq, /* recv-from queue */ 5137 dst_ill->ill_wq, /* send-to queue */ 5138 IRE_CACHE, 5139 src_ipif, 5140 &save_ire->ire_mask_v6, /* Parent mask */ 5141 0, 5142 save_ire->ire_ihandle, /* Interface handle */ 5143 0, /* flags if any */ 5144 &(save_ire->ire_uinfo), 5145 NULL, 5146 gcgrp, 5147 ipst); 5148 5149 if (ire == NULL) { 5150 if (gcgrp != NULL) { 5151 GCGRP_REFRELE(gcgrp); 5152 gcgrp = NULL; 5153 } 5154 ire_refrele(save_ire); 5155 break; 5156 } 5157 5158 /* reference now held by IRE */ 5159 gcgrp = NULL; 5160 5161 if ((sire != NULL) && 5162 (sire->ire_flags & RTF_MULTIRT)) { 5163 copy_mp = copymsg(first_mp); 5164 if (copy_mp != NULL) 5165 MULTIRT_DEBUG_TAG(copy_mp); 5166 } 5167 5168 ire->ire_marks |= ire_marks; 5169 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5170 switch (err) { 5171 case 0: 5172 /* Prevent save_ire from getting deleted */ 5173 IRB_REFHOLD(save_ire->ire_bucket); 5174 /* Has it been removed already ? 
*/ 5175 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5176 IRB_REFRELE(save_ire->ire_bucket); 5177 ire_refrele(save_ire); 5178 break; 5179 } 5180 5181 /* 5182 * We have a resolved cache entry, 5183 * add in the IRE. 5184 */ 5185 ire_add_then_send(q, ire, first_mp); 5186 if (ip6_asp_table_held) { 5187 ip6_asp_table_refrele(ipst); 5188 ip6_asp_table_held = B_FALSE; 5189 } 5190 5191 /* Assert that it is not deleted yet. */ 5192 ASSERT(save_ire->ire_ptpn != NULL); 5193 IRB_REFRELE(save_ire->ire_bucket); 5194 ire_refrele(save_ire); 5195 /* 5196 * Check if another multirt route 5197 * must be resolved. 5198 */ 5199 ire = NULL; 5200 if (copy_mp != NULL) { 5201 /* 5202 * If we find a resolver, we ignore any 5203 * trailing top priority IRE_CACHE in 5204 * further loops. The reason is the 5205 * same as for noresolver. 5206 */ 5207 multirt_flags &= ~MULTIRT_CACHEGW; 5208 /* 5209 * Search for the next unresolved 5210 * multirt route. 5211 */ 5212 first_mp = copy_mp; 5213 copy_mp = NULL; 5214 mp = first_mp; 5215 if (mp->b_datap->db_type == M_CTL) { 5216 mp = mp->b_cont; 5217 } 5218 ASSERT(sire != NULL); 5219 dst = save_dst; 5220 /* 5221 * re-enter the loop 5222 */ 5223 multirt_resolve_next = B_TRUE; 5224 continue; 5225 } 5226 5227 if (sire != NULL) 5228 ire_refrele(sire); 5229 ill_refrele(dst_ill); 5230 ipif_refrele(src_ipif); 5231 return; 5232 5233 case EINPROGRESS: 5234 /* 5235 * mp was consumed - presumably queued. 5236 * No need for ire, presumably resolution is 5237 * in progress, and ire will be added when the 5238 * address is resolved. 5239 */ 5240 if (ip6_asp_table_held) { 5241 ip6_asp_table_refrele(ipst); 5242 ip6_asp_table_held = B_FALSE; 5243 } 5244 ASSERT(ire->ire_nce == NULL); 5245 ire_delete(ire); 5246 ire_refrele(save_ire); 5247 /* 5248 * Check if another multirt route 5249 * must be resolved. 
5250 */ 5251 ire = NULL; 5252 if (copy_mp != NULL) { 5253 /* 5254 * If we find a resolver, we ignore any 5255 * trailing top priority IRE_CACHE in 5256 * further loops. The reason is the 5257 * same as for noresolver. 5258 */ 5259 multirt_flags &= ~MULTIRT_CACHEGW; 5260 /* 5261 * Search for the next unresolved 5262 * multirt route. 5263 */ 5264 first_mp = copy_mp; 5265 copy_mp = NULL; 5266 mp = first_mp; 5267 if (mp->b_datap->db_type == M_CTL) { 5268 mp = mp->b_cont; 5269 } 5270 ASSERT(sire != NULL); 5271 dst = save_dst; 5272 /* 5273 * re-enter the loop 5274 */ 5275 multirt_resolve_next = B_TRUE; 5276 continue; 5277 } 5278 if (sire != NULL) 5279 ire_refrele(sire); 5280 ill_refrele(dst_ill); 5281 ipif_refrele(src_ipif); 5282 return; 5283 default: 5284 /* Some transient error */ 5285 ASSERT(ire->ire_nce == NULL); 5286 ire_refrele(save_ire); 5287 break; 5288 } 5289 break; 5290 default: 5291 break; 5292 } 5293 if (ip6_asp_table_held) { 5294 ip6_asp_table_refrele(ipst); 5295 ip6_asp_table_held = B_FALSE; 5296 } 5297 } while (multirt_resolve_next); 5298 5299 err_ret: 5300 ip1dbg(("ip_newroute_v6: dropped\n")); 5301 if (src_ipif != NULL) 5302 ipif_refrele(src_ipif); 5303 if (dst_ill != NULL) { 5304 need_rele = B_TRUE; 5305 ill = dst_ill; 5306 } 5307 if (ill != NULL) { 5308 if (mp->b_prev != NULL) { 5309 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5310 } else { 5311 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5312 } 5313 5314 if (need_rele) 5315 ill_refrele(ill); 5316 } else { 5317 if (mp->b_prev != NULL) { 5318 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5319 } else { 5320 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5321 } 5322 } 5323 /* Did this packet originate externally? 
*/ 5324 if (mp->b_prev) { 5325 mp->b_next = NULL; 5326 mp->b_prev = NULL; 5327 } 5328 if (copy_mp != NULL) { 5329 MULTIRT_DEBUG_UNTAG(copy_mp); 5330 freemsg(copy_mp); 5331 } 5332 MULTIRT_DEBUG_UNTAG(first_mp); 5333 freemsg(first_mp); 5334 if (ire != NULL) 5335 ire_refrele(ire); 5336 if (sire != NULL) 5337 ire_refrele(sire); 5338 return; 5339 5340 icmp_err_ret: 5341 if (ip6_asp_table_held) 5342 ip6_asp_table_refrele(ipst); 5343 if (src_ipif != NULL) 5344 ipif_refrele(src_ipif); 5345 if (dst_ill != NULL) { 5346 need_rele = B_TRUE; 5347 ill = dst_ill; 5348 } 5349 ip1dbg(("ip_newroute_v6: no route\n")); 5350 if (sire != NULL) 5351 ire_refrele(sire); 5352 /* 5353 * We need to set sire to NULL to avoid double freeing if we 5354 * ever goto err_ret from below. 5355 */ 5356 sire = NULL; 5357 ip6h = (ip6_t *)mp->b_rptr; 5358 /* Skip ip6i_t header if present */ 5359 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5360 /* Make sure the IPv6 header is present */ 5361 if ((mp->b_wptr - (uchar_t *)ip6h) < 5362 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5363 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5364 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5365 goto err_ret; 5366 } 5367 } 5368 mp->b_rptr += sizeof (ip6i_t); 5369 ip6h = (ip6_t *)mp->b_rptr; 5370 } 5371 /* Did this packet originate externally? 
*/ 5372 if (mp->b_prev) { 5373 if (ill != NULL) { 5374 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5375 } else { 5376 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5377 } 5378 mp->b_next = NULL; 5379 mp->b_prev = NULL; 5380 q = WR(q); 5381 } else { 5382 if (ill != NULL) { 5383 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5384 } else { 5385 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5386 } 5387 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5388 /* Failed */ 5389 if (copy_mp != NULL) { 5390 MULTIRT_DEBUG_UNTAG(copy_mp); 5391 freemsg(copy_mp); 5392 } 5393 MULTIRT_DEBUG_UNTAG(first_mp); 5394 freemsg(first_mp); 5395 if (ire != NULL) 5396 ire_refrele(ire); 5397 if (need_rele) 5398 ill_refrele(ill); 5399 return; 5400 } 5401 } 5402 5403 if (need_rele) 5404 ill_refrele(ill); 5405 5406 /* 5407 * At this point we will have ire only if RTF_BLACKHOLE 5408 * or RTF_REJECT flags are set on the IRE. It will not 5409 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5410 */ 5411 if (ire != NULL) { 5412 if (ire->ire_flags & RTF_BLACKHOLE) { 5413 ire_refrele(ire); 5414 if (copy_mp != NULL) { 5415 MULTIRT_DEBUG_UNTAG(copy_mp); 5416 freemsg(copy_mp); 5417 } 5418 MULTIRT_DEBUG_UNTAG(first_mp); 5419 freemsg(first_mp); 5420 return; 5421 } 5422 ire_refrele(ire); 5423 } 5424 if (ip_debug > 3) { 5425 /* ip2dbg */ 5426 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5427 AF_INET6, v6dstp); 5428 } 5429 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5430 B_FALSE, B_FALSE, zoneid, ipst); 5431 } 5432 5433 /* 5434 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5435 * we need to send out a packet to a destination address for which we do not 5436 * have specific routing information. It is only used for multicast packets. 5437 * 5438 * If unspec_src we allow creating an IRE with source address zero. 5439 * ire_send_v6() will delete it after the packet is sent. 
5440 */ 5441 void 5442 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5443 const in6_addr_t *v6dstp, const in6_addr_t *v6srcp, int unspec_src, 5444 zoneid_t zoneid) 5445 { 5446 ire_t *ire = NULL; 5447 ipif_t *src_ipif = NULL; 5448 int err = 0; 5449 ill_t *dst_ill = NULL; 5450 ire_t *save_ire; 5451 ipsec_out_t *io; 5452 ill_t *ill; 5453 mblk_t *first_mp; 5454 ire_t *fire = NULL; 5455 mblk_t *copy_mp = NULL; 5456 const in6_addr_t *ire_v6srcp; 5457 boolean_t probe = B_FALSE; 5458 boolean_t multirt_resolve_next; 5459 boolean_t ipif_held = B_FALSE; 5460 boolean_t ill_held = B_FALSE; 5461 boolean_t ip6_asp_table_held = B_FALSE; 5462 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5463 5464 /* 5465 * This loop is run only once in most cases. 5466 * We loop to resolve further routes only when the destination 5467 * can be reached through multiple RTF_MULTIRT-flagged ires. 5468 */ 5469 do { 5470 multirt_resolve_next = B_FALSE; 5471 if (dst_ill != NULL) { 5472 ill_refrele(dst_ill); 5473 dst_ill = NULL; 5474 } 5475 5476 if (src_ipif != NULL) { 5477 ipif_refrele(src_ipif); 5478 src_ipif = NULL; 5479 } 5480 ASSERT(ipif != NULL); 5481 ill = ipif->ipif_ill; 5482 5483 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5484 if (ip_debug > 2) { 5485 /* ip1dbg */ 5486 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5487 AF_INET6, v6dstp); 5488 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5489 ill->ill_name, ipif->ipif_isv6); 5490 } 5491 5492 first_mp = mp; 5493 if (mp->b_datap->db_type == M_CTL) { 5494 mp = mp->b_cont; 5495 io = (ipsec_out_t *)first_mp->b_rptr; 5496 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5497 } else { 5498 io = NULL; 5499 } 5500 5501 /* 5502 * If the interface is a pt-pt interface we look for an 5503 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5504 * local_address and the pt-pt destination address. 5505 * Otherwise we just match the local address. 
5506 */ 5507 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5508 goto err_ret; 5509 } 5510 5511 /* 5512 * We check if an IRE_OFFSUBNET for the addr that goes through 5513 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5514 * RTF_MULTIRT flags must be honored. 5515 */ 5516 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5517 ip2dbg(("ip_newroute_ipif_v6: " 5518 "ipif_lookup_multi_ire_v6(" 5519 "ipif %p, dst %08x) = fire %p\n", 5520 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5521 (void *)fire)); 5522 5523 ASSERT(src_ipif == NULL); 5524 5525 /* 5526 * Because nce_xmit() calls ip_output_v6() and NCEs are always 5527 * tied to the underlying interface, IS_UNDER_IPMP() may be 5528 * true even when building IREs that will be used for data 5529 * traffic. As such, see if the packet's source address is a 5530 * test address, and if so use that test address's ipif for 5531 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 5532 * ire_add_v6() can work properly. 5533 */ 5534 if (IS_UNDER_IPMP(ill)) 5535 probe = ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 5536 5537 /* 5538 * Determine the outbound (destination) ill for this route. 5539 * If IPMP is not in use, that's the same as our ill. If IPMP 5540 * is in-use and we're on the IPMP interface, or we're on an 5541 * underlying ill but sending data traffic, use a suitable 5542 * destination ill from the group. The latter case covers a 5543 * subtle edge condition with multicast: when we bring up an 5544 * IPv6 data address, we will create an NCE on an underlying 5545 * interface, and send solicitations to ff02::1, which would 5546 * take us through here, and cause us to create an IRE for 5547 * ff02::1. To meet our defined semantics for multicast (and 5548 * ensure there aren't unexpected echoes), that IRE needs to 5549 * use the IPMP group's nominated multicast interface. 5550 * 5551 * Note: the source ipif is determined by source address 5552 * selection later. 
5553 */ 5554 if (IS_IPMP(ill) || (IS_UNDER_IPMP(ill) && !probe)) { 5555 ill_t *ipmp_ill; 5556 ipmp_illgrp_t *illg; 5557 5558 if (IS_UNDER_IPMP(ill)) { 5559 ipmp_ill = ipmp_ill_hold_ipmp_ill(ill); 5560 } else { 5561 ipmp_ill = ill; 5562 ill_refhold(ipmp_ill); /* for symmetry */ 5563 } 5564 5565 if (ipmp_ill == NULL) 5566 goto err_ret; 5567 5568 illg = ipmp_ill->ill_grp; 5569 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 5570 dst_ill = ipmp_illgrp_hold_cast_ill(illg); 5571 else 5572 dst_ill = ipmp_illgrp_hold_next_ill(illg); 5573 5574 ill_refrele(ipmp_ill); 5575 } else { 5576 dst_ill = ill; 5577 ill_refhold(dst_ill); /* for symmetry */ 5578 } 5579 5580 if (dst_ill == NULL) { 5581 if (ip_debug > 2) { 5582 pr_addr_dbg("ip_newroute_ipif_v6: " 5583 "no dst ill for dst %s\n", 5584 AF_INET6, v6dstp); 5585 } 5586 goto err_ret; 5587 } 5588 5589 /* 5590 * Pick a source address which matches the scope of the 5591 * destination address. 5592 * For RTF_SETSRC routes, the source address is imposed by the 5593 * parent ire (fire). 5594 */ 5595 5596 if (src_ipif == NULL && fire != NULL && 5597 (fire->ire_flags & RTF_SETSRC)) { 5598 /* 5599 * Check that the ipif matching the requested source 5600 * address still exists. 
5601 */ 5602 src_ipif = ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5603 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5604 } 5605 5606 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5607 ip6_asp_table_held = B_TRUE; 5608 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5609 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5610 } 5611 5612 if (src_ipif == NULL) { 5613 if (!unspec_src) { 5614 if (ip_debug > 2) { 5615 /* ip1dbg */ 5616 pr_addr_dbg("ip_newroute_ipif_v6: " 5617 "no src for dst %s\n", 5618 AF_INET6, v6dstp); 5619 printf(" through interface %s\n", 5620 dst_ill->ill_name); 5621 } 5622 goto err_ret; 5623 } 5624 ire_v6srcp = &ipv6_all_zeros; 5625 src_ipif = ipif; 5626 ipif_refhold(src_ipif); 5627 } else { 5628 ire_v6srcp = &src_ipif->ipif_v6src_addr; 5629 } 5630 5631 ire = ipif_to_ire_v6(ipif); 5632 if (ire == NULL) { 5633 if (ip_debug > 2) { 5634 /* ip1dbg */ 5635 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5636 AF_INET6, &ipif->ipif_v6lcl_addr); 5637 printf("ip_newroute_ipif_v6: " 5638 "if %s\n", dst_ill->ill_name); 5639 } 5640 goto err_ret; 5641 } 5642 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5643 goto err_ret; 5644 5645 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5646 5647 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5648 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5649 if (ip_debug > 2) { 5650 /* ip1dbg */ 5651 pr_addr_dbg(" address %s\n", 5652 AF_INET6, &ire->ire_src_addr_v6); 5653 } 5654 save_ire = ire; 5655 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5656 (void *)ire, (void *)ipif)); 5657 5658 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5659 /* 5660 * an IRE_OFFSUBET was looked up 5661 * on that interface. 5662 * this ire has RTF_MULTIRT flag, 5663 * so the resolution loop 5664 * will be re-entered to resolve 5665 * additional routes on other 5666 * interfaces. For that purpose, 5667 * a copy of the packet is 5668 * made at this point. 
5669 */ 5670 fire->ire_last_used_time = lbolt; 5671 copy_mp = copymsg(first_mp); 5672 if (copy_mp) { 5673 MULTIRT_DEBUG_TAG(copy_mp); 5674 } 5675 } 5676 5677 switch (ire->ire_type) { 5678 case IRE_IF_NORESOLVER: { 5679 /* 5680 * We have what we need to build an IRE_CACHE. 5681 * 5682 * handle the Gated case, where we create 5683 * a NORESOLVER route for loopback. 5684 */ 5685 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5686 break; 5687 /* 5688 * The newly created ire will inherit the flags of the 5689 * parent ire, if any. 5690 */ 5691 ire = ire_create_v6( 5692 v6dstp, /* dest address */ 5693 &ipv6_all_ones, /* mask */ 5694 ire_v6srcp, /* source address */ 5695 NULL, /* gateway address */ 5696 &save_ire->ire_max_frag, 5697 NULL, /* no src nce */ 5698 dst_ill->ill_rq, /* recv-from queue */ 5699 dst_ill->ill_wq, /* send-to queue */ 5700 IRE_CACHE, 5701 src_ipif, 5702 NULL, 5703 (fire != NULL) ? /* Parent handle */ 5704 fire->ire_phandle : 0, 5705 save_ire->ire_ihandle, /* Interface handle */ 5706 (fire != NULL) ? 5707 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5708 0, 5709 &ire_uinfo_null, 5710 NULL, 5711 NULL, 5712 ipst); 5713 5714 if (ire == NULL) { 5715 ire_refrele(save_ire); 5716 break; 5717 } 5718 5719 err = ndp_noresolver(dst_ill, v6dstp); 5720 if (err != 0) { 5721 ire_refrele(save_ire); 5722 break; 5723 } 5724 5725 /* Prevent save_ire from getting deleted */ 5726 IRB_REFHOLD(save_ire->ire_bucket); 5727 /* Has it been removed already ? */ 5728 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5729 IRB_REFRELE(save_ire->ire_bucket); 5730 ire_refrele(save_ire); 5731 break; 5732 } 5733 5734 ire_add_then_send(q, ire, first_mp); 5735 if (ip6_asp_table_held) { 5736 ip6_asp_table_refrele(ipst); 5737 ip6_asp_table_held = B_FALSE; 5738 } 5739 5740 /* Assert that it is not deleted yet. 
*/ 5741 ASSERT(save_ire->ire_ptpn != NULL); 5742 IRB_REFRELE(save_ire->ire_bucket); 5743 ire_refrele(save_ire); 5744 if (fire != NULL) { 5745 ire_refrele(fire); 5746 fire = NULL; 5747 } 5748 5749 /* 5750 * The resolution loop is re-entered if we 5751 * actually are in a multirouting case. 5752 */ 5753 if (copy_mp != NULL) { 5754 boolean_t need_resolve = 5755 ire_multirt_need_resolve_v6(v6dstp, 5756 msg_getlabel(copy_mp), ipst); 5757 if (!need_resolve) { 5758 MULTIRT_DEBUG_UNTAG(copy_mp); 5759 freemsg(copy_mp); 5760 copy_mp = NULL; 5761 } else { 5762 /* 5763 * ipif_lookup_group_v6() calls 5764 * ire_lookup_multi_v6() that uses 5765 * ire_ftable_lookup_v6() to find 5766 * an IRE_INTERFACE for the group. 5767 * In the multirt case, 5768 * ire_lookup_multi_v6() then invokes 5769 * ire_multirt_lookup_v6() to find 5770 * the next resolvable ire. 5771 * As a result, we obtain a new 5772 * interface, derived from the 5773 * next ire. 5774 */ 5775 if (ipif_held) { 5776 ipif_refrele(ipif); 5777 ipif_held = B_FALSE; 5778 } 5779 ipif = ipif_lookup_group_v6(v6dstp, 5780 zoneid, ipst); 5781 ip2dbg(("ip_newroute_ipif: " 5782 "multirt dst %08x, ipif %p\n", 5783 ntohl(V4_PART_OF_V6((*v6dstp))), 5784 (void *)ipif)); 5785 if (ipif != NULL) { 5786 ipif_held = B_TRUE; 5787 mp = copy_mp; 5788 copy_mp = NULL; 5789 multirt_resolve_next = 5790 B_TRUE; 5791 continue; 5792 } else { 5793 freemsg(copy_mp); 5794 } 5795 } 5796 } 5797 ill_refrele(dst_ill); 5798 if (ipif_held) { 5799 ipif_refrele(ipif); 5800 ipif_held = B_FALSE; 5801 } 5802 if (src_ipif != NULL) 5803 ipif_refrele(src_ipif); 5804 return; 5805 } 5806 case IRE_IF_RESOLVER: { 5807 5808 ASSERT(dst_ill->ill_isv6); 5809 5810 /* 5811 * We obtain a partial IRE_CACHE which we will pass 5812 * along with the resolver query. When the response 5813 * comes back it will be there ready for us to add. 5814 */ 5815 /* 5816 * the newly created ire will inherit the flags of the 5817 * parent ire, if any. 
5818 */ 5819 ire = ire_create_v6( 5820 v6dstp, /* dest address */ 5821 &ipv6_all_ones, /* mask */ 5822 ire_v6srcp, /* source address */ 5823 NULL, /* gateway address */ 5824 &save_ire->ire_max_frag, 5825 NULL, /* src nce */ 5826 dst_ill->ill_rq, /* recv-from queue */ 5827 dst_ill->ill_wq, /* send-to queue */ 5828 IRE_CACHE, 5829 src_ipif, 5830 NULL, 5831 (fire != NULL) ? /* Parent handle */ 5832 fire->ire_phandle : 0, 5833 save_ire->ire_ihandle, /* Interface handle */ 5834 (fire != NULL) ? 5835 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5836 0, 5837 &ire_uinfo_null, 5838 NULL, 5839 NULL, 5840 ipst); 5841 5842 if (ire == NULL) { 5843 ire_refrele(save_ire); 5844 break; 5845 } 5846 5847 /* Resolve and add ire to the ctable */ 5848 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5849 switch (err) { 5850 case 0: 5851 /* Prevent save_ire from getting deleted */ 5852 IRB_REFHOLD(save_ire->ire_bucket); 5853 /* Has it been removed already ? */ 5854 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5855 IRB_REFRELE(save_ire->ire_bucket); 5856 ire_refrele(save_ire); 5857 break; 5858 } 5859 /* 5860 * We have a resolved cache entry, 5861 * add in the IRE. 5862 */ 5863 ire_add_then_send(q, ire, first_mp); 5864 if (ip6_asp_table_held) { 5865 ip6_asp_table_refrele(ipst); 5866 ip6_asp_table_held = B_FALSE; 5867 } 5868 5869 /* Assert that it is not deleted yet. */ 5870 ASSERT(save_ire->ire_ptpn != NULL); 5871 IRB_REFRELE(save_ire->ire_bucket); 5872 ire_refrele(save_ire); 5873 if (fire != NULL) { 5874 ire_refrele(fire); 5875 fire = NULL; 5876 } 5877 5878 /* 5879 * The resolution loop is re-entered if we 5880 * actually are in a multirouting case. 
5881 */ 5882 if (copy_mp != NULL) { 5883 boolean_t need_resolve = 5884 ire_multirt_need_resolve_v6(v6dstp, 5885 msg_getlabel(copy_mp), ipst); 5886 if (!need_resolve) { 5887 MULTIRT_DEBUG_UNTAG(copy_mp); 5888 freemsg(copy_mp); 5889 copy_mp = NULL; 5890 } else { 5891 /* 5892 * ipif_lookup_group_v6() calls 5893 * ire_lookup_multi_v6() that 5894 * uses ire_ftable_lookup_v6() 5895 * to find an IRE_INTERFACE for 5896 * the group. In the multirt 5897 * case, ire_lookup_multi_v6() 5898 * then invokes 5899 * ire_multirt_lookup_v6() to 5900 * find the next resolvable ire. 5901 * As a result, we obtain a new 5902 * interface, derived from the 5903 * next ire. 5904 */ 5905 if (ipif_held) { 5906 ipif_refrele(ipif); 5907 ipif_held = B_FALSE; 5908 } 5909 ipif = ipif_lookup_group_v6( 5910 v6dstp, zoneid, ipst); 5911 ip2dbg(("ip_newroute_ipif: " 5912 "multirt dst %08x, " 5913 "ipif %p\n", 5914 ntohl(V4_PART_OF_V6( 5915 (*v6dstp))), 5916 (void *)ipif)); 5917 if (ipif != NULL) { 5918 ipif_held = B_TRUE; 5919 mp = copy_mp; 5920 copy_mp = NULL; 5921 multirt_resolve_next = 5922 B_TRUE; 5923 continue; 5924 } else { 5925 freemsg(copy_mp); 5926 } 5927 } 5928 } 5929 ill_refrele(dst_ill); 5930 if (ipif_held) { 5931 ipif_refrele(ipif); 5932 ipif_held = B_FALSE; 5933 } 5934 if (src_ipif != NULL) 5935 ipif_refrele(src_ipif); 5936 return; 5937 5938 case EINPROGRESS: 5939 /* 5940 * mp was consumed - presumably queued. 5941 * No need for ire, presumably resolution is 5942 * in progress, and ire will be added when the 5943 * address is resolved. 5944 */ 5945 if (ip6_asp_table_held) { 5946 ip6_asp_table_refrele(ipst); 5947 ip6_asp_table_held = B_FALSE; 5948 } 5949 ire_delete(ire); 5950 ire_refrele(save_ire); 5951 if (fire != NULL) { 5952 ire_refrele(fire); 5953 fire = NULL; 5954 } 5955 5956 /* 5957 * The resolution loop is re-entered if we 5958 * actually are in a multirouting case. 
5959 */ 5960 if (copy_mp != NULL) { 5961 boolean_t need_resolve = 5962 ire_multirt_need_resolve_v6(v6dstp, 5963 msg_getlabel(copy_mp), ipst); 5964 if (!need_resolve) { 5965 MULTIRT_DEBUG_UNTAG(copy_mp); 5966 freemsg(copy_mp); 5967 copy_mp = NULL; 5968 } else { 5969 /* 5970 * ipif_lookup_group_v6() calls 5971 * ire_lookup_multi_v6() that 5972 * uses ire_ftable_lookup_v6() 5973 * to find an IRE_INTERFACE for 5974 * the group. In the multirt 5975 * case, ire_lookup_multi_v6() 5976 * then invokes 5977 * ire_multirt_lookup_v6() to 5978 * find the next resolvable ire. 5979 * As a result, we obtain a new 5980 * interface, derived from the 5981 * next ire. 5982 */ 5983 if (ipif_held) { 5984 ipif_refrele(ipif); 5985 ipif_held = B_FALSE; 5986 } 5987 ipif = ipif_lookup_group_v6( 5988 v6dstp, zoneid, ipst); 5989 ip2dbg(("ip_newroute_ipif: " 5990 "multirt dst %08x, " 5991 "ipif %p\n", 5992 ntohl(V4_PART_OF_V6( 5993 (*v6dstp))), 5994 (void *)ipif)); 5995 if (ipif != NULL) { 5996 ipif_held = B_TRUE; 5997 mp = copy_mp; 5998 copy_mp = NULL; 5999 multirt_resolve_next = 6000 B_TRUE; 6001 continue; 6002 } else { 6003 freemsg(copy_mp); 6004 } 6005 } 6006 } 6007 ill_refrele(dst_ill); 6008 if (ipif_held) { 6009 ipif_refrele(ipif); 6010 ipif_held = B_FALSE; 6011 } 6012 if (src_ipif != NULL) 6013 ipif_refrele(src_ipif); 6014 return; 6015 default: 6016 /* Some transient error */ 6017 ire_refrele(save_ire); 6018 break; 6019 } 6020 break; 6021 } 6022 default: 6023 break; 6024 } 6025 if (ip6_asp_table_held) { 6026 ip6_asp_table_refrele(ipst); 6027 ip6_asp_table_held = B_FALSE; 6028 } 6029 } while (multirt_resolve_next); 6030 6031 err_ret: 6032 if (ip6_asp_table_held) 6033 ip6_asp_table_refrele(ipst); 6034 if (ire != NULL) 6035 ire_refrele(ire); 6036 if (fire != NULL) 6037 ire_refrele(fire); 6038 if (ipif != NULL && ipif_held) 6039 ipif_refrele(ipif); 6040 if (src_ipif != NULL) 6041 ipif_refrele(src_ipif); 6042 6043 /* Multicast - no point in trying to generate ICMP error */ 6044 if (dst_ill != 
NULL) { 6045 ill = dst_ill; 6046 ill_held = B_TRUE; 6047 } 6048 if (mp->b_prev || mp->b_next) { 6049 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6050 } else { 6051 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6052 } 6053 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6054 mp->b_next = NULL; 6055 mp->b_prev = NULL; 6056 freemsg(first_mp); 6057 if (ill_held) 6058 ill_refrele(ill); 6059 } 6060 6061 /* 6062 * Parse and process any hop-by-hop or destination options. 6063 * 6064 * Assumes that q is an ill read queue so that ICMP errors for link-local 6065 * destinations are sent out the correct interface. 6066 * 6067 * Returns -1 if there was an error and mp has been consumed. 6068 * Returns 0 if no special action is needed. 6069 * Returns 1 if the packet contained a router alert option for this node 6070 * which is verified to be "interesting/known" for our implementation. 6071 * 6072 * XXX Note: In future as more hbh or dest options are defined, 6073 * it may be better to have different routines for hbh and dest 6074 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6075 * may have same value in different namespaces. Or is it same namespace ?? 
6076 * Current code checks for each opt_type (other than pads) if it is in 6077 * the expected nexthdr (hbh or dest) 6078 */ 6079 static int 6080 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6081 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6082 { 6083 uint8_t opt_type; 6084 uint_t optused; 6085 int ret = 0; 6086 mblk_t *first_mp; 6087 const char *errtype; 6088 zoneid_t zoneid; 6089 ill_t *ill = q->q_ptr; 6090 ipif_t *ipif; 6091 6092 first_mp = mp; 6093 if (mp->b_datap->db_type == M_CTL) { 6094 mp = mp->b_cont; 6095 } 6096 6097 while (optlen != 0) { 6098 opt_type = *optptr; 6099 if (opt_type == IP6OPT_PAD1) { 6100 optused = 1; 6101 } else { 6102 if (optlen < 2) 6103 goto bad_opt; 6104 errtype = "malformed"; 6105 if (opt_type == ip6opt_ls) { 6106 optused = 2 + optptr[1]; 6107 if (optused > optlen) 6108 goto bad_opt; 6109 } else switch (opt_type) { 6110 case IP6OPT_PADN: 6111 /* 6112 * Note:We don't verify that (N-2) pad octets 6113 * are zero as required by spec. Adhere to 6114 * "be liberal in what you accept..." part of 6115 * implementation philosophy (RFC791,RFC1122) 6116 */ 6117 optused = 2 + optptr[1]; 6118 if (optused > optlen) 6119 goto bad_opt; 6120 break; 6121 6122 case IP6OPT_JUMBO: 6123 if (hdr_type != IPPROTO_HOPOPTS) 6124 goto opt_error; 6125 goto opt_error; /* XXX Not implemented! 
*/ 6126 6127 case IP6OPT_ROUTER_ALERT: { 6128 struct ip6_opt_router *or; 6129 6130 if (hdr_type != IPPROTO_HOPOPTS) 6131 goto opt_error; 6132 optused = 2 + optptr[1]; 6133 if (optused > optlen) 6134 goto bad_opt; 6135 or = (struct ip6_opt_router *)optptr; 6136 /* Check total length and alignment */ 6137 if (optused != sizeof (*or) || 6138 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6139 goto opt_error; 6140 /* Check value */ 6141 switch (*((uint16_t *)or->ip6or_value)) { 6142 case IP6_ALERT_MLD: 6143 case IP6_ALERT_RSVP: 6144 ret = 1; 6145 } 6146 break; 6147 } 6148 case IP6OPT_HOME_ADDRESS: { 6149 /* 6150 * Minimal support for the home address option 6151 * (which is required by all IPv6 nodes). 6152 * Implement by just swapping the home address 6153 * and source address. 6154 * XXX Note: this has IPsec implications since 6155 * AH needs to take this into account. 6156 * Also, when IPsec is used we need to ensure 6157 * that this is only processed once 6158 * in the received packet (to avoid swapping 6159 * back and forth). 6160 * NOTE:This option processing is considered 6161 * to be unsafe and prone to a denial of 6162 * service attack. 6163 * The current processing is not safe even with 6164 * IPsec secured IP packets. Since the home 6165 * address option processing requirement still 6166 * is in the IETF draft and in the process of 6167 * being redefined for its usage, it has been 6168 * decided to turn off the option by default. 6169 * If this section of code needs to be executed, 6170 * ndd variable ip6_ignore_home_address_opt 6171 * should be set to 0 at the user's own risk. 6172 */ 6173 struct ip6_opt_home_address *oh; 6174 in6_addr_t tmp; 6175 6176 if (ipst->ips_ipv6_ignore_home_address_opt) 6177 goto opt_error; 6178 6179 if (hdr_type != IPPROTO_DSTOPTS) 6180 goto opt_error; 6181 optused = 2 + optptr[1]; 6182 if (optused > optlen) 6183 goto bad_opt; 6184 6185 /* 6186 * We did this dest. opt the first time 6187 * around (i.e. before AH processing). 
6188 * If we've done AH... stop now. 6189 */ 6190 if (first_mp != mp) { 6191 ipsec_in_t *ii; 6192 6193 ii = (ipsec_in_t *)first_mp->b_rptr; 6194 if (ii->ipsec_in_ah_sa != NULL) 6195 break; 6196 } 6197 6198 oh = (struct ip6_opt_home_address *)optptr; 6199 /* Check total length and alignment */ 6200 if (optused < sizeof (*oh) || 6201 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6202 goto opt_error; 6203 /* Swap ip6_src and the home address */ 6204 tmp = ip6h->ip6_src; 6205 /* XXX Note: only 8 byte alignment option */ 6206 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6207 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6208 break; 6209 } 6210 6211 case IP6OPT_TUNNEL_LIMIT: 6212 if (hdr_type != IPPROTO_DSTOPTS) { 6213 goto opt_error; 6214 } 6215 optused = 2 + optptr[1]; 6216 if (optused > optlen) { 6217 goto bad_opt; 6218 } 6219 if (optused != 3) { 6220 goto opt_error; 6221 } 6222 break; 6223 6224 default: 6225 errtype = "unknown"; 6226 /* FALLTHROUGH */ 6227 opt_error: 6228 /* Determine which zone should send error */ 6229 zoneid = ipif_lookup_addr_zoneid_v6( 6230 &ip6h->ip6_dst, ill, ipst); 6231 switch (IP6OPT_TYPE(opt_type)) { 6232 case IP6OPT_TYPE_SKIP: 6233 optused = 2 + optptr[1]; 6234 if (optused > optlen) 6235 goto bad_opt; 6236 ip1dbg(("ip_process_options_v6: %s " 6237 "opt 0x%x skipped\n", 6238 errtype, opt_type)); 6239 break; 6240 case IP6OPT_TYPE_DISCARD: 6241 ip1dbg(("ip_process_options_v6: %s " 6242 "opt 0x%x; packet dropped\n", 6243 errtype, opt_type)); 6244 freemsg(first_mp); 6245 return (-1); 6246 case IP6OPT_TYPE_ICMP: 6247 if (zoneid == ALL_ZONES) { 6248 freemsg(first_mp); 6249 return (-1); 6250 } 6251 icmp_param_problem_v6(WR(q), first_mp, 6252 ICMP6_PARAMPROB_OPTION, 6253 (uint32_t)(optptr - 6254 (uint8_t *)ip6h), 6255 B_FALSE, B_FALSE, zoneid, ipst); 6256 return (-1); 6257 case IP6OPT_TYPE_FORCEICMP: 6258 /* 6259 * If we don't have a zone and the dst 6260 * addr is multicast, then pick a zone 6261 * based on the inbound interface. 
6262 */ 6263 if (zoneid == ALL_ZONES && 6264 IN6_IS_ADDR_MULTICAST( 6265 &ip6h->ip6_dst)) { 6266 ipif = ipif_select_source_v6( 6267 ill, &ip6h->ip6_src, 6268 B_TRUE, 6269 IPV6_PREFER_SRC_DEFAULT, 6270 ALL_ZONES); 6271 if (ipif != NULL) { 6272 zoneid = 6273 ipif->ipif_zoneid; 6274 ipif_refrele(ipif); 6275 } 6276 } 6277 if (zoneid == ALL_ZONES) { 6278 freemsg(first_mp); 6279 return (-1); 6280 } 6281 icmp_param_problem_v6(WR(q), first_mp, 6282 ICMP6_PARAMPROB_OPTION, 6283 (uint32_t)(optptr - 6284 (uint8_t *)ip6h), 6285 B_FALSE, B_TRUE, zoneid, ipst); 6286 return (-1); 6287 default: 6288 ASSERT(0); 6289 } 6290 } 6291 } 6292 optlen -= optused; 6293 optptr += optused; 6294 } 6295 return (ret); 6296 6297 bad_opt: 6298 /* Determine which zone should send error */ 6299 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6300 if (zoneid == ALL_ZONES) { 6301 freemsg(first_mp); 6302 } else { 6303 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6304 (uint32_t)(optptr - (uint8_t *)ip6h), 6305 B_FALSE, B_FALSE, zoneid, ipst); 6306 } 6307 return (-1); 6308 } 6309 6310 /* 6311 * Process a routing header that is not yet empty. 6312 * Because of RFC 5095, we now reject all route headers. 6313 */ 6314 static void 6315 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6316 ill_t *ill, mblk_t *hada_mp) 6317 { 6318 ip_stack_t *ipst = ill->ill_ipst; 6319 6320 ASSERT(rth->ip6r_segleft != 0); 6321 6322 if (!ipst->ips_ipv6_forward_src_routed) { 6323 /* XXX Check for source routed out same interface? 
*/ 6324 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6325 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6326 freemsg(hada_mp); 6327 freemsg(mp); 6328 return; 6329 } 6330 if (hada_mp != NULL) { 6331 freemsg(hada_mp); 6332 freemsg(mp); 6333 return; 6334 } 6335 /* Sent by forwarding path, and router is global zone */ 6336 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 6337 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), B_FALSE, 6338 B_FALSE, GLOBAL_ZONEID, ipst); 6339 } 6340 6341 /* 6342 * Read side put procedure for IPv6 module. 6343 */ 6344 void 6345 ip_rput_v6(queue_t *q, mblk_t *mp) 6346 { 6347 mblk_t *first_mp; 6348 mblk_t *hada_mp = NULL; 6349 ip6_t *ip6h; 6350 boolean_t ll_multicast = B_FALSE; 6351 boolean_t mctl_present = B_FALSE; 6352 ill_t *ill; 6353 struct iocblk *iocp; 6354 uint_t flags = 0; 6355 mblk_t *dl_mp; 6356 ip_stack_t *ipst; 6357 int check; 6358 6359 ill = (ill_t *)q->q_ptr; 6360 ipst = ill->ill_ipst; 6361 if (ill->ill_state_flags & ILL_CONDEMNED) { 6362 union DL_primitives *dl; 6363 6364 dl = (union DL_primitives *)mp->b_rptr; 6365 /* 6366 * Things are opening or closing - only accept DLPI 6367 * ack messages. If the stream is closing and ip_wsrv 6368 * has completed, ip_close is out of the qwait, but has 6369 * not yet completed qprocsoff. Don't proceed any further 6370 * because the ill has been cleaned up and things hanging 6371 * off the ill have been freed. 6372 */ 6373 if ((mp->b_datap->db_type != M_PCPROTO) || 6374 (dl->dl_primitive == DL_UNITDATA_IND)) { 6375 inet_freemsg(mp); 6376 return; 6377 } 6378 } 6379 6380 dl_mp = NULL; 6381 switch (mp->b_datap->db_type) { 6382 case M_DATA: { 6383 int hlen; 6384 uchar_t *ucp; 6385 struct ether_header *eh; 6386 dl_unitdata_ind_t *dui; 6387 6388 /* 6389 * This is a work-around for CR 6451644, a bug in Nemo. It 6390 * should be removed when that problem is fixed. 
6391 */ 6392 if (ill->ill_mactype == DL_ETHER && 6393 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6394 (ucp = mp->b_rptr)[-1] == (ETHERTYPE_IPV6 & 0xFF) && 6395 ucp[-2] == (ETHERTYPE_IPV6 >> 8)) { 6396 if (hlen >= sizeof (struct ether_vlan_header) && 6397 ucp[-5] == 0 && ucp[-6] == 0x81) 6398 ucp -= sizeof (struct ether_vlan_header); 6399 else 6400 ucp -= sizeof (struct ether_header); 6401 /* 6402 * If it's a group address, then fabricate a 6403 * DL_UNITDATA_IND message. 6404 */ 6405 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6406 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6407 BPRI_HI)) != NULL) { 6408 eh = (struct ether_header *)ucp; 6409 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6410 DB_TYPE(dl_mp) = M_PROTO; 6411 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6412 dui->dl_primitive = DL_UNITDATA_IND; 6413 dui->dl_dest_addr_length = 8; 6414 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6415 dui->dl_src_addr_length = 8; 6416 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6417 8; 6418 dui->dl_group_address = 1; 6419 ucp = (uchar_t *)(dui + 1); 6420 if (ill->ill_sap_length > 0) 6421 ucp += ill->ill_sap_length; 6422 bcopy(&eh->ether_dhost, ucp, 6); 6423 bcopy(&eh->ether_shost, ucp + 8, 6); 6424 ucp = (uchar_t *)(dui + 1); 6425 if (ill->ill_sap_length < 0) 6426 ucp += 8 + ill->ill_sap_length; 6427 bcopy(&eh->ether_type, ucp, 2); 6428 bcopy(&eh->ether_type, ucp + 8, 2); 6429 } 6430 } 6431 break; 6432 } 6433 6434 case M_PROTO: 6435 case M_PCPROTO: 6436 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6437 DL_UNITDATA_IND) { 6438 /* Go handle anything other than data elsewhere. */ 6439 ip_rput_dlpi(q, mp); 6440 return; 6441 } 6442 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6443 6444 /* Save the DLPI header. 
*/ 6445 dl_mp = mp; 6446 mp = mp->b_cont; 6447 dl_mp->b_cont = NULL; 6448 break; 6449 case M_BREAK: 6450 panic("ip_rput_v6: got an M_BREAK"); 6451 /*NOTREACHED*/ 6452 case M_IOCACK: 6453 iocp = (struct iocblk *)mp->b_rptr; 6454 switch (iocp->ioc_cmd) { 6455 case DL_IOC_HDR_INFO: 6456 ill = (ill_t *)q->q_ptr; 6457 ill_fastpath_ack(ill, mp); 6458 return; 6459 default: 6460 putnext(q, mp); 6461 return; 6462 } 6463 /* FALLTHRU */ 6464 case M_ERROR: 6465 case M_HANGUP: 6466 mutex_enter(&ill->ill_lock); 6467 if (ill->ill_state_flags & ILL_CONDEMNED) { 6468 mutex_exit(&ill->ill_lock); 6469 freemsg(mp); 6470 return; 6471 } 6472 ill_refhold_locked(ill); 6473 mutex_exit(&ill->ill_lock); 6474 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6475 return; 6476 case M_CTL: 6477 if ((MBLKL(mp) > sizeof (int)) && 6478 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6479 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6480 mctl_present = B_TRUE; 6481 break; 6482 } 6483 putnext(q, mp); 6484 return; 6485 case M_IOCNAK: 6486 iocp = (struct iocblk *)mp->b_rptr; 6487 switch (iocp->ioc_cmd) { 6488 case DL_IOC_HDR_INFO: 6489 ip_rput_other(NULL, q, mp, NULL); 6490 return; 6491 default: 6492 break; 6493 } 6494 /* FALLTHRU */ 6495 default: 6496 putnext(q, mp); 6497 return; 6498 } 6499 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6500 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6501 (mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6502 /* 6503 * if db_ref > 1 then copymsg and free original. Packet may be 6504 * changed and do not want other entity who has a reference to this 6505 * message to trip over the changes. This is a blind change because 6506 * trying to catch all places that might change packet is too 6507 * difficult (since it may be a module above this one). 
6508 */ 6509 if (mp->b_datap->db_ref > 1) { 6510 mblk_t *mp1; 6511 6512 mp1 = copymsg(mp); 6513 freemsg(mp); 6514 if (mp1 == NULL) { 6515 first_mp = NULL; 6516 goto discard; 6517 } 6518 mp = mp1; 6519 } 6520 first_mp = mp; 6521 if (mctl_present) { 6522 hada_mp = first_mp; 6523 mp = first_mp->b_cont; 6524 } 6525 6526 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6527 freemsg(mp); 6528 return; 6529 } 6530 6531 ip6h = (ip6_t *)mp->b_rptr; 6532 6533 /* 6534 * ip:::receive must see ipv6 packets with a full header, 6535 * and so is placed after the IP6_MBLK_HDR_ERR check. 6536 */ 6537 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6538 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6539 int, 0); 6540 6541 if (check != IP6_MBLK_OK) { 6542 freemsg(mp); 6543 return; 6544 } 6545 6546 DTRACE_PROBE4(ip6__physical__in__start, 6547 ill_t *, ill, ill_t *, NULL, 6548 ip6_t *, ip6h, mblk_t *, first_mp); 6549 6550 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6551 ipst->ips_ipv6firewall_physical_in, 6552 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6553 6554 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6555 6556 if (first_mp == NULL) 6557 return; 6558 6559 /* 6560 * Attach any necessary label information to this packet. 6561 */ 6562 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 6563 if (ip6opt_ls != 0) 6564 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 6565 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 6566 goto discard; 6567 } 6568 6569 /* IP observability hook. 
*/ 6570 if (ipst->ips_ip6_observe.he_interested) { 6571 zoneid_t dzone; 6572 6573 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 6574 ALL_ZONES); 6575 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, 6576 ill, ipst); 6577 } 6578 6579 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6580 IPV6_DEFAULT_VERS_AND_FLOW) { 6581 /* 6582 * It may be a bit too expensive to do this mapped address 6583 * check here, but in the interest of robustness, it seems 6584 * like the correct place. 6585 * TODO: Avoid this check for e.g. connected TCP sockets 6586 */ 6587 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6588 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6589 goto discard; 6590 } 6591 6592 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6593 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6594 goto discard; 6595 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6596 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6597 goto discard; 6598 } 6599 6600 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6601 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6602 } else { 6603 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6604 goto discard; 6605 } 6606 freemsg(dl_mp); 6607 return; 6608 6609 discard: 6610 if (dl_mp != NULL) 6611 freeb(dl_mp); 6612 freemsg(first_mp); 6613 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6614 } 6615 6616 /* 6617 * Walk through the IPv6 packet in mp and see if there's an AH header 6618 * in it. See if the AH header needs to get done before other headers in 6619 * the packet. (Worker function for ipsec_early_ah_v6().) 
6620 */ 6621 #define IPSEC_HDR_DONT_PROCESS 0 6622 #define IPSEC_HDR_PROCESS 1 6623 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 6624 static int 6625 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6626 { 6627 uint_t length; 6628 uint_t ehdrlen; 6629 uint8_t *whereptr; 6630 uint8_t *endptr; 6631 uint8_t *nexthdrp; 6632 ip6_dest_t *desthdr; 6633 ip6_rthdr_t *rthdr; 6634 ip6_t *ip6h; 6635 6636 /* 6637 * For now just pullup everything. In general, the less pullups, 6638 * the better, but there's so much squirrelling through anyway, 6639 * it's just easier this way. 6640 */ 6641 if (!pullupmsg(mp, -1)) { 6642 return (IPSEC_MEMORY_ERROR); 6643 } 6644 6645 ip6h = (ip6_t *)mp->b_rptr; 6646 length = IPV6_HDR_LEN; 6647 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6648 endptr = mp->b_wptr; 6649 6650 /* 6651 * We can't just use the argument nexthdr in the place 6652 * of nexthdrp becaue we don't dereference nexthdrp 6653 * till we confirm whether it is a valid address. 6654 */ 6655 nexthdrp = &ip6h->ip6_nxt; 6656 while (whereptr < endptr) { 6657 /* Is there enough left for len + nexthdr? */ 6658 if (whereptr + MIN_EHDR_LEN > endptr) 6659 return (IPSEC_MEMORY_ERROR); 6660 6661 switch (*nexthdrp) { 6662 case IPPROTO_HOPOPTS: 6663 case IPPROTO_DSTOPTS: 6664 /* Assumes the headers are identical for hbh and dst */ 6665 desthdr = (ip6_dest_t *)whereptr; 6666 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6667 if ((uchar_t *)desthdr + ehdrlen > endptr) 6668 return (IPSEC_MEMORY_ERROR); 6669 /* 6670 * Return DONT_PROCESS because the destination 6671 * options header may be for each hop in a 6672 * routing-header, and we only want AH if we're 6673 * finished with routing headers. 
6674 */ 6675 if (*nexthdrp == IPPROTO_DSTOPTS) 6676 return (IPSEC_HDR_DONT_PROCESS); 6677 nexthdrp = &desthdr->ip6d_nxt; 6678 break; 6679 case IPPROTO_ROUTING: 6680 rthdr = (ip6_rthdr_t *)whereptr; 6681 6682 /* 6683 * If there's more hops left on the routing header, 6684 * return now with DON'T PROCESS. 6685 */ 6686 if (rthdr->ip6r_segleft > 0) 6687 return (IPSEC_HDR_DONT_PROCESS); 6688 6689 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6690 if ((uchar_t *)rthdr + ehdrlen > endptr) 6691 return (IPSEC_MEMORY_ERROR); 6692 nexthdrp = &rthdr->ip6r_nxt; 6693 break; 6694 case IPPROTO_FRAGMENT: 6695 /* Wait for reassembly */ 6696 return (IPSEC_HDR_DONT_PROCESS); 6697 case IPPROTO_AH: 6698 *nexthdr = IPPROTO_AH; 6699 return (IPSEC_HDR_PROCESS); 6700 case IPPROTO_NONE: 6701 /* No next header means we're finished */ 6702 default: 6703 return (IPSEC_HDR_DONT_PROCESS); 6704 } 6705 length += ehdrlen; 6706 whereptr += ehdrlen; 6707 } 6708 /* 6709 * Malformed/truncated packet. 6710 */ 6711 return (IPSEC_MEMORY_ERROR); 6712 } 6713 6714 /* 6715 * Path for AH if options are present. If this is the first time we are 6716 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6717 * Otherwise, just fanout. Return value answers the boolean question: 6718 * "Did I consume the mblk you sent me?" 6719 * 6720 * Sometimes AH needs to be done before other IPv6 headers for security 6721 * reasons. This function (and its ipsec_needs_processing_v6() above) 6722 * indicates if that is so, and fans out to the appropriate IPsec protocol 6723 * for the datagram passed in. 
6724 */ 6725 static boolean_t 6726 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6727 ill_t *ill, ill_t *inill, mblk_t *hada_mp, zoneid_t zoneid) 6728 { 6729 mblk_t *mp; 6730 uint8_t nexthdr; 6731 ipsec_in_t *ii = NULL; 6732 ah_t *ah; 6733 ipsec_status_t ipsec_rc; 6734 ip_stack_t *ipst = ill->ill_ipst; 6735 netstack_t *ns = ipst->ips_netstack; 6736 ipsec_stack_t *ipss = ns->netstack_ipsec; 6737 6738 ASSERT((hada_mp == NULL) || (!mctl_present)); 6739 6740 switch (ipsec_needs_processing_v6( 6741 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6742 case IPSEC_MEMORY_ERROR: 6743 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6744 freemsg(hada_mp); 6745 freemsg(first_mp); 6746 return (B_TRUE); 6747 case IPSEC_HDR_DONT_PROCESS: 6748 return (B_FALSE); 6749 } 6750 6751 /* Default means send it to AH! */ 6752 ASSERT(nexthdr == IPPROTO_AH); 6753 if (!mctl_present) { 6754 mp = first_mp; 6755 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 6756 if (first_mp == NULL) { 6757 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6758 "allocation failure.\n")); 6759 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6760 freemsg(hada_mp); 6761 freemsg(mp); 6762 return (B_TRUE); 6763 } 6764 /* 6765 * Store the ill_index so that when we come back 6766 * from IPSEC we ride on the same queue. 6767 */ 6768 ii = (ipsec_in_t *)first_mp->b_rptr; 6769 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6770 ii->ipsec_in_rill_index = inill->ill_phyint->phyint_ifindex; 6771 first_mp->b_cont = mp; 6772 } 6773 /* 6774 * Cache hardware acceleration info. 
6775 */ 6776 if (hada_mp != NULL) { 6777 ASSERT(ii != NULL); 6778 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6779 "caching data attr.\n")); 6780 ii->ipsec_in_accelerated = B_TRUE; 6781 ii->ipsec_in_da = hada_mp; 6782 } 6783 6784 if (!ipsec_loaded(ipss)) { 6785 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 6786 return (B_TRUE); 6787 } 6788 6789 ah = ipsec_inbound_ah_sa(first_mp, ns); 6790 if (ah == NULL) 6791 return (B_TRUE); 6792 ASSERT(ii->ipsec_in_ah_sa != NULL); 6793 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6794 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6795 6796 switch (ipsec_rc) { 6797 case IPSEC_STATUS_SUCCESS: 6798 /* we're done with IPsec processing, send it up */ 6799 ip_fanout_proto_again(first_mp, ill, inill, NULL); 6800 break; 6801 case IPSEC_STATUS_FAILED: 6802 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 6803 break; 6804 case IPSEC_STATUS_PENDING: 6805 /* no action needed */ 6806 break; 6807 } 6808 return (B_TRUE); 6809 } 6810 6811 static boolean_t 6812 ip_iptun_input_v6(mblk_t *ipsec_mp, mblk_t *data_mp, 6813 size_t hdr_len, uint8_t nexthdr, zoneid_t zoneid, ill_t *ill, 6814 ip_stack_t *ipst) 6815 { 6816 conn_t *connp; 6817 6818 ASSERT(ipsec_mp == NULL || ipsec_mp->b_cont == data_mp); 6819 6820 connp = ipcl_classify_v6(data_mp, nexthdr, hdr_len, zoneid, ipst); 6821 if (connp != NULL) { 6822 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 6823 connp->conn_recv(connp, ipsec_mp != NULL ? ipsec_mp : data_mp, 6824 NULL); 6825 CONN_DEC_REF(connp); 6826 return (B_TRUE); 6827 } 6828 return (B_FALSE); 6829 } 6830 6831 /* 6832 * Validate the IPv6 mblk for alignment. 
6833 */ 6834 int 6835 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 6836 { 6837 int pkt_len, ip6_len; 6838 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 6839 6840 /* check for alignment and full IPv6 header */ 6841 if (!OK_32PTR((uchar_t *)ip6h) || 6842 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6843 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6844 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6845 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6846 return (IP6_MBLK_HDR_ERR); 6847 } 6848 ip6h = (ip6_t *)mp->b_rptr; 6849 } 6850 6851 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6852 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6853 6854 if (mp->b_cont == NULL) 6855 pkt_len = mp->b_wptr - mp->b_rptr; 6856 else 6857 pkt_len = msgdsize(mp); 6858 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6859 6860 /* 6861 * Check for bogus (too short packet) and packet which 6862 * was padded by the link layer. 6863 */ 6864 if (ip6_len != pkt_len) { 6865 ssize_t diff; 6866 6867 if (ip6_len > pkt_len) { 6868 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 6869 ip6_len, pkt_len)); 6870 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 6871 return (IP6_MBLK_LEN_ERR); 6872 } 6873 diff = (ssize_t)(pkt_len - ip6_len); 6874 6875 if (!adjmsg(mp, -diff)) { 6876 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 6877 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6878 return (IP6_MBLK_LEN_ERR); 6879 } 6880 6881 /* 6882 * adjmsg may have freed an mblk from the chain, hence 6883 * invalidate any hw checksum here. This will force IP to 6884 * calculate the checksum in sw, but only for this packet. 6885 */ 6886 DB_CKSUMFLAGS(mp) = 0; 6887 } 6888 return (IP6_MBLK_OK); 6889 } 6890 6891 /* 6892 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 6893 * ip_rput_v6 has already verified alignment, the min length, the version, 6894 * and db_ref = 1. 6895 * 6896 * The ill passed in (the arg named inill) is the ill that the packet 6897 * actually arrived on. 
We need to remember this when saving the 6898 * input interface index into potential IPV6_PKTINFO data in 6899 * ip_add_info_v6(). 6900 * 6901 * This routine doesn't free dl_mp; that's the caller's responsibility on 6902 * return. (Note that the callers are complex enough that there's no tail 6903 * recursion here anyway.) 6904 */ 6905 void 6906 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 6907 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6908 { 6909 ire_t *ire = NULL; 6910 ill_t *ill = inill; 6911 ill_t *outill; 6912 uint8_t *whereptr; 6913 uint8_t nexthdr; 6914 uint16_t remlen; 6915 uint_t prev_nexthdr_offset; 6916 uint_t used; 6917 size_t old_pkt_len; 6918 size_t pkt_len; 6919 uint16_t ip6_len; 6920 uint_t hdr_len; 6921 boolean_t mctl_present; 6922 mblk_t *first_mp; 6923 mblk_t *first_mp1; 6924 boolean_t no_forward; 6925 ip6_hbh_t *hbhhdr; 6926 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 6927 conn_t *connp; 6928 uint32_t ports; 6929 zoneid_t zoneid = GLOBAL_ZONEID; 6930 uint16_t hck_flags, reass_hck_flags; 6931 uint32_t reass_sum; 6932 boolean_t cksum_err; 6933 mblk_t *mp1; 6934 ip_stack_t *ipst = inill->ill_ipst; 6935 6936 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 6937 6938 if (hada_mp != NULL) { 6939 /* 6940 * It's an IPsec accelerated packet. 6941 * Keep a pointer to the data attributes around until 6942 * we allocate the ipsecinfo structure. 6943 */ 6944 IPSECHW_DEBUG(IPSECHW_PKT, 6945 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 6946 hada_mp->b_cont = NULL; 6947 /* 6948 * Since it is accelerated, it came directly from 6949 * the ill. 
6950 */ 6951 ASSERT(mctl_present == B_FALSE); 6952 ASSERT(mp->b_datap->db_type != M_CTL); 6953 } 6954 6955 ip6h = (ip6_t *)mp->b_rptr; 6956 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6957 old_pkt_len = pkt_len = ip6_len; 6958 6959 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 6960 hck_flags = DB_CKSUMFLAGS(mp); 6961 else 6962 hck_flags = 0; 6963 6964 /* Clear checksum flags in case we need to forward */ 6965 DB_CKSUMFLAGS(mp) = 0; 6966 reass_sum = reass_hck_flags = 0; 6967 6968 nexthdr = ip6h->ip6_nxt; 6969 6970 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 6971 (uchar_t *)ip6h); 6972 whereptr = (uint8_t *)&ip6h[1]; 6973 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 6974 6975 /* Process hop by hop header options */ 6976 if (nexthdr == IPPROTO_HOPOPTS) { 6977 uint_t ehdrlen; 6978 uint8_t *optptr; 6979 6980 if (remlen < MIN_EHDR_LEN) 6981 goto pkt_too_short; 6982 if (mp->b_cont != NULL && 6983 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 6984 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 6985 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6986 freemsg(hada_mp); 6987 freemsg(first_mp); 6988 return; 6989 } 6990 ip6h = (ip6_t *)mp->b_rptr; 6991 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6992 } 6993 hbhhdr = (ip6_hbh_t *)whereptr; 6994 nexthdr = hbhhdr->ip6h_nxt; 6995 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 6996 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 6997 6998 if (remlen < ehdrlen) 6999 goto pkt_too_short; 7000 if (mp->b_cont != NULL && 7001 whereptr + ehdrlen > mp->b_wptr) { 7002 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7003 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7004 freemsg(hada_mp); 7005 freemsg(first_mp); 7006 return; 7007 } 7008 ip6h = (ip6_t *)mp->b_rptr; 7009 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7010 hbhhdr = (ip6_hbh_t *)whereptr; 7011 } 7012 7013 optptr = whereptr + 2; 7014 whereptr += ehdrlen; 7015 remlen -= ehdrlen; 7016 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7017 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7018 case -1: 7019 /* 7020 * Packet has been consumed and any 7021 * needed ICMP messages sent. 7022 */ 7023 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7024 freemsg(hada_mp); 7025 return; 7026 case 0: 7027 /* no action needed */ 7028 break; 7029 case 1: 7030 /* Known router alert */ 7031 goto ipv6forus; 7032 } 7033 } 7034 7035 /* 7036 * On incoming v6 multicast packets we will bypass the ire table, 7037 * and assume that the read queue corresponds to the targetted 7038 * interface. 7039 * 7040 * The effect of this is the same as the IPv4 original code, but is 7041 * much cleaner I think. See ip_rput for how that was done. 7042 */ 7043 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7044 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7045 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7046 7047 /* 7048 * So that we don't end up with dups, only one ill in an IPMP 7049 * group is nominated to receive multicast data traffic. 7050 * However, link-locals on any underlying interfaces will have 7051 * joined their solicited-node multicast addresses and we must 7052 * accept those packets. (We don't attempt to precisely 7053 * filter out duplicate solicited-node multicast packets since 7054 * e.g. an IPMP interface and underlying interface may have 7055 * the same solicited-node multicast address.) Note that we 7056 * won't generally have duplicates because we only issue a 7057 * DL_ENABMULTI_REQ on one interface in a group; the exception 7058 * is when PHYI_MULTI_BCAST is set. 7059 */ 7060 if (IS_UNDER_IPMP(ill) && !ill->ill_nom_cast && 7061 !IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 7062 goto drop_pkt; 7063 } 7064 7065 /* 7066 * XXX TODO Give to mrouted to for multicast forwarding. 
7067 */ 7068 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 7069 ALL_ZONES) == NULL) { 7070 if (ip_debug > 3) { 7071 /* ip2dbg */ 7072 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7073 " which is not for us: %s\n", AF_INET6, 7074 &ip6h->ip6_dst); 7075 } 7076 drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7077 freemsg(hada_mp); 7078 freemsg(first_mp); 7079 return; 7080 } 7081 if (ip_debug > 3) { 7082 /* ip2dbg */ 7083 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7084 AF_INET6, &ip6h->ip6_dst); 7085 } 7086 zoneid = GLOBAL_ZONEID; 7087 goto ipv6forus; 7088 } 7089 7090 /* 7091 * Find an ire that matches destination. For link-local addresses 7092 * we have to match the ill. 7093 * TBD for site local addresses. 7094 */ 7095 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7096 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7097 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7098 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 7099 } else { 7100 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7101 msg_getlabel(mp), ipst); 7102 7103 if (ire != NULL && ire->ire_stq != NULL && 7104 ire->ire_zoneid != GLOBAL_ZONEID && 7105 ire->ire_zoneid != ALL_ZONES) { 7106 /* 7107 * Should only use IREs that are visible from the 7108 * global zone for forwarding. 7109 */ 7110 ire_refrele(ire); 7111 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7112 GLOBAL_ZONEID, msg_getlabel(mp), ipst); 7113 } 7114 } 7115 7116 if (ire == NULL) { 7117 /* 7118 * No matching IRE found. Mark this packet as having 7119 * originated externally. 
7120 */ 7121 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7122 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7123 if (!(ill->ill_flags & ILLF_ROUTER)) { 7124 BUMP_MIB(ill->ill_ip_mib, 7125 ipIfStatsInAddrErrors); 7126 } 7127 freemsg(hada_mp); 7128 freemsg(first_mp); 7129 return; 7130 } 7131 if (ip6h->ip6_hops <= 1) { 7132 if (hada_mp != NULL) 7133 goto hada_drop; 7134 /* Sent by forwarding path, and router is global zone */ 7135 icmp_time_exceeded_v6(WR(q), first_mp, 7136 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7137 GLOBAL_ZONEID, ipst); 7138 return; 7139 } 7140 /* 7141 * Per RFC 3513 section 2.5.2, we must not forward packets with 7142 * an unspecified source address. 7143 */ 7144 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7145 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7146 freemsg(hada_mp); 7147 freemsg(first_mp); 7148 return; 7149 } 7150 mp->b_prev = (mblk_t *)(uintptr_t) 7151 ill->ill_phyint->phyint_ifindex; 7152 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7153 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7154 GLOBAL_ZONEID, ipst); 7155 return; 7156 } 7157 /* we have a matching IRE */ 7158 if (ire->ire_stq != NULL) { 7159 /* 7160 * To be quicker, we may wish not to chase pointers 7161 * (ire->ire_ipif->ipif_ill...) and instead store the 7162 * forwarding policy in the ire. An unfortunate side- 7163 * effect of this would be requiring an ire flush whenever 7164 * the ILLF_ROUTER flag changes. For now, chase pointers 7165 * once and store in the boolean no_forward. 7166 * 7167 * This appears twice to keep it out of the non-forwarding, 7168 * yes-it's-for-us-on-the-right-interface case. 7169 */ 7170 no_forward = ((ill->ill_flags & 7171 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7172 7173 ASSERT(first_mp == mp); 7174 /* 7175 * This ire has a send-to queue - forward the packet. 
7176 */ 7177 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7178 freemsg(hada_mp); 7179 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7180 if (no_forward) { 7181 BUMP_MIB(ill->ill_ip_mib, 7182 ipIfStatsInAddrErrors); 7183 } 7184 freemsg(mp); 7185 ire_refrele(ire); 7186 return; 7187 } 7188 /* 7189 * ipIfStatsHCInForwDatagrams should only be increment if there 7190 * will be an attempt to forward the packet, which is why we 7191 * increment after the above condition has been checked. 7192 */ 7193 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7194 if (ip6h->ip6_hops <= 1) { 7195 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7196 /* Sent by forwarding path, and router is global zone */ 7197 icmp_time_exceeded_v6(WR(q), mp, 7198 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7199 GLOBAL_ZONEID, ipst); 7200 ire_refrele(ire); 7201 return; 7202 } 7203 /* 7204 * Per RFC 3513 section 2.5.2, we must not forward packets with 7205 * an unspecified source address. 7206 */ 7207 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7208 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7209 freemsg(mp); 7210 ire_refrele(ire); 7211 return; 7212 } 7213 7214 if (is_system_labeled()) { 7215 mblk_t *mp1; 7216 7217 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7218 BUMP_MIB(ill->ill_ip_mib, 7219 ipIfStatsForwProhibits); 7220 freemsg(mp); 7221 ire_refrele(ire); 7222 return; 7223 } 7224 /* Size may have changed */ 7225 mp = mp1; 7226 ip6h = (ip6_t *)mp->b_rptr; 7227 pkt_len = msgdsize(mp); 7228 } 7229 7230 if (pkt_len > ire->ire_max_frag) { 7231 int max_frag = ire->ire_max_frag; 7232 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7233 /* 7234 * Handle labeled packet resizing. 
7235 */ 7236 if (is_system_labeled()) { 7237 max_frag = tsol_pmtu_adjust(mp, max_frag, 7238 pkt_len - old_pkt_len, AF_INET6); 7239 } 7240 7241 /* Sent by forwarding path, and router is global zone */ 7242 icmp_pkt2big_v6(WR(q), mp, max_frag, 7243 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7244 ire_refrele(ire); 7245 return; 7246 } 7247 7248 /* 7249 * Check to see if we're forwarding the packet to a 7250 * different link from which it came. If so, check the 7251 * source and destination addresses since routers must not 7252 * forward any packets with link-local source or 7253 * destination addresses to other links. Otherwise (if 7254 * we're forwarding onto the same link), conditionally send 7255 * a redirect message. 7256 */ 7257 if (ire->ire_rfq != q && 7258 !IS_IN_SAME_ILLGRP(ill, (ill_t *)ire->ire_rfq->q_ptr)) { 7259 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7260 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7261 BUMP_MIB(ill->ill_ip_mib, 7262 ipIfStatsInAddrErrors); 7263 freemsg(mp); 7264 ire_refrele(ire); 7265 return; 7266 } 7267 /* TBD add site-local check at site boundary? */ 7268 } else if (ipst->ips_ipv6_send_redirects) { 7269 in6_addr_t *v6targ; 7270 in6_addr_t gw_addr_v6; 7271 ire_t *src_ire_v6 = NULL; 7272 7273 /* 7274 * Don't send a redirect when forwarding a source 7275 * routed packet. 7276 */ 7277 if (ip_source_routed_v6(ip6h, mp, ipst)) 7278 goto forward; 7279 7280 mutex_enter(&ire->ire_lock); 7281 gw_addr_v6 = ire->ire_gateway_addr_v6; 7282 mutex_exit(&ire->ire_lock); 7283 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7284 v6targ = &gw_addr_v6; 7285 /* 7286 * We won't send redirects to a router 7287 * that doesn't have a link local 7288 * address, but will forward. 
7289 */ 7290 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7291 BUMP_MIB(ill->ill_ip_mib, 7292 ipIfStatsInAddrErrors); 7293 goto forward; 7294 } 7295 } else { 7296 v6targ = &ip6h->ip6_dst; 7297 } 7298 7299 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7300 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7301 GLOBAL_ZONEID, 0, NULL, 7302 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7303 ipst); 7304 7305 if (src_ire_v6 != NULL) { 7306 /* 7307 * The source is directly connected. 7308 */ 7309 mp1 = copymsg(mp); 7310 if (mp1 != NULL) { 7311 icmp_send_redirect_v6(WR(q), 7312 mp1, v6targ, &ip6h->ip6_dst, 7313 ill, B_FALSE); 7314 } 7315 ire_refrele(src_ire_v6); 7316 } 7317 } 7318 7319 forward: 7320 /* Hoplimit verified above */ 7321 ip6h->ip6_hops--; 7322 7323 outill = ire->ire_ipif->ipif_ill; 7324 7325 DTRACE_PROBE4(ip6__forwarding__start, 7326 ill_t *, inill, ill_t *, outill, 7327 ip6_t *, ip6h, mblk_t *, mp); 7328 7329 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7330 ipst->ips_ipv6firewall_forwarding, 7331 inill, outill, ip6h, mp, mp, 0, ipst); 7332 7333 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7334 7335 if (mp != NULL) { 7336 UPDATE_IB_PKT_COUNT(ire); 7337 ire->ire_last_used_time = lbolt; 7338 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7339 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7340 } 7341 IRE_REFRELE(ire); 7342 return; 7343 } 7344 7345 /* 7346 * Need to put on correct queue for reassembly to find it. 7347 * No need to use put() since reassembly has its own locks. 7348 * Note: multicast packets and packets destined to addresses 7349 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7350 * the arriving ill. Unlike the IPv4 case, enabling strict 7351 * destination multihoming will prevent accepting packets 7352 * addressed to an IRE_LOCAL on lo0. 
7353 */ 7354 if (ire->ire_rfq != q) { 7355 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7356 == NULL) { 7357 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7358 freemsg(hada_mp); 7359 freemsg(first_mp); 7360 return; 7361 } 7362 if (ire->ire_rfq != NULL) { 7363 q = ire->ire_rfq; 7364 ill = (ill_t *)q->q_ptr; 7365 ASSERT(ill != NULL); 7366 } 7367 } 7368 7369 zoneid = ire->ire_zoneid; 7370 UPDATE_IB_PKT_COUNT(ire); 7371 ire->ire_last_used_time = lbolt; 7372 /* Don't use the ire after this point, we'll NULL it out to be sure. */ 7373 ire_refrele(ire); 7374 ire = NULL; 7375 ipv6forus: 7376 /* 7377 * Looks like this packet is for us one way or another. 7378 * This is where we'll process destination headers etc. 7379 */ 7380 for (; ; ) { 7381 switch (nexthdr) { 7382 case IPPROTO_TCP: { 7383 uint16_t *up; 7384 uint32_t sum; 7385 int offset; 7386 7387 hdr_len = pkt_len - remlen; 7388 7389 if (hada_mp != NULL) { 7390 ip0dbg(("tcp hada drop\n")); 7391 goto hada_drop; 7392 } 7393 7394 7395 /* TCP needs all of the TCP header */ 7396 if (remlen < TCP_MIN_HEADER_LENGTH) 7397 goto pkt_too_short; 7398 if (mp->b_cont != NULL && 7399 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7400 if (!pullupmsg(mp, 7401 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7402 BUMP_MIB(ill->ill_ip_mib, 7403 ipIfStatsInDiscards); 7404 freemsg(first_mp); 7405 return; 7406 } 7407 hck_flags = 0; 7408 ip6h = (ip6_t *)mp->b_rptr; 7409 whereptr = (uint8_t *)ip6h + hdr_len; 7410 } 7411 /* 7412 * Extract the offset field from the TCP header. 7413 */ 7414 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7415 if (offset != 5) { 7416 if (offset < 5) { 7417 ip1dbg(("ip_rput_data_v6: short " 7418 "TCP data offset")); 7419 BUMP_MIB(ill->ill_ip_mib, 7420 ipIfStatsInDiscards); 7421 freemsg(first_mp); 7422 return; 7423 } 7424 /* 7425 * There must be TCP options. 7426 * Make sure we can grab them. 
7427 */ 7428 offset <<= 2; 7429 if (remlen < offset) 7430 goto pkt_too_short; 7431 if (mp->b_cont != NULL && 7432 whereptr + offset > mp->b_wptr) { 7433 if (!pullupmsg(mp, 7434 hdr_len + offset)) { 7435 BUMP_MIB(ill->ill_ip_mib, 7436 ipIfStatsInDiscards); 7437 freemsg(first_mp); 7438 return; 7439 } 7440 hck_flags = 0; 7441 ip6h = (ip6_t *)mp->b_rptr; 7442 whereptr = (uint8_t *)ip6h + hdr_len; 7443 } 7444 } 7445 7446 up = (uint16_t *)&ip6h->ip6_src; 7447 /* 7448 * TCP checksum calculation. First sum up the 7449 * pseudo-header fields: 7450 * - Source IPv6 address 7451 * - Destination IPv6 address 7452 * - TCP payload length 7453 * - TCP protocol ID 7454 */ 7455 sum = htons(IPPROTO_TCP + remlen) + 7456 up[0] + up[1] + up[2] + up[3] + 7457 up[4] + up[5] + up[6] + up[7] + 7458 up[8] + up[9] + up[10] + up[11] + 7459 up[12] + up[13] + up[14] + up[15]; 7460 7461 /* Fold initial sum */ 7462 sum = (sum & 0xffff) + (sum >> 16); 7463 7464 mp1 = mp->b_cont; 7465 7466 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7467 IP6_STAT(ipst, ip6_in_sw_cksum); 7468 7469 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7470 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7471 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7472 mp, mp1, cksum_err); 7473 7474 if (cksum_err) { 7475 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7476 7477 if (hck_flags & HCK_FULLCKSUM) { 7478 IP6_STAT(ipst, 7479 ip6_tcp_in_full_hw_cksum_err); 7480 } else if (hck_flags & HCK_PARTIALCKSUM) { 7481 IP6_STAT(ipst, 7482 ip6_tcp_in_part_hw_cksum_err); 7483 } else { 7484 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7485 } 7486 freemsg(first_mp); 7487 return; 7488 } 7489 tcp_fanout: 7490 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7491 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7492 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7493 return; 7494 } 7495 case IPPROTO_SCTP: 7496 { 7497 sctp_hdr_t *sctph; 7498 uint32_t calcsum, pktsum; 7499 uint_t hdr_len = pkt_len - remlen; 7500 sctp_stack_t *sctps; 7501 7502 sctps = 
inill->ill_ipst->ips_netstack->netstack_sctp; 7503 7504 /* SCTP needs all of the SCTP header */ 7505 if (remlen < sizeof (*sctph)) { 7506 goto pkt_too_short; 7507 } 7508 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7509 ASSERT(mp->b_cont != NULL); 7510 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7511 BUMP_MIB(ill->ill_ip_mib, 7512 ipIfStatsInDiscards); 7513 freemsg(mp); 7514 return; 7515 } 7516 ip6h = (ip6_t *)mp->b_rptr; 7517 whereptr = (uint8_t *)ip6h + hdr_len; 7518 } 7519 7520 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7521 /* checksum */ 7522 pktsum = sctph->sh_chksum; 7523 sctph->sh_chksum = 0; 7524 calcsum = sctp_cksum(mp, hdr_len); 7525 if (calcsum != pktsum) { 7526 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7527 freemsg(mp); 7528 return; 7529 } 7530 sctph->sh_chksum = pktsum; 7531 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7532 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7533 ports, zoneid, mp, sctps)) == NULL) { 7534 ip_fanout_sctp_raw(first_mp, ill, 7535 (ipha_t *)ip6h, B_FALSE, ports, 7536 mctl_present, 7537 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7538 B_TRUE, zoneid); 7539 return; 7540 } 7541 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7542 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7543 B_FALSE, mctl_present); 7544 return; 7545 } 7546 case IPPROTO_UDP: { 7547 uint16_t *up; 7548 uint32_t sum; 7549 7550 hdr_len = pkt_len - remlen; 7551 7552 if (hada_mp != NULL) { 7553 ip0dbg(("udp hada drop\n")); 7554 goto hada_drop; 7555 } 7556 7557 /* Verify that at least the ports are present */ 7558 if (remlen < UDPH_SIZE) 7559 goto pkt_too_short; 7560 if (mp->b_cont != NULL && 7561 whereptr + UDPH_SIZE > mp->b_wptr) { 7562 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7563 BUMP_MIB(ill->ill_ip_mib, 7564 ipIfStatsInDiscards); 7565 freemsg(first_mp); 7566 return; 7567 } 7568 hck_flags = 0; 7569 ip6h = (ip6_t *)mp->b_rptr; 7570 whereptr = (uint8_t *)ip6h + hdr_len; 7571 } 7572 7573 /* 7574 * Before going through the regular 
checksum 7575 * calculation, make sure the received checksum 7576 * is non-zero. RFC 2460 says, a 0x0000 checksum 7577 * in a UDP packet (within IPv6 packet) is invalid 7578 * and should be replaced by 0xffff. This makes 7579 * sense as regular checksum calculation will 7580 * pass for both the cases i.e. 0x0000 and 0xffff. 7581 * Removing one of the case makes error detection 7582 * stronger. 7583 */ 7584 7585 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7586 /* 0x0000 checksum is invalid */ 7587 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7588 "checksum value 0x0000\n")); 7589 BUMP_MIB(ill->ill_ip_mib, 7590 udpIfStatsInCksumErrs); 7591 freemsg(first_mp); 7592 return; 7593 } 7594 7595 up = (uint16_t *)&ip6h->ip6_src; 7596 7597 /* 7598 * UDP checksum calculation. First sum up the 7599 * pseudo-header fields: 7600 * - Source IPv6 address 7601 * - Destination IPv6 address 7602 * - UDP payload length 7603 * - UDP protocol ID 7604 */ 7605 7606 sum = htons(IPPROTO_UDP + remlen) + 7607 up[0] + up[1] + up[2] + up[3] + 7608 up[4] + up[5] + up[6] + up[7] + 7609 up[8] + up[9] + up[10] + up[11] + 7610 up[12] + up[13] + up[14] + up[15]; 7611 7612 /* Fold initial sum */ 7613 sum = (sum & 0xffff) + (sum >> 16); 7614 7615 if (reass_hck_flags != 0) { 7616 hck_flags = reass_hck_flags; 7617 7618 IP_CKSUM_RECV_REASS(hck_flags, 7619 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7620 sum, reass_sum, cksum_err); 7621 } else { 7622 mp1 = mp->b_cont; 7623 7624 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7625 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7626 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7627 mp, mp1, cksum_err); 7628 } 7629 7630 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7631 IP6_STAT(ipst, ip6_in_sw_cksum); 7632 7633 if (cksum_err) { 7634 BUMP_MIB(ill->ill_ip_mib, 7635 udpIfStatsInCksumErrs); 7636 7637 if (hck_flags & HCK_FULLCKSUM) 7638 IP6_STAT(ipst, 7639 ip6_udp_in_full_hw_cksum_err); 7640 else if (hck_flags & HCK_PARTIALCKSUM) 7641 IP6_STAT(ipst, 7642 
ip6_udp_in_part_hw_cksum_err); 7643 else 7644 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7645 7646 freemsg(first_mp); 7647 return; 7648 } 7649 goto udp_fanout; 7650 } 7651 case IPPROTO_ICMPV6: { 7652 uint16_t *up; 7653 uint32_t sum; 7654 uint_t hdr_len = pkt_len - remlen; 7655 7656 if (hada_mp != NULL) { 7657 ip0dbg(("icmp hada drop\n")); 7658 goto hada_drop; 7659 } 7660 7661 up = (uint16_t *)&ip6h->ip6_src; 7662 sum = htons(IPPROTO_ICMPV6 + remlen) + 7663 up[0] + up[1] + up[2] + up[3] + 7664 up[4] + up[5] + up[6] + up[7] + 7665 up[8] + up[9] + up[10] + up[11] + 7666 up[12] + up[13] + up[14] + up[15]; 7667 sum = (sum & 0xffff) + (sum >> 16); 7668 sum = IP_CSUM(mp, hdr_len, sum); 7669 if (sum != 0) { 7670 /* IPv6 ICMP checksum failed */ 7671 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7672 "failed %x\n", 7673 sum)); 7674 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7675 BUMP_MIB(ill->ill_icmp6_mib, 7676 ipv6IfIcmpInErrors); 7677 freemsg(first_mp); 7678 return; 7679 } 7680 7681 icmp_fanout: 7682 /* Check variable for testing applications */ 7683 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7684 freemsg(first_mp); 7685 return; 7686 } 7687 /* 7688 * Assume that there is always at least one conn for 7689 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7690 * where there is no conn. 7691 */ 7692 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7693 ilm_t *ilm; 7694 ilm_walker_t ilw; 7695 7696 ASSERT(!IS_LOOPBACK(ill)); 7697 /* 7698 * In the multicast case, applications may have 7699 * joined the group from different zones, so we 7700 * need to deliver the packet to each of them. 7701 * Loop through the multicast memberships 7702 * structures (ilm) on the receive ill and send 7703 * a copy of the packet up each matching one. 
7704 */ 7705 ilm = ilm_walker_start(&ilw, inill); 7706 for (; ilm != NULL; 7707 ilm = ilm_walker_step(&ilw, ilm)) { 7708 if (!IN6_ARE_ADDR_EQUAL( 7709 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7710 continue; 7711 if (!ipif_lookup_zoneid( 7712 ilw.ilw_walk_ill, ilm->ilm_zoneid, 7713 IPIF_UP, NULL)) 7714 continue; 7715 7716 first_mp1 = ip_copymsg(first_mp); 7717 if (first_mp1 == NULL) 7718 continue; 7719 icmp_inbound_v6(q, first_mp1, 7720 ilw.ilw_walk_ill, inill, 7721 hdr_len, mctl_present, 0, 7722 ilm->ilm_zoneid, dl_mp); 7723 } 7724 ilm_walker_finish(&ilw); 7725 } else { 7726 first_mp1 = ip_copymsg(first_mp); 7727 if (first_mp1 != NULL) 7728 icmp_inbound_v6(q, first_mp1, ill, 7729 inill, hdr_len, mctl_present, 0, 7730 zoneid, dl_mp); 7731 } 7732 goto proto_fanout; 7733 } 7734 case IPPROTO_ENCAP: 7735 case IPPROTO_IPV6: 7736 if (ip_iptun_input_v6(mctl_present ? first_mp : NULL, 7737 mp, pkt_len - remlen, nexthdr, zoneid, ill, ipst)) { 7738 return; 7739 } 7740 /* 7741 * If there was no IP tunnel data-link bound to 7742 * receive this packet, then we fall through to 7743 * allow potential raw sockets bound to either of 7744 * these protocols to pick it up. 7745 */ 7746 /* FALLTHRU */ 7747 proto_fanout: 7748 default: { 7749 /* 7750 * Handle protocols with which IPv6 is less intimate. 7751 */ 7752 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 7753 7754 if (hada_mp != NULL) { 7755 ip0dbg(("default hada drop\n")); 7756 goto hada_drop; 7757 } 7758 7759 /* 7760 * Enable sending ICMP for "Unknown" nexthdr 7761 * case. i.e. where we did not FALLTHRU from 7762 * IPPROTO_ICMPV6 processing case above. 
7763 * If we did FALLTHRU, then the packet has already been 7764 * processed for IPPF, don't process it again in 7765 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7766 * flags 7767 */ 7768 if (nexthdr != IPPROTO_ICMPV6) 7769 proto_flags |= IP_FF_SEND_ICMP; 7770 else 7771 proto_flags |= IP6_NO_IPPOLICY; 7772 7773 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7774 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7775 mctl_present, zoneid); 7776 return; 7777 } 7778 7779 case IPPROTO_DSTOPTS: { 7780 uint_t ehdrlen; 7781 uint8_t *optptr; 7782 ip6_dest_t *desthdr; 7783 7784 /* If packet is too short, look no further */ 7785 if (remlen < MIN_EHDR_LEN) 7786 goto pkt_too_short; 7787 7788 /* Check if AH is present. */ 7789 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7790 inill, hada_mp, zoneid)) { 7791 return; 7792 } 7793 7794 /* 7795 * Reinitialize pointers, as ipsec_early_ah_v6() does 7796 * complete pullups. We don't have to do more pullups 7797 * as a result. 7798 */ 7799 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7800 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7801 ip6h = (ip6_t *)mp->b_rptr; 7802 7803 desthdr = (ip6_dest_t *)whereptr; 7804 nexthdr = desthdr->ip6d_nxt; 7805 prev_nexthdr_offset = (uint_t)(whereptr - 7806 (uint8_t *)ip6h); 7807 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7808 if (remlen < ehdrlen) 7809 goto pkt_too_short; 7810 optptr = whereptr + 2; 7811 /* 7812 * Note: XXX This code does not seem to make 7813 * distinction between Destination Options Header 7814 * being before/after Routing Header which can 7815 * happen if we are at the end of source route. 7816 * This may become significant in future. 7817 * (No real significant Destination Options are 7818 * defined/implemented yet ). 7819 */ 7820 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7821 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 7822 case -1: 7823 /* 7824 * Packet has been consumed and any needed 7825 * ICMP errors sent. 
7826 */ 7827 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7828 freemsg(hada_mp); 7829 return; 7830 case 0: 7831 /* No action needed continue */ 7832 break; 7833 case 1: 7834 /* 7835 * Unnexpected return value 7836 * (Router alert is a Hop-by-Hop option) 7837 */ 7838 #ifdef DEBUG 7839 panic("ip_rput_data_v6: router " 7840 "alert hbh opt indication in dest opt"); 7841 /*NOTREACHED*/ 7842 #else 7843 freemsg(hada_mp); 7844 freemsg(first_mp); 7845 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7846 return; 7847 #endif 7848 } 7849 used = ehdrlen; 7850 break; 7851 } 7852 case IPPROTO_FRAGMENT: { 7853 ip6_frag_t *fraghdr; 7854 size_t no_frag_hdr_len; 7855 7856 if (hada_mp != NULL) { 7857 ip0dbg(("frag hada drop\n")); 7858 goto hada_drop; 7859 } 7860 7861 ASSERT(first_mp == mp); 7862 if (remlen < sizeof (ip6_frag_t)) 7863 goto pkt_too_short; 7864 7865 if (mp->b_cont != NULL && 7866 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 7867 if (!pullupmsg(mp, 7868 pkt_len - remlen + sizeof (ip6_frag_t))) { 7869 BUMP_MIB(ill->ill_ip_mib, 7870 ipIfStatsInDiscards); 7871 freemsg(mp); 7872 return; 7873 } 7874 hck_flags = 0; 7875 ip6h = (ip6_t *)mp->b_rptr; 7876 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7877 } 7878 7879 fraghdr = (ip6_frag_t *)whereptr; 7880 used = (uint_t)sizeof (ip6_frag_t); 7881 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 7882 7883 /* 7884 * Invoke the CGTP (multirouting) filtering module to 7885 * process the incoming packet. Packets identified as 7886 * duplicates must be discarded. Filtering is active 7887 * only if the the ip_cgtp_filter ndd variable is 7888 * non-zero. 
7889 */ 7890 if (ipst->ips_ip_cgtp_filter && 7891 ipst->ips_ip_cgtp_filter_ops != NULL) { 7892 int cgtp_flt_pkt; 7893 netstackid_t stackid; 7894 7895 stackid = ipst->ips_netstack->netstack_stackid; 7896 7897 cgtp_flt_pkt = 7898 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 7899 stackid, inill->ill_phyint->phyint_ifindex, 7900 ip6h, fraghdr); 7901 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 7902 freemsg(mp); 7903 return; 7904 } 7905 } 7906 7907 /* Restore the flags */ 7908 DB_CKSUMFLAGS(mp) = hck_flags; 7909 7910 mp = ip_rput_frag_v6(ill, inill, mp, ip6h, fraghdr, 7911 remlen - used, &prev_nexthdr_offset, 7912 &reass_sum, &reass_hck_flags); 7913 if (mp == NULL) { 7914 /* Reassembly is still pending */ 7915 return; 7916 } 7917 /* The first mblk are the headers before the frag hdr */ 7918 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 7919 7920 first_mp = mp; /* mp has most likely changed! */ 7921 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 7922 ip6h = (ip6_t *)mp->b_rptr; 7923 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 7924 whereptr = mp->b_rptr + no_frag_hdr_len; 7925 remlen = ntohs(ip6h->ip6_plen) + 7926 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 7927 pkt_len = msgdsize(mp); 7928 used = 0; 7929 break; 7930 } 7931 case IPPROTO_HOPOPTS: { 7932 if (hada_mp != NULL) { 7933 ip0dbg(("hop hada drop\n")); 7934 goto hada_drop; 7935 } 7936 /* 7937 * Illegal header sequence. 7938 * (Hop-by-hop headers are processed above 7939 * and required to immediately follow IPv6 header) 7940 */ 7941 icmp_param_problem_v6(WR(q), first_mp, 7942 ICMP6_PARAMPROB_NEXTHEADER, 7943 prev_nexthdr_offset, 7944 B_FALSE, B_FALSE, zoneid, ipst); 7945 return; 7946 } 7947 case IPPROTO_ROUTING: { 7948 uint_t ehdrlen; 7949 ip6_rthdr_t *rthdr; 7950 7951 /* If packet is too short, look no further */ 7952 if (remlen < MIN_EHDR_LEN) 7953 goto pkt_too_short; 7954 7955 /* Check if AH is present. 
*/ 7956 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7957 inill, hada_mp, zoneid)) { 7958 return; 7959 } 7960 7961 /* 7962 * Reinitialize pointers, as ipsec_early_ah_v6() does 7963 * complete pullups. We don't have to do more pullups 7964 * as a result. 7965 */ 7966 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7967 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7968 ip6h = (ip6_t *)mp->b_rptr; 7969 7970 rthdr = (ip6_rthdr_t *)whereptr; 7971 nexthdr = rthdr->ip6r_nxt; 7972 prev_nexthdr_offset = (uint_t)(whereptr - 7973 (uint8_t *)ip6h); 7974 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7975 if (remlen < ehdrlen) 7976 goto pkt_too_short; 7977 if (rthdr->ip6r_segleft != 0) { 7978 /* Not end of source route */ 7979 if (ll_multicast) { 7980 BUMP_MIB(ill->ill_ip_mib, 7981 ipIfStatsForwProhibits); 7982 freemsg(hada_mp); 7983 freemsg(mp); 7984 return; 7985 } 7986 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 7987 hada_mp); 7988 return; 7989 } 7990 used = ehdrlen; 7991 break; 7992 } 7993 case IPPROTO_AH: 7994 case IPPROTO_ESP: { 7995 /* 7996 * Fast path for AH/ESP. If this is the first time 7997 * we are sending a datagram to AH/ESP, allocate 7998 * a IPSEC_IN message and prepend it. Otherwise, 7999 * just fanout. 8000 */ 8001 8002 ipsec_in_t *ii; 8003 int ipsec_rc; 8004 ipsec_stack_t *ipss; 8005 8006 ipss = ipst->ips_netstack->netstack_ipsec; 8007 if (!mctl_present) { 8008 ASSERT(first_mp == mp); 8009 first_mp = ipsec_in_alloc(B_FALSE, 8010 ipst->ips_netstack); 8011 if (first_mp == NULL) { 8012 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8013 "allocation failure.\n")); 8014 BUMP_MIB(ill->ill_ip_mib, 8015 ipIfStatsInDiscards); 8016 freemsg(mp); 8017 return; 8018 } 8019 /* 8020 * Store the ill_index so that when we come back 8021 * from IPSEC we ride on the same queue. 
8022 */ 8023 ii = (ipsec_in_t *)first_mp->b_rptr; 8024 ii->ipsec_in_ill_index = 8025 ill->ill_phyint->phyint_ifindex; 8026 ii->ipsec_in_rill_index = 8027 inill->ill_phyint->phyint_ifindex; 8028 first_mp->b_cont = mp; 8029 /* 8030 * Cache hardware acceleration info. 8031 */ 8032 if (hada_mp != NULL) { 8033 IPSECHW_DEBUG(IPSECHW_PKT, 8034 ("ip_rput_data_v6: " 8035 "caching data attr.\n")); 8036 ii->ipsec_in_accelerated = B_TRUE; 8037 ii->ipsec_in_da = hada_mp; 8038 hada_mp = NULL; 8039 } 8040 } else { 8041 ii = (ipsec_in_t *)first_mp->b_rptr; 8042 } 8043 8044 if (!ipsec_loaded(ipss)) { 8045 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8046 zoneid, ipst); 8047 return; 8048 } 8049 8050 /* select inbound SA and have IPsec process the pkt */ 8051 if (nexthdr == IPPROTO_ESP) { 8052 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8053 ipst->ips_netstack); 8054 if (esph == NULL) 8055 return; 8056 ASSERT(ii->ipsec_in_esp_sa != NULL); 8057 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8058 NULL); 8059 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8060 first_mp, esph); 8061 } else { 8062 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8063 ipst->ips_netstack); 8064 if (ah == NULL) 8065 return; 8066 ASSERT(ii->ipsec_in_ah_sa != NULL); 8067 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8068 NULL); 8069 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8070 first_mp, ah); 8071 } 8072 8073 switch (ipsec_rc) { 8074 case IPSEC_STATUS_SUCCESS: 8075 break; 8076 case IPSEC_STATUS_FAILED: 8077 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8078 /* FALLTHRU */ 8079 case IPSEC_STATUS_PENDING: 8080 return; 8081 } 8082 /* we're done with IPsec processing, send it up */ 8083 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8084 return; 8085 } 8086 case IPPROTO_NONE: 8087 /* All processing is done. Count as "delivered". 
*/ 8088 freemsg(hada_mp); 8089 freemsg(first_mp); 8090 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8091 return; 8092 } 8093 whereptr += used; 8094 ASSERT(remlen >= used); 8095 remlen -= used; 8096 } 8097 /* NOTREACHED */ 8098 8099 pkt_too_short: 8100 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8101 ip6_len, pkt_len, remlen)); 8102 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8103 freemsg(hada_mp); 8104 freemsg(first_mp); 8105 return; 8106 udp_fanout: 8107 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8108 connp = NULL; 8109 } else { 8110 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8111 ipst); 8112 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8113 CONN_DEC_REF(connp); 8114 connp = NULL; 8115 } 8116 } 8117 8118 if (connp == NULL) { 8119 uint32_t ports; 8120 8121 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8122 UDP_PORTS_OFFSET); 8123 IP6_STAT(ipst, ip6_udp_slow_path); 8124 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8125 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8126 zoneid); 8127 return; 8128 } 8129 8130 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 8131 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 8132 freemsg(first_mp); 8133 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8134 CONN_DEC_REF(connp); 8135 return; 8136 } 8137 8138 /* Initiate IPPF processing */ 8139 if (IP6_IN_IPP(flags, ipst)) { 8140 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8141 if (mp == NULL) { 8142 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8143 CONN_DEC_REF(connp); 8144 return; 8145 } 8146 } 8147 8148 if (connp->conn_ip_recvpktinfo || 8149 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8150 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8151 if (mp == NULL) { 8152 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8153 CONN_DEC_REF(connp); 8154 return; 8155 } 8156 } 8157 8158 IP6_STAT(ipst, ip6_udp_fast_path); 8159 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 
8160 8161 /* Send it upstream */ 8162 (connp->conn_recv)(connp, mp, NULL); 8163 8164 CONN_DEC_REF(connp); 8165 freemsg(hada_mp); 8166 return; 8167 8168 hada_drop: 8169 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8170 /* IPsec kstats: bump counter here */ 8171 freemsg(hada_mp); 8172 freemsg(first_mp); 8173 } 8174 8175 /* 8176 * Reassemble fragment. 8177 * When it returns a completed message the first mblk will only contain 8178 * the headers prior to the fragment header. 8179 * 8180 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8181 * of the preceding header. This is needed to patch the previous header's 8182 * nexthdr field when reassembly completes. 8183 */ 8184 static mblk_t * 8185 ip_rput_frag_v6(ill_t *ill, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 8186 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8187 uint32_t *cksum_val, uint16_t *cksum_flags) 8188 { 8189 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8190 uint16_t offset; 8191 boolean_t more_frags; 8192 uint8_t nexthdr = fraghdr->ip6f_nxt; 8193 in6_addr_t *v6dst_ptr; 8194 in6_addr_t *v6src_ptr; 8195 uint_t end; 8196 uint_t hdr_length; 8197 size_t count; 8198 ipf_t *ipf; 8199 ipf_t **ipfp; 8200 ipfb_t *ipfb; 8201 mblk_t *mp1; 8202 uint8_t ecn_info = 0; 8203 size_t msg_len; 8204 mblk_t *tail_mp; 8205 mblk_t *t_mp; 8206 boolean_t pruned = B_FALSE; 8207 uint32_t sum_val; 8208 uint16_t sum_flags; 8209 ip_stack_t *ipst = ill->ill_ipst; 8210 8211 if (cksum_val != NULL) 8212 *cksum_val = 0; 8213 if (cksum_flags != NULL) 8214 *cksum_flags = 0; 8215 8216 /* 8217 * We utilize hardware computed checksum info only for UDP since 8218 * IP fragmentation is a normal occurence for the protocol. In 8219 * addition, checksum offload support for IP fragments carrying 8220 * UDP payload is commonly implemented across network adapters. 
8221 */ 8222 ASSERT(inill != NULL); 8223 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(inill) && 8224 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8225 mblk_t *mp1 = mp->b_cont; 8226 int32_t len; 8227 8228 /* Record checksum information from the packet */ 8229 sum_val = (uint32_t)DB_CKSUM16(mp); 8230 sum_flags = DB_CKSUMFLAGS(mp); 8231 8232 /* fragmented payload offset from beginning of mblk */ 8233 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8234 8235 if ((sum_flags & HCK_PARTIALCKSUM) && 8236 (mp1 == NULL || mp1->b_cont == NULL) && 8237 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8238 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8239 uint32_t adj; 8240 /* 8241 * Partial checksum has been calculated by hardware 8242 * and attached to the packet; in addition, any 8243 * prepended extraneous data is even byte aligned. 8244 * If any such data exists, we adjust the checksum; 8245 * this would also handle any postpended data. 8246 */ 8247 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8248 mp, mp1, len, adj); 8249 8250 /* One's complement subtract extraneous checksum */ 8251 if (adj >= sum_val) 8252 sum_val = ~(adj - sum_val) & 0xFFFF; 8253 else 8254 sum_val -= adj; 8255 } 8256 } else { 8257 sum_val = 0; 8258 sum_flags = 0; 8259 } 8260 8261 /* Clear hardware checksumming flag */ 8262 DB_CKSUMFLAGS(mp) = 0; 8263 8264 /* 8265 * Note: Fragment offset in header is in 8-octet units. 8266 * Clearing least significant 3 bits not only extracts 8267 * it but also gets it in units of octets. 8268 */ 8269 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8270 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8271 8272 /* 8273 * Is the more frags flag on and the payload length not a multiple 8274 * of eight? 
8275 */ 8276 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8277 zoneid_t zoneid; 8278 8279 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8280 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8281 if (zoneid == ALL_ZONES) { 8282 freemsg(mp); 8283 return (NULL); 8284 } 8285 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8286 (uint32_t)((char *)&ip6h->ip6_plen - 8287 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8288 return (NULL); 8289 } 8290 8291 v6src_ptr = &ip6h->ip6_src; 8292 v6dst_ptr = &ip6h->ip6_dst; 8293 end = remlen; 8294 8295 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8296 end += offset; 8297 8298 /* 8299 * Would fragment cause reassembled packet to have a payload length 8300 * greater than IP_MAXPACKET - the max payload size? 8301 */ 8302 if (end > IP_MAXPACKET) { 8303 zoneid_t zoneid; 8304 8305 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8306 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8307 if (zoneid == ALL_ZONES) { 8308 freemsg(mp); 8309 return (NULL); 8310 } 8311 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8312 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8313 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8314 return (NULL); 8315 } 8316 8317 /* 8318 * This packet just has one fragment. Reassembly not 8319 * needed. 8320 */ 8321 if (!more_frags && offset == 0) { 8322 goto reass_done; 8323 } 8324 8325 /* 8326 * Drop the fragmented as early as possible, if 8327 * we don't have resource(s) to re-assemble. 8328 */ 8329 if (ipst->ips_ip_reass_queue_bytes == 0) { 8330 freemsg(mp); 8331 return (NULL); 8332 } 8333 8334 /* Record the ECN field info. */ 8335 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8336 /* 8337 * If this is not the first fragment, dump the unfragmentable 8338 * portion of the packet. 8339 */ 8340 if (offset) 8341 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8342 8343 /* 8344 * Fragmentation reassembly. 
Each ILL has a hash table for 8345 * queueing packets undergoing reassembly for all IPIFs 8346 * associated with the ILL. The hash is based on the packet 8347 * IP ident field. The ILL frag hash table was allocated 8348 * as a timer block at the time the ILL was created. Whenever 8349 * there is anything on the reassembly queue, the timer will 8350 * be running. 8351 */ 8352 msg_len = MBLKSIZE(mp); 8353 tail_mp = mp; 8354 while (tail_mp->b_cont != NULL) { 8355 tail_mp = tail_mp->b_cont; 8356 msg_len += MBLKSIZE(tail_mp); 8357 } 8358 /* 8359 * If the reassembly list for this ILL will get too big 8360 * prune it. 8361 */ 8362 8363 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8364 ipst->ips_ip_reass_queue_bytes) { 8365 ill_frag_prune(ill, 8366 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8367 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8368 pruned = B_TRUE; 8369 } 8370 8371 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8372 mutex_enter(&ipfb->ipfb_lock); 8373 8374 ipfp = &ipfb->ipfb_ipf; 8375 /* Try to find an existing fragment queue for this packet. */ 8376 for (;;) { 8377 ipf = ipfp[0]; 8378 if (ipf) { 8379 /* 8380 * It has to match on ident, source address, and 8381 * dest address. 8382 */ 8383 if (ipf->ipf_ident == ident && 8384 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8385 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8386 8387 /* 8388 * If we have received too many 8389 * duplicate fragments for this packet 8390 * free it. 8391 */ 8392 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8393 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8394 freemsg(mp); 8395 mutex_exit(&ipfb->ipfb_lock); 8396 return (NULL); 8397 } 8398 8399 break; 8400 } 8401 ipfp = &ipf->ipf_hash_next; 8402 continue; 8403 } 8404 8405 8406 /* 8407 * If we pruned the list, do we want to store this new 8408 * fragment?. We apply an optimization here based on the 8409 * fact that most fragments will be received in order. 
8410 * So if the offset of this incoming fragment is zero, 8411 * it is the first fragment of a new packet. We will 8412 * keep it. Otherwise drop the fragment, as we have 8413 * probably pruned the packet already (since the 8414 * packet cannot be found). 8415 */ 8416 8417 if (pruned && offset != 0) { 8418 mutex_exit(&ipfb->ipfb_lock); 8419 freemsg(mp); 8420 return (NULL); 8421 } 8422 8423 /* New guy. Allocate a frag message. */ 8424 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8425 if (!mp1) { 8426 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8427 freemsg(mp); 8428 partial_reass_done: 8429 mutex_exit(&ipfb->ipfb_lock); 8430 return (NULL); 8431 } 8432 8433 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8434 /* 8435 * Too many fragmented packets in this hash bucket. 8436 * Free the oldest. 8437 */ 8438 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8439 } 8440 8441 mp1->b_cont = mp; 8442 8443 /* Initialize the fragment header. */ 8444 ipf = (ipf_t *)mp1->b_rptr; 8445 ipf->ipf_mp = mp1; 8446 ipf->ipf_ptphn = ipfp; 8447 ipfp[0] = ipf; 8448 ipf->ipf_hash_next = NULL; 8449 ipf->ipf_ident = ident; 8450 ipf->ipf_v6src = *v6src_ptr; 8451 ipf->ipf_v6dst = *v6dst_ptr; 8452 /* Record reassembly start time. 
*/ 8453 ipf->ipf_timestamp = gethrestime_sec(); 8454 /* Record ipf generation and account for frag header */ 8455 ipf->ipf_gen = ill->ill_ipf_gen++; 8456 ipf->ipf_count = MBLKSIZE(mp1); 8457 ipf->ipf_protocol = nexthdr; 8458 ipf->ipf_nf_hdr_len = 0; 8459 ipf->ipf_prev_nexthdr_offset = 0; 8460 ipf->ipf_last_frag_seen = B_FALSE; 8461 ipf->ipf_ecn = ecn_info; 8462 ipf->ipf_num_dups = 0; 8463 ipfb->ipfb_frag_pkts++; 8464 ipf->ipf_checksum = 0; 8465 ipf->ipf_checksum_flags = 0; 8466 8467 /* Store checksum value in fragment header */ 8468 if (sum_flags != 0) { 8469 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8470 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8471 ipf->ipf_checksum = sum_val; 8472 ipf->ipf_checksum_flags = sum_flags; 8473 } 8474 8475 /* 8476 * We handle reassembly two ways. In the easy case, 8477 * where all the fragments show up in order, we do 8478 * minimal bookkeeping, and just clip new pieces on 8479 * the end. If we ever see a hole, then we go off 8480 * to ip_reassemble which has to mark the pieces and 8481 * keep track of the number of holes, etc. Obviously, 8482 * the point of having both mechanisms is so we can 8483 * handle the easy case as efficiently as possible. 8484 */ 8485 if (offset == 0) { 8486 /* Easy case, in-order reassembly so far. */ 8487 /* Update the byte count */ 8488 ipf->ipf_count += msg_len; 8489 ipf->ipf_tail_mp = tail_mp; 8490 /* 8491 * Keep track of next expected offset in 8492 * ipf_end. 8493 */ 8494 ipf->ipf_end = end; 8495 ipf->ipf_nf_hdr_len = hdr_length; 8496 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8497 } else { 8498 /* Hard case, hole at the beginning. */ 8499 ipf->ipf_tail_mp = NULL; 8500 /* 8501 * ipf_end == 0 means that we have given up 8502 * on easy reassembly. 8503 */ 8504 ipf->ipf_end = 0; 8505 8506 /* Forget checksum offload from now on */ 8507 ipf->ipf_checksum_flags = 0; 8508 8509 /* 8510 * ipf_hole_cnt is set by ip_reassemble. 8511 * ipf_count is updated by ip_reassemble. 
8512 * No need to check for return value here 8513 * as we don't expect reassembly to complete or 8514 * fail for the first fragment itself. 8515 */ 8516 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8517 msg_len); 8518 } 8519 /* Update per ipfb and ill byte counts */ 8520 ipfb->ipfb_count += ipf->ipf_count; 8521 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8522 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8523 /* If the frag timer wasn't already going, start it. */ 8524 mutex_enter(&ill->ill_lock); 8525 ill_frag_timer_start(ill); 8526 mutex_exit(&ill->ill_lock); 8527 goto partial_reass_done; 8528 } 8529 8530 /* 8531 * If the packet's flag has changed (it could be coming up 8532 * from an interface different than the previous, therefore 8533 * possibly different checksum capability), then forget about 8534 * any stored checksum states. Otherwise add the value to 8535 * the existing one stored in the fragment header. 8536 */ 8537 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8538 sum_val += ipf->ipf_checksum; 8539 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8540 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8541 ipf->ipf_checksum = sum_val; 8542 } else if (ipf->ipf_checksum_flags != 0) { 8543 /* Forget checksum offload from now on */ 8544 ipf->ipf_checksum_flags = 0; 8545 } 8546 8547 /* 8548 * We have a new piece of a datagram which is already being 8549 * reassembled. Update the ECN info if all IP fragments 8550 * are ECN capable. If there is one which is not, clear 8551 * all the info. If there is at least one which has CE 8552 * code point, IP needs to report that up to transport. 
8553 */ 8554 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8555 if (ecn_info == IPH_ECN_CE) 8556 ipf->ipf_ecn = IPH_ECN_CE; 8557 } else { 8558 ipf->ipf_ecn = IPH_ECN_NECT; 8559 } 8560 8561 if (offset && ipf->ipf_end == offset) { 8562 /* The new fragment fits at the end */ 8563 ipf->ipf_tail_mp->b_cont = mp; 8564 /* Update the byte count */ 8565 ipf->ipf_count += msg_len; 8566 /* Update per ipfb and ill byte counts */ 8567 ipfb->ipfb_count += msg_len; 8568 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8569 atomic_add_32(&ill->ill_frag_count, msg_len); 8570 if (more_frags) { 8571 /* More to come. */ 8572 ipf->ipf_end = end; 8573 ipf->ipf_tail_mp = tail_mp; 8574 goto partial_reass_done; 8575 } 8576 } else { 8577 /* 8578 * Go do the hard cases. 8579 * Call ip_reassemble(). 8580 */ 8581 int ret; 8582 8583 if (offset == 0) { 8584 if (ipf->ipf_prev_nexthdr_offset == 0) { 8585 ipf->ipf_nf_hdr_len = hdr_length; 8586 ipf->ipf_prev_nexthdr_offset = 8587 *prev_nexthdr_offset; 8588 } 8589 } 8590 /* Save current byte count */ 8591 count = ipf->ipf_count; 8592 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8593 8594 /* Count of bytes added and subtracted (freeb()ed) */ 8595 count = ipf->ipf_count - count; 8596 if (count) { 8597 /* Update per ipfb and ill byte counts */ 8598 ipfb->ipfb_count += count; 8599 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8600 atomic_add_32(&ill->ill_frag_count, count); 8601 } 8602 if (ret == IP_REASS_PARTIAL) { 8603 goto partial_reass_done; 8604 } else if (ret == IP_REASS_FAILED) { 8605 /* Reassembly failed. Free up all resources */ 8606 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8607 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8608 IP_REASS_SET_START(t_mp, 0); 8609 IP_REASS_SET_END(t_mp, 0); 8610 } 8611 freemsg(mp); 8612 goto partial_reass_done; 8613 } 8614 8615 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8616 } 8617 /* 8618 * We have completed reassembly. 
Unhook the frag header from 8619 * the reassembly list. 8620 * 8621 * Grab the unfragmentable header length next header value out 8622 * of the first fragment 8623 */ 8624 ASSERT(ipf->ipf_nf_hdr_len != 0); 8625 hdr_length = ipf->ipf_nf_hdr_len; 8626 8627 /* 8628 * Before we free the frag header, record the ECN info 8629 * to report back to the transport. 8630 */ 8631 ecn_info = ipf->ipf_ecn; 8632 8633 /* 8634 * Store the nextheader field in the header preceding the fragment 8635 * header 8636 */ 8637 nexthdr = ipf->ipf_protocol; 8638 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8639 ipfp = ipf->ipf_ptphn; 8640 8641 /* We need to supply these to caller */ 8642 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8643 sum_val = ipf->ipf_checksum; 8644 else 8645 sum_val = 0; 8646 8647 mp1 = ipf->ipf_mp; 8648 count = ipf->ipf_count; 8649 ipf = ipf->ipf_hash_next; 8650 if (ipf) 8651 ipf->ipf_ptphn = ipfp; 8652 ipfp[0] = ipf; 8653 atomic_add_32(&ill->ill_frag_count, -count); 8654 ASSERT(ipfb->ipfb_count >= count); 8655 ipfb->ipfb_count -= count; 8656 ipfb->ipfb_frag_pkts--; 8657 mutex_exit(&ipfb->ipfb_lock); 8658 /* Ditch the frag header. */ 8659 mp = mp1->b_cont; 8660 freeb(mp1); 8661 8662 /* 8663 * Make sure the packet is good by doing some sanity 8664 * check. If bad we can silentely drop the packet. 8665 */ 8666 reass_done: 8667 if (hdr_length < sizeof (ip6_frag_t)) { 8668 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8669 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8670 freemsg(mp); 8671 return (NULL); 8672 } 8673 8674 /* 8675 * Remove the fragment header from the initial header by 8676 * splitting the mblk into the non-fragmentable header and 8677 * everthing after the fragment extension header. This has the 8678 * side effect of putting all the headers that need destination 8679 * processing into the b_cont block-- on return this fact is 8680 * used in order to avoid having to look at the extensions 8681 * already processed. 
8682 * 8683 * Note that this code assumes that the unfragmentable portion 8684 * of the header is in the first mblk and increments 8685 * the read pointer past it. If this assumption is broken 8686 * this code fails badly. 8687 */ 8688 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8689 mblk_t *nmp; 8690 8691 if (!(nmp = dupb(mp))) { 8692 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8693 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8694 freemsg(mp); 8695 return (NULL); 8696 } 8697 nmp->b_cont = mp->b_cont; 8698 mp->b_cont = nmp; 8699 nmp->b_rptr += hdr_length; 8700 } 8701 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8702 8703 ip6h = (ip6_t *)mp->b_rptr; 8704 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8705 8706 /* Restore original IP length in header. */ 8707 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8708 /* Record the ECN info. */ 8709 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8710 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8711 8712 /* Reassembly is successful; return checksum information if needed */ 8713 if (cksum_val != NULL) 8714 *cksum_val = sum_val; 8715 if (cksum_flags != NULL) 8716 *cksum_flags = sum_flags; 8717 8718 return (mp); 8719 } 8720 8721 /* 8722 * Given an mblk and a ptr, find the destination address in an IPv6 routing 8723 * header. 8724 */ 8725 static in6_addr_t 8726 pluck_out_dst(mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 8727 { 8728 ip6_rthdr0_t *rt0; 8729 int segleft, numaddr; 8730 in6_addr_t *ap, rv = oldrv; 8731 8732 rt0 = (ip6_rthdr0_t *)whereptr; 8733 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 8734 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 8735 uint8_t *, whereptr); 8736 return (rv); 8737 } 8738 segleft = rt0->ip6r0_segleft; 8739 numaddr = rt0->ip6r0_len / 2; 8740 8741 if ((rt0->ip6r0_len & 0x1) || 8742 whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr || 8743 (segleft > rt0->ip6r0_len / 2)) { 8744 /* 8745 * Corrupt packet. 
Either the routing header length is odd 8746 * (can't happen) or mismatched compared to the packet, or the 8747 * number of addresses is. Return what we can. This will 8748 * only be a problem on forwarded packets that get squeezed 8749 * through an outbound tunnel enforcing IPsec Tunnel Mode. 8750 */ 8751 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 8752 whereptr); 8753 return (rv); 8754 } 8755 8756 if (segleft != 0) { 8757 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 8758 rv = ap[numaddr - 1]; 8759 } 8760 8761 return (rv); 8762 } 8763 8764 /* 8765 * Walk through the options to see if there is a routing header. 8766 * If present get the destination which is the last address of 8767 * the option. 8768 */ 8769 in6_addr_t 8770 ip_get_dst_v6(ip6_t *ip6h, mblk_t *mp, boolean_t *is_fragment) 8771 { 8772 mblk_t *current_mp = mp; 8773 uint8_t nexthdr; 8774 uint8_t *whereptr; 8775 int ehdrlen; 8776 in6_addr_t rv; 8777 8778 whereptr = (uint8_t *)ip6h; 8779 ehdrlen = sizeof (ip6_t); 8780 8781 /* We assume at least the IPv6 base header is within one mblk. */ 8782 ASSERT(mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen); 8783 8784 rv = ip6h->ip6_dst; 8785 nexthdr = ip6h->ip6_nxt; 8786 if (is_fragment != NULL) 8787 *is_fragment = B_FALSE; 8788 8789 /* 8790 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 8791 * no extension headers will be split across mblks. 8792 */ 8793 8794 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 8795 nexthdr == IPPROTO_ROUTING) { 8796 if (nexthdr == IPPROTO_ROUTING) 8797 rv = pluck_out_dst(current_mp, whereptr, rv); 8798 8799 /* 8800 * All IPv6 extension headers have the next-header in byte 8801 * 0, and the (length - 8) in 8-byte-words. 8802 */ 8803 while (whereptr + ehdrlen >= current_mp->b_wptr) { 8804 ehdrlen -= (current_mp->b_wptr - whereptr); 8805 current_mp = current_mp->b_cont; 8806 if (current_mp == NULL) { 8807 /* Bad packet. Return what we can. 
*/ 8808 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 8809 mp, mblk_t *, current_mp, ip6_t *, ip6h); 8810 goto done; 8811 } 8812 whereptr = current_mp->b_rptr; 8813 } 8814 whereptr += ehdrlen; 8815 8816 nexthdr = *whereptr; 8817 ASSERT(whereptr + 1 < current_mp->b_wptr); 8818 ehdrlen = (*(whereptr + 1) + 1) * 8; 8819 } 8820 8821 done: 8822 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 8823 *is_fragment = B_TRUE; 8824 return (rv); 8825 } 8826 8827 /* 8828 * ip_source_routed_v6: 8829 * This function is called by redirect code in ip_rput_data_v6 to 8830 * know whether this packet is source routed through this node i.e 8831 * whether this node (router) is part of the journey. This 8832 * function is called under two cases : 8833 * 8834 * case 1 : Routing header was processed by this node and 8835 * ip_process_rthdr replaced ip6_dst with the next hop 8836 * and we are forwarding the packet to the next hop. 8837 * 8838 * case 2 : Routing header was not processed by this node and we 8839 * are just forwarding the packet. 8840 * 8841 * For case (1) we don't want to send redirects. For case(2) we 8842 * want to send redirects. 
8843 */ 8844 static boolean_t 8845 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 8846 { 8847 uint8_t nexthdr; 8848 in6_addr_t *addrptr; 8849 ip6_rthdr0_t *rthdr; 8850 uint8_t numaddr; 8851 ip6_hbh_t *hbhhdr; 8852 uint_t ehdrlen; 8853 uint8_t *byteptr; 8854 8855 ip2dbg(("ip_source_routed_v6\n")); 8856 nexthdr = ip6h->ip6_nxt; 8857 ehdrlen = IPV6_HDR_LEN; 8858 8859 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8860 while (nexthdr == IPPROTO_HOPOPTS || 8861 nexthdr == IPPROTO_DSTOPTS) { 8862 byteptr = (uint8_t *)ip6h + ehdrlen; 8863 /* 8864 * Check if we have already processed 8865 * packets or we are just a forwarding 8866 * router which only pulled up msgs up 8867 * to IPV6HDR and one HBH ext header 8868 */ 8869 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8870 ip2dbg(("ip_source_routed_v6: Extension" 8871 " headers not processed\n")); 8872 return (B_FALSE); 8873 } 8874 hbhhdr = (ip6_hbh_t *)byteptr; 8875 nexthdr = hbhhdr->ip6h_nxt; 8876 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 8877 } 8878 switch (nexthdr) { 8879 case IPPROTO_ROUTING: 8880 byteptr = (uint8_t *)ip6h + ehdrlen; 8881 /* 8882 * If for some reason, we haven't pulled up 8883 * the routing hdr data mblk, then we must 8884 * not have processed it at all. So for sure 8885 * we are not part of the source routed journey. 8886 */ 8887 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8888 ip2dbg(("ip_source_routed_v6: Routing" 8889 " header not processed\n")); 8890 return (B_FALSE); 8891 } 8892 rthdr = (ip6_rthdr0_t *)byteptr; 8893 /* 8894 * Either we are an intermediate router or the 8895 * last hop before destination and we have 8896 * already processed the routing header. 8897 * If segment_left is greater than or equal to zero, 8898 * then we must be the (numaddr - segleft) entry 8899 * of the routing header. Although ip6r0_segleft 8900 * is a unit8_t variable, we still check for zero 8901 * or greater value, if in case the data type 8902 * is changed someday in future. 
8903 */ 8904 if (rthdr->ip6r0_segleft > 0 || 8905 rthdr->ip6r0_segleft == 0) { 8906 ire_t *ire = NULL; 8907 8908 numaddr = rthdr->ip6r0_len / 2; 8909 addrptr = (in6_addr_t *)((char *)rthdr + 8910 sizeof (*rthdr)); 8911 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 8912 if (addrptr != NULL) { 8913 ire = ire_ctable_lookup_v6(addrptr, NULL, 8914 IRE_LOCAL, NULL, ALL_ZONES, NULL, 8915 MATCH_IRE_TYPE, 8916 ipst); 8917 if (ire != NULL) { 8918 ire_refrele(ire); 8919 return (B_TRUE); 8920 } 8921 ip1dbg(("ip_source_routed_v6: No ire found\n")); 8922 } 8923 } 8924 /* FALLTHRU */ 8925 default: 8926 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 8927 return (B_FALSE); 8928 } 8929 } 8930 8931 /* 8932 * ip_wput_v6 -- Packets sent down from transport modules show up here. 8933 * Assumes that the following set of headers appear in the first 8934 * mblk: 8935 * ip6i_t (if present) CAN also appear as a separate mblk. 8936 * ip6_t 8937 * Any extension headers 8938 * TCP/UDP/SCTP header (if present) 8939 * The routine can handle an ICMPv6 header that is not in the first mblk. 8940 * 8941 * The order to determine the outgoing interface is as follows: 8942 * 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. 8943 * 2. If q is an ill queue and (link local or multicast destination) then 8944 * use that ill. 8945 * 3. If IPV6_BOUND_IF has been set use that ill. 8946 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 8947 * look for the best IRE match for the unspecified group to determine 8948 * the ill. 8949 * 5. For unicast: Just do an IRE lookup for the best match. 8950 * 8951 * arg2 is always a queue_t *. 8952 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 8953 * the zoneid. 8954 * When that queue is not an ill_t, then arg must be a conn_t pointer. 
8955 */ 8956 void 8957 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 8958 { 8959 conn_t *connp = NULL; 8960 queue_t *q = (queue_t *)arg2; 8961 ire_t *ire = NULL; 8962 ire_t *sctp_ire = NULL; 8963 ip6_t *ip6h; 8964 in6_addr_t *v6dstp; 8965 ill_t *ill = NULL; 8966 ipif_t *ipif; 8967 ip6i_t *ip6i; 8968 int cksum_request; /* -1 => normal. */ 8969 /* 1 => Skip TCP/UDP/SCTP checksum */ 8970 /* Otherwise contains insert offset for checksum */ 8971 int unspec_src; 8972 boolean_t do_outrequests; /* Increment OutRequests? */ 8973 mib2_ipIfStatsEntry_t *mibptr; 8974 int match_flags = MATCH_IRE_ILL; 8975 mblk_t *first_mp; 8976 boolean_t mctl_present; 8977 ipsec_out_t *io; 8978 boolean_t multirt_need_resolve = B_FALSE; 8979 mblk_t *copy_mp = NULL; 8980 int err = 0; 8981 int ip6i_flags = 0; 8982 zoneid_t zoneid; 8983 ill_t *saved_ill = NULL; 8984 boolean_t conn_lock_held; 8985 boolean_t need_decref = B_FALSE; 8986 ip_stack_t *ipst; 8987 8988 if (q->q_next != NULL) { 8989 ill = (ill_t *)q->q_ptr; 8990 ipst = ill->ill_ipst; 8991 } else { 8992 connp = (conn_t *)arg; 8993 ASSERT(connp != NULL); 8994 ipst = connp->conn_netstack->netstack_ip; 8995 } 8996 8997 /* 8998 * Highest bit in version field is Reachability Confirmation bit 8999 * used by NUD in ip_xmit_v6(). 9000 */ 9001 #ifdef _BIG_ENDIAN 9002 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9003 #else 9004 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9005 #endif 9006 9007 /* 9008 * M_CTL comes from 5 places 9009 * 9010 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9011 * both V4 and V6 datagrams. 9012 * 9013 * 2) AH/ESP sends down M_CTL after doing their job with both 9014 * V4 and V6 datagrams. 9015 * 9016 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9017 * attached. 
9018 * 9019 * 4) Notifications from an external resolver (for XRESOLV ifs) 9020 * 9021 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9022 * IPsec hardware acceleration support. 9023 * 9024 * We need to handle (1)'s IPv6 case and (3) here. For the 9025 * IPv4 case in (1), and (2), IPSEC processing has already 9026 * started. The code in ip_wput() already knows how to handle 9027 * continuing IPSEC processing (for IPv4 and IPv6). All other 9028 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9029 * for handling. 9030 */ 9031 first_mp = mp; 9032 mctl_present = B_FALSE; 9033 io = NULL; 9034 9035 /* Multidata transmit? */ 9036 if (DB_TYPE(mp) == M_MULTIDATA) { 9037 /* 9038 * We should never get here, since all Multidata messages 9039 * originating from tcp should have been directed over to 9040 * tcp_multisend() in the first place. 9041 */ 9042 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9043 freemsg(mp); 9044 return; 9045 } else if (DB_TYPE(mp) == M_CTL) { 9046 uint32_t mctltype = 0; 9047 uint32_t mlen = MBLKL(first_mp); 9048 9049 mp = mp->b_cont; 9050 mctl_present = B_TRUE; 9051 io = (ipsec_out_t *)first_mp->b_rptr; 9052 9053 /* 9054 * Validate this M_CTL message. The only three types of 9055 * M_CTL messages we expect to see in this code path are 9056 * ipsec_out_t or ipsec_in_t structures (allocated as 9057 * ipsec_info_t unions), or ipsec_ctl_t structures. 9058 * The ipsec_out_type and ipsec_in_type overlap in the two 9059 * data structures, and they are either set to IPSEC_OUT 9060 * or IPSEC_IN depending on which data structure it is. 9061 * ipsec_ctl_t is an IPSEC_CTL. 9062 * 9063 * All other M_CTL messages are sent to ip_wput_nondata() 9064 * for handling. 
9065 */ 9066 if (mlen >= sizeof (io->ipsec_out_type)) 9067 mctltype = io->ipsec_out_type; 9068 9069 if ((mlen == sizeof (ipsec_ctl_t)) && 9070 (mctltype == IPSEC_CTL)) { 9071 ip_output(arg, first_mp, arg2, caller); 9072 return; 9073 } 9074 9075 if ((mlen < sizeof (ipsec_info_t)) || 9076 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9077 mp == NULL) { 9078 ip_wput_nondata(NULL, q, first_mp, NULL); 9079 return; 9080 } 9081 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9082 if (q->q_next == NULL) { 9083 ip6h = (ip6_t *)mp->b_rptr; 9084 /* 9085 * For a freshly-generated TCP dgram that needs IPV6 9086 * processing, don't call ip_wput immediately. We can 9087 * tell this by the ipsec_out_proc_begin. In-progress 9088 * IPSEC_OUT messages have proc_begin set to TRUE, 9089 * and we want to send all IPSEC_IN messages to 9090 * ip_wput() for IPsec processing or finishing. 9091 */ 9092 if (mctltype == IPSEC_IN || 9093 IPVER(ip6h) != IPV6_VERSION || 9094 io->ipsec_out_proc_begin) { 9095 mibptr = &ipst->ips_ip6_mib; 9096 goto notv6; 9097 } 9098 } 9099 } else if (DB_TYPE(mp) != M_DATA) { 9100 ip_wput_nondata(NULL, q, mp, NULL); 9101 return; 9102 } 9103 9104 ip6h = (ip6_t *)mp->b_rptr; 9105 9106 if (IPVER(ip6h) != IPV6_VERSION) { 9107 mibptr = &ipst->ips_ip6_mib; 9108 goto notv6; 9109 } 9110 9111 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9112 (connp == NULL || !connp->conn_ulp_labeled)) { 9113 cred_t *cr; 9114 pid_t pid; 9115 9116 if (connp != NULL) { 9117 ASSERT(CONN_CRED(connp) != NULL); 9118 cr = BEST_CRED(mp, connp, &pid); 9119 err = tsol_check_label_v6(cr, &mp, 9120 connp->conn_mac_exempt, ipst, pid); 9121 } else if ((cr = msg_getcred(mp, &pid)) != NULL) { 9122 err = tsol_check_label_v6(cr, &mp, B_FALSE, ipst, pid); 9123 } 9124 if (mctl_present) 9125 first_mp->b_cont = mp; 9126 else 9127 first_mp = mp; 9128 if (err != 0) { 9129 DTRACE_PROBE3( 9130 tsol_ip_log_drop_checklabel_ip6, char *, 9131 "conn(1), failed to check/update mp(2)", 9132 conn_t, 
connp, mblk_t, mp); 9133 freemsg(first_mp); 9134 return; 9135 } 9136 ip6h = (ip6_t *)mp->b_rptr; 9137 } 9138 if (q->q_next != NULL) { 9139 /* 9140 * We don't know if this ill will be used for IPv6 9141 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9142 * ipif_set_values() sets the ill_isv6 flag to true if 9143 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9144 * just drop the packet. 9145 */ 9146 if (!ill->ill_isv6) { 9147 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9148 "ILLF_IPV6 was set\n")); 9149 freemsg(first_mp); 9150 return; 9151 } 9152 /* For uniformity do a refhold */ 9153 mutex_enter(&ill->ill_lock); 9154 if (!ILL_CAN_LOOKUP(ill)) { 9155 mutex_exit(&ill->ill_lock); 9156 freemsg(first_mp); 9157 return; 9158 } 9159 ill_refhold_locked(ill); 9160 mutex_exit(&ill->ill_lock); 9161 mibptr = ill->ill_ip_mib; 9162 9163 ASSERT(mibptr != NULL); 9164 unspec_src = 0; 9165 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9166 do_outrequests = B_FALSE; 9167 zoneid = (zoneid_t)(uintptr_t)arg; 9168 } else { 9169 ASSERT(connp != NULL); 9170 zoneid = connp->conn_zoneid; 9171 9172 /* is queue flow controlled? */ 9173 if ((q->q_first || connp->conn_draining) && 9174 (caller == IP_WPUT)) { 9175 /* 9176 * 1) TCP sends down M_CTL for detached connections. 9177 * 2) AH/ESP sends down M_CTL. 9178 * 9179 * We don't flow control either of the above. Only 9180 * UDP and others are flow controlled for which we 9181 * can't have a M_CTL. 9182 */ 9183 ASSERT(first_mp == mp); 9184 (void) putq(q, mp); 9185 return; 9186 } 9187 mibptr = &ipst->ips_ip6_mib; 9188 unspec_src = connp->conn_unspec_src; 9189 do_outrequests = B_TRUE; 9190 if (mp->b_flag & MSGHASREF) { 9191 mp->b_flag &= ~MSGHASREF; 9192 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9193 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9194 need_decref = B_TRUE; 9195 } 9196 9197 /* 9198 * If there is a policy, try to attach an ipsec_out in 9199 * the front. 
At the end, first_mp either points to a 9200 * M_DATA message or IPSEC_OUT message linked to a 9201 * M_DATA message. We have to do it now as we might 9202 * lose the "conn" if we go through ip_newroute. 9203 */ 9204 if (!mctl_present && 9205 (connp->conn_out_enforce_policy || 9206 connp->conn_latch != NULL)) { 9207 ASSERT(first_mp == mp); 9208 /* XXX Any better way to get the protocol fast ? */ 9209 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9210 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9211 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9212 if (need_decref) 9213 CONN_DEC_REF(connp); 9214 return; 9215 } else { 9216 ASSERT(mp->b_datap->db_type == M_CTL); 9217 first_mp = mp; 9218 mp = mp->b_cont; 9219 mctl_present = B_TRUE; 9220 io = (ipsec_out_t *)first_mp->b_rptr; 9221 } 9222 } 9223 } 9224 9225 /* check for alignment and full IPv6 header */ 9226 if (!OK_32PTR((uchar_t *)ip6h) || 9227 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9228 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9229 if (do_outrequests) 9230 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9231 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9232 freemsg(first_mp); 9233 if (ill != NULL) 9234 ill_refrele(ill); 9235 if (need_decref) 9236 CONN_DEC_REF(connp); 9237 return; 9238 } 9239 v6dstp = &ip6h->ip6_dst; 9240 cksum_request = -1; 9241 ip6i = NULL; 9242 9243 /* 9244 * Once neighbor discovery has completed, ndp_process() will provide 9245 * locally generated packets for which processing can be reattempted. 9246 * In these cases, connp is NULL and the original zone is part of a 9247 * prepended ipsec_out_t. 9248 */ 9249 if (io != NULL) { 9250 /* 9251 * When coming from icmp_input_v6, the zoneid might not match 9252 * for the loopback case, because inside icmp_input_v6 the 9253 * queue_t is a conn queue from the sending side. 
9254 */ 9255 zoneid = io->ipsec_out_zoneid; 9256 ASSERT(zoneid != ALL_ZONES); 9257 } 9258 9259 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9260 /* 9261 * This is an ip6i_t header followed by an ip6_hdr. 9262 * Check which fields are set. 9263 * 9264 * When the packet comes from a transport we should have 9265 * all needed headers in the first mblk. However, when 9266 * going through ip_newroute*_v6 the ip6i might be in 9267 * a separate mblk when we return here. In that case 9268 * we pullup everything to ensure that extension and transport 9269 * headers "stay" in the first mblk. 9270 */ 9271 ip6i = (ip6i_t *)ip6h; 9272 ip6i_flags = ip6i->ip6i_flags; 9273 9274 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9275 ((mp->b_wptr - (uchar_t *)ip6i) >= 9276 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9277 9278 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9279 if (!pullupmsg(mp, -1)) { 9280 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9281 if (do_outrequests) { 9282 BUMP_MIB(mibptr, 9283 ipIfStatsHCOutRequests); 9284 } 9285 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9286 freemsg(first_mp); 9287 if (ill != NULL) 9288 ill_refrele(ill); 9289 if (need_decref) 9290 CONN_DEC_REF(connp); 9291 return; 9292 } 9293 ip6h = (ip6_t *)mp->b_rptr; 9294 v6dstp = &ip6h->ip6_dst; 9295 ip6i = (ip6i_t *)ip6h; 9296 } 9297 ip6h = (ip6_t *)&ip6i[1]; 9298 9299 /* 9300 * Advance rptr past the ip6i_t to get ready for 9301 * transmitting the packet. However, if the packet gets 9302 * passed to ip_newroute*_v6 then rptr is moved back so 9303 * that the ip6i_t header can be inspected when the 9304 * packet comes back here after passing through 9305 * ire_add_then_send. 
9306 */ 9307 mp->b_rptr = (uchar_t *)ip6h; 9308 9309 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9310 ASSERT(ip6i->ip6i_ifindex != 0); 9311 if (ill != NULL) 9312 ill_refrele(ill); 9313 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9314 NULL, NULL, NULL, NULL, ipst); 9315 if (ill == NULL) { 9316 if (do_outrequests) { 9317 BUMP_MIB(mibptr, 9318 ipIfStatsHCOutRequests); 9319 } 9320 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9321 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9322 ip6i->ip6i_ifindex)); 9323 if (need_decref) 9324 CONN_DEC_REF(connp); 9325 freemsg(first_mp); 9326 return; 9327 } 9328 mibptr = ill->ill_ip_mib; 9329 /* 9330 * Preserve the index so that when we return from 9331 * IPSEC processing, we know where to send the packet. 9332 */ 9333 if (mctl_present) { 9334 ASSERT(io != NULL); 9335 io->ipsec_out_ill_index = ip6i->ip6i_ifindex; 9336 } 9337 } 9338 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9339 cred_t *cr = msg_getcred(mp, NULL); 9340 9341 /* rpcmod doesn't send down db_credp for UDP packets */ 9342 if (cr == NULL) { 9343 if (connp != NULL) 9344 cr = connp->conn_cred; 9345 else 9346 cr = ill->ill_credp; 9347 } 9348 9349 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9350 if (secpolicy_net_rawaccess(cr) != 0) { 9351 /* 9352 * Use IPCL_ZONEID to honor SO_ALLZONES. 9353 */ 9354 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9355 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9356 NULL, connp != NULL ? 
9357 IPCL_ZONEID(connp) : zoneid, NULL, 9358 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9359 if (ire == NULL) { 9360 if (do_outrequests) 9361 BUMP_MIB(mibptr, 9362 ipIfStatsHCOutRequests); 9363 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9364 ip1dbg(("ip_wput_v6: bad source " 9365 "addr\n")); 9366 freemsg(first_mp); 9367 if (ill != NULL) 9368 ill_refrele(ill); 9369 if (need_decref) 9370 CONN_DEC_REF(connp); 9371 return; 9372 } 9373 ire_refrele(ire); 9374 } 9375 /* No need to verify again when using ip_newroute */ 9376 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9377 } 9378 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9379 /* 9380 * Make sure they match since ip_newroute*_v6 etc might 9381 * (unknown to them) inspect ip6i_nexthop when 9382 * they think they access ip6_dst. 9383 */ 9384 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9385 } 9386 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9387 cksum_request = 1; 9388 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9389 cksum_request = ip6i->ip6i_checksum_off; 9390 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9391 unspec_src = 1; 9392 9393 if (do_outrequests && ill != NULL) { 9394 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9395 do_outrequests = B_FALSE; 9396 } 9397 /* 9398 * Store ip6i_t info that we need after we come back 9399 * from IPSEC processing. 9400 */ 9401 if (mctl_present) { 9402 ASSERT(io != NULL); 9403 io->ipsec_out_unspec_src = unspec_src; 9404 } 9405 } 9406 if (connp != NULL && connp->conn_dontroute) 9407 ip6h->ip6_hops = 1; 9408 9409 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9410 goto ipv6multicast; 9411 9412 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9413 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9414 ASSERT(ill != NULL); 9415 goto send_from_ill; 9416 } 9417 9418 /* 9419 * 2. If q is an ill queue and there's a link-local destination 9420 * then use that ill. 9421 */ 9422 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) 9423 goto send_from_ill; 9424 9425 /* 3. If IPV6_BOUND_IF has been set use that ill. 
*/ 9426 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9427 ill_t *conn_outgoing_ill; 9428 9429 conn_outgoing_ill = conn_get_held_ill(connp, 9430 &connp->conn_outgoing_ill, &err); 9431 if (err == ILL_LOOKUP_FAILED) { 9432 if (ill != NULL) 9433 ill_refrele(ill); 9434 if (need_decref) 9435 CONN_DEC_REF(connp); 9436 freemsg(first_mp); 9437 return; 9438 } 9439 if (ill != NULL) 9440 ill_refrele(ill); 9441 ill = conn_outgoing_ill; 9442 mibptr = ill->ill_ip_mib; 9443 goto send_from_ill; 9444 } 9445 9446 /* 9447 * 4. For unicast: Just do an IRE lookup for the best match. 9448 * If we get here for a link-local address it is rather random 9449 * what interface we pick on a multihomed host. 9450 * *If* there is an IRE_CACHE (and the link-local address 9451 * isn't duplicated on multi links) this will find the IRE_CACHE. 9452 * Otherwise it will use one of the matching IRE_INTERFACE routes 9453 * for the link-local prefix. Hence, applications 9454 * *should* be encouraged to specify an outgoing interface when sending 9455 * to a link local address. 9456 */ 9457 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9458 !connp->conn_fully_bound)) { 9459 /* 9460 * We cache IRE_CACHEs to avoid lookups. We don't do 9461 * this for the tcp global queue and listen end point 9462 * as it does not really have a real destination to 9463 * talk to. 9464 */ 9465 ire = ire_cache_lookup_v6(v6dstp, zoneid, msg_getlabel(mp), 9466 ipst); 9467 } else { 9468 /* 9469 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9470 * grab a lock here to check for CONDEMNED as it is okay 9471 * to send a packet or two with the IRE_CACHE that is going 9472 * away. 9473 */ 9474 mutex_enter(&connp->conn_lock); 9475 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9476 if (ire != NULL && 9477 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9478 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9479 9480 IRE_REFHOLD(ire); 9481 mutex_exit(&connp->conn_lock); 9482 9483 } else { 9484 boolean_t cached = B_FALSE; 9485 9486 connp->conn_ire_cache = NULL; 9487 mutex_exit(&connp->conn_lock); 9488 /* Release the old ire */ 9489 if (ire != NULL && sctp_ire == NULL) 9490 IRE_REFRELE_NOTR(ire); 9491 9492 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9493 msg_getlabel(mp), ipst); 9494 if (ire != NULL) { 9495 IRE_REFHOLD_NOTR(ire); 9496 9497 mutex_enter(&connp->conn_lock); 9498 if (CONN_CACHE_IRE(connp) && 9499 (connp->conn_ire_cache == NULL)) { 9500 rw_enter(&ire->ire_bucket->irb_lock, 9501 RW_READER); 9502 if (!(ire->ire_marks & 9503 IRE_MARK_CONDEMNED)) { 9504 connp->conn_ire_cache = ire; 9505 cached = B_TRUE; 9506 } 9507 rw_exit(&ire->ire_bucket->irb_lock); 9508 } 9509 mutex_exit(&connp->conn_lock); 9510 9511 /* 9512 * We can continue to use the ire but since it 9513 * was not cached, we should drop the extra 9514 * reference. 9515 */ 9516 if (!cached) 9517 IRE_REFRELE_NOTR(ire); 9518 } 9519 } 9520 } 9521 9522 if (ire != NULL) { 9523 if (do_outrequests) { 9524 /* Handle IRE_LOCAL's that might appear here */ 9525 if (ire->ire_type == IRE_CACHE) { 9526 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9527 ill_ip_mib; 9528 } else { 9529 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9530 } 9531 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9532 } 9533 9534 /* 9535 * Check if the ire has the RTF_MULTIRT flag, inherited 9536 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9537 */ 9538 if (ire->ire_flags & RTF_MULTIRT) { 9539 /* 9540 * Force hop limit of multirouted packets if required. 9541 * The hop limit of such packets is bounded by the 9542 * ip_multirt_ttl ndd variable. 9543 * NDP packets must have a hop limit of 255; don't 9544 * change the hop limit in that case. 
9545 */ 9546 if ((ipst->ips_ip_multirt_ttl > 0) && 9547 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9548 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9549 if (ip_debug > 3) { 9550 ip2dbg(("ip_wput_v6: forcing multirt " 9551 "hop limit to %d (was %d) ", 9552 ipst->ips_ip_multirt_ttl, 9553 ip6h->ip6_hops)); 9554 pr_addr_dbg("v6dst %s\n", AF_INET6, 9555 &ire->ire_addr_v6); 9556 } 9557 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9558 } 9559 9560 /* 9561 * We look at this point if there are pending 9562 * unresolved routes. ire_multirt_need_resolve_v6() 9563 * checks in O(n) that all IRE_OFFSUBNET ire 9564 * entries for the packet's destination and 9565 * flagged RTF_MULTIRT are currently resolved. 9566 * If some remain unresolved, we do a copy 9567 * of the current message. It will be used 9568 * to initiate additional route resolutions. 9569 */ 9570 multirt_need_resolve = 9571 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9572 msg_getlabel(first_mp), ipst); 9573 ip2dbg(("ip_wput_v6: ire %p, " 9574 "multirt_need_resolve %d, first_mp %p\n", 9575 (void *)ire, multirt_need_resolve, 9576 (void *)first_mp)); 9577 if (multirt_need_resolve) { 9578 copy_mp = copymsg(first_mp); 9579 if (copy_mp != NULL) { 9580 MULTIRT_DEBUG_TAG(copy_mp); 9581 } 9582 } 9583 } 9584 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9585 connp, caller, ip6i_flags, zoneid); 9586 if (need_decref) { 9587 CONN_DEC_REF(connp); 9588 connp = NULL; 9589 } 9590 IRE_REFRELE(ire); 9591 9592 /* 9593 * Try to resolve another multiroute if 9594 * ire_multirt_need_resolve_v6() deemed it necessary. 9595 * copy_mp will be consumed (sent or freed) by 9596 * ip_newroute_v6(). 
9597 */ 9598 if (copy_mp != NULL) { 9599 if (mctl_present) { 9600 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9601 } else { 9602 ip6h = (ip6_t *)copy_mp->b_rptr; 9603 } 9604 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9605 &ip6h->ip6_src, NULL, zoneid, ipst); 9606 } 9607 if (ill != NULL) 9608 ill_refrele(ill); 9609 return; 9610 } 9611 9612 /* 9613 * No full IRE for this destination. Send it to 9614 * ip_newroute_v6 to see if anything else matches. 9615 * Mark this packet as having originated on this 9616 * machine. 9617 * Update rptr if there was an ip6i_t header. 9618 */ 9619 mp->b_prev = NULL; 9620 mp->b_next = NULL; 9621 if (ip6i != NULL) 9622 mp->b_rptr -= sizeof (ip6i_t); 9623 9624 if (unspec_src) { 9625 if (ip6i == NULL) { 9626 /* 9627 * Add ip6i_t header to carry unspec_src 9628 * until the packet comes back in ip_wput_v6. 9629 */ 9630 mp = ip_add_info_v6(mp, NULL, v6dstp); 9631 if (mp == NULL) { 9632 if (do_outrequests) 9633 BUMP_MIB(mibptr, 9634 ipIfStatsHCOutRequests); 9635 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9636 if (mctl_present) 9637 freeb(first_mp); 9638 if (ill != NULL) 9639 ill_refrele(ill); 9640 if (need_decref) 9641 CONN_DEC_REF(connp); 9642 return; 9643 } 9644 ip6i = (ip6i_t *)mp->b_rptr; 9645 9646 if (mctl_present) { 9647 ASSERT(first_mp != mp); 9648 first_mp->b_cont = mp; 9649 } else { 9650 first_mp = mp; 9651 } 9652 9653 if ((mp->b_wptr - (uchar_t *)ip6i) == 9654 sizeof (ip6i_t)) { 9655 /* 9656 * ndp_resolver called from ip_newroute_v6 9657 * expects pulled up message. 
9658 */ 9659 if (!pullupmsg(mp, -1)) { 9660 ip1dbg(("ip_wput_v6: pullupmsg" 9661 " failed\n")); 9662 if (do_outrequests) { 9663 BUMP_MIB(mibptr, 9664 ipIfStatsHCOutRequests); 9665 } 9666 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9667 freemsg(first_mp); 9668 if (ill != NULL) 9669 ill_refrele(ill); 9670 if (need_decref) 9671 CONN_DEC_REF(connp); 9672 return; 9673 } 9674 ip6i = (ip6i_t *)mp->b_rptr; 9675 } 9676 ip6h = (ip6_t *)&ip6i[1]; 9677 v6dstp = &ip6h->ip6_dst; 9678 } 9679 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9680 if (mctl_present) { 9681 ASSERT(io != NULL); 9682 io->ipsec_out_unspec_src = unspec_src; 9683 } 9684 } 9685 if (do_outrequests) 9686 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9687 if (need_decref) 9688 CONN_DEC_REF(connp); 9689 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 9690 if (ill != NULL) 9691 ill_refrele(ill); 9692 return; 9693 9694 9695 /* 9696 * Handle multicast packets with or without an conn. 9697 * Assumes that the transports set ip6_hops taking 9698 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9699 * into account. 9700 */ 9701 ipv6multicast: 9702 ip2dbg(("ip_wput_v6: multicast\n")); 9703 9704 /* 9705 * Hold the conn_lock till we refhold the ill of interest that is 9706 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9707 * while holding any locks, postpone the refrele until after the 9708 * conn_lock is dropped. 9709 */ 9710 if (connp != NULL) { 9711 mutex_enter(&connp->conn_lock); 9712 conn_lock_held = B_TRUE; 9713 } else { 9714 conn_lock_held = B_FALSE; 9715 } 9716 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9717 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9718 ASSERT(ill != NULL); 9719 } else if (ill != NULL) { 9720 /* 9721 * 2. If q is an ill queue and (link local or multicast 9722 * destination) then use that ill. 9723 * We don't need the ipif initialization here. 
9724 * This useless assert below is just to prevent lint from 9725 * reporting a null body if statement. 9726 */ 9727 ASSERT(ill != NULL); 9728 } else if (connp != NULL) { 9729 /* 9730 * 3. If IPV6_BOUND_IF has been set use that ill. 9731 * 9732 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. 9733 * Otherwise look for the best IRE match for the unspecified 9734 * group to determine the ill. 9735 * 9736 * conn_multicast_ill is used for only IPv6 packets. 9737 * conn_multicast_ipif is used for only IPv4 packets. 9738 * Thus a PF_INET6 socket send both IPv4 and IPv6 9739 * multicast packets using different IP*_MULTICAST_IF 9740 * interfaces. 9741 */ 9742 if (connp->conn_outgoing_ill != NULL) { 9743 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9744 if (err == ILL_LOOKUP_FAILED) { 9745 ip1dbg(("ip_output_v6: multicast" 9746 " conn_outgoing_ill no ipif\n")); 9747 multicast_discard: 9748 ASSERT(saved_ill == NULL); 9749 if (conn_lock_held) 9750 mutex_exit(&connp->conn_lock); 9751 if (ill != NULL) 9752 ill_refrele(ill); 9753 freemsg(first_mp); 9754 if (do_outrequests) 9755 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9756 if (need_decref) 9757 CONN_DEC_REF(connp); 9758 return; 9759 } 9760 ill = connp->conn_outgoing_ill; 9761 } else if (connp->conn_multicast_ill != NULL) { 9762 err = ill_check_and_refhold(connp->conn_multicast_ill); 9763 if (err == ILL_LOOKUP_FAILED) { 9764 ip1dbg(("ip_output_v6: multicast" 9765 " conn_multicast_ill no ipif\n")); 9766 goto multicast_discard; 9767 } 9768 ill = connp->conn_multicast_ill; 9769 } else { 9770 mutex_exit(&connp->conn_lock); 9771 conn_lock_held = B_FALSE; 9772 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 9773 if (ipif == NULL) { 9774 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9775 goto multicast_discard; 9776 } 9777 /* 9778 * We have a ref to this ipif, so we can safely 9779 * access ipif_ill. 
9780 */ 9781 ill = ipif->ipif_ill; 9782 mutex_enter(&ill->ill_lock); 9783 if (!ILL_CAN_LOOKUP(ill)) { 9784 mutex_exit(&ill->ill_lock); 9785 ipif_refrele(ipif); 9786 ill = NULL; 9787 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9788 goto multicast_discard; 9789 } 9790 ill_refhold_locked(ill); 9791 mutex_exit(&ill->ill_lock); 9792 ipif_refrele(ipif); 9793 /* 9794 * Save binding until IPV6_MULTICAST_IF 9795 * changes it 9796 */ 9797 mutex_enter(&connp->conn_lock); 9798 connp->conn_multicast_ill = ill; 9799 mutex_exit(&connp->conn_lock); 9800 } 9801 } 9802 if (conn_lock_held) 9803 mutex_exit(&connp->conn_lock); 9804 9805 if (saved_ill != NULL) 9806 ill_refrele(saved_ill); 9807 9808 ASSERT(ill != NULL); 9809 /* 9810 * For multicast loopback interfaces replace the multicast address 9811 * with a unicast address for the ire lookup. 9812 */ 9813 if (IS_LOOPBACK(ill)) 9814 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9815 9816 mibptr = ill->ill_ip_mib; 9817 if (do_outrequests) { 9818 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9819 do_outrequests = B_FALSE; 9820 } 9821 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 9822 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 9823 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 9824 9825 /* 9826 * As we may lose the conn by the time we reach ip_wput_ire_v6 9827 * we copy conn_multicast_loop and conn_dontroute on to an 9828 * ipsec_out. In case if this datagram goes out secure, 9829 * we need the ill_index also. Copy that also into the 9830 * ipsec_out. 
9831 */ 9832 if (mctl_present) { 9833 io = (ipsec_out_t *)first_mp->b_rptr; 9834 ASSERT(first_mp->b_datap->db_type == M_CTL); 9835 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9836 } else { 9837 ASSERT(mp == first_mp); 9838 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 9839 NULL) { 9840 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9841 freemsg(mp); 9842 if (ill != NULL) 9843 ill_refrele(ill); 9844 if (need_decref) 9845 CONN_DEC_REF(connp); 9846 return; 9847 } 9848 io = (ipsec_out_t *)first_mp->b_rptr; 9849 /* This is not a secure packet */ 9850 io->ipsec_out_secure = B_FALSE; 9851 io->ipsec_out_use_global_policy = B_TRUE; 9852 io->ipsec_out_zoneid = 9853 (zoneid != ALL_ZONES ? zoneid : GLOBAL_ZONEID); 9854 first_mp->b_cont = mp; 9855 mctl_present = B_TRUE; 9856 } 9857 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9858 io->ipsec_out_unspec_src = unspec_src; 9859 if (connp != NULL) 9860 io->ipsec_out_dontroute = connp->conn_dontroute; 9861 9862 send_from_ill: 9863 ASSERT(ill != NULL); 9864 ASSERT(mibptr == ill->ill_ip_mib); 9865 9866 if (do_outrequests) { 9867 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9868 do_outrequests = B_FALSE; 9869 } 9870 9871 /* 9872 * Because nce_xmit() calls ip_output_v6() and NCEs are always tied to 9873 * an underlying interface, IS_UNDER_IPMP() may be true even when 9874 * building IREs that will be used for data traffic. As such, use the 9875 * packet's source address to determine whether the traffic is test 9876 * traffic, and set MATCH_IRE_MARK_TESTHIDDEN if so. 9877 * 9878 * Separately, we also need to mark probe packets so that ND can 9879 * process them specially; see the comments in nce_queue_mp_common(). 
9880 */ 9881 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 9882 ipif_lookup_testaddr_v6(ill, &ip6h->ip6_src, NULL)) { 9883 if (ip6i == NULL) { 9884 if ((mp = ip_add_info_v6(mp, NULL, v6dstp)) == NULL) { 9885 if (mctl_present) 9886 freeb(first_mp); 9887 goto discard; 9888 } 9889 9890 if (mctl_present) 9891 first_mp->b_cont = mp; 9892 else 9893 first_mp = mp; 9894 9895 /* ndp_resolver() expects a pulled-up message */ 9896 if (MBLKL(mp) == sizeof (ip6i_t) && 9897 pullupmsg(mp, -1) == 0) { 9898 ip1dbg(("ip_output_v6: pullupmsg failed\n")); 9899 discard: BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9900 ill_refrele(ill); 9901 if (need_decref) 9902 CONN_DEC_REF(connp); 9903 return; 9904 } 9905 ip6i = (ip6i_t *)mp->b_rptr; 9906 ip6h = (ip6_t *)&ip6i[1]; 9907 v6dstp = &ip6h->ip6_dst; 9908 mp->b_rptr = (uchar_t *)ip6h; /* rewound below */ 9909 } 9910 ip6i->ip6i_flags |= IP6I_IPMP_PROBE; 9911 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 9912 } 9913 9914 if (io != NULL) 9915 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9916 9917 /* 9918 * When a specific ill is specified (using IPV6_PKTINFO, 9919 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 9920 * on routing entries (ftable and ctable) that have a matching 9921 * ire->ire_ipif->ipif_ill. Thus this can only be used 9922 * for destinations that are on-link for the specific ill 9923 * and that can appear on multiple links. Thus it is useful 9924 * for multicast destinations, link-local destinations, and 9925 * at some point perhaps for site-local destinations (if the 9926 * node sits at a site boundary). 9927 * We create the cache entries in the regular ctable since 9928 * it can not "confuse" things for other destinations. 9929 * table. 9930 * 9931 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 9932 * It is used only when ire_cache_lookup is used above. 
9933 */ 9934 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 9935 zoneid, msg_getlabel(mp), match_flags, ipst); 9936 if (ire != NULL) { 9937 /* 9938 * Check if the ire has the RTF_MULTIRT flag, inherited 9939 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9940 */ 9941 if (ire->ire_flags & RTF_MULTIRT) { 9942 /* 9943 * Force hop limit of multirouted packets if required. 9944 * The hop limit of such packets is bounded by the 9945 * ip_multirt_ttl ndd variable. 9946 * NDP packets must have a hop limit of 255; don't 9947 * change the hop limit in that case. 9948 */ 9949 if ((ipst->ips_ip_multirt_ttl > 0) && 9950 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9951 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9952 if (ip_debug > 3) { 9953 ip2dbg(("ip_wput_v6: forcing multirt " 9954 "hop limit to %d (was %d) ", 9955 ipst->ips_ip_multirt_ttl, 9956 ip6h->ip6_hops)); 9957 pr_addr_dbg("v6dst %s\n", AF_INET6, 9958 &ire->ire_addr_v6); 9959 } 9960 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9961 } 9962 9963 /* 9964 * We look at this point if there are pending 9965 * unresolved routes. ire_multirt_need_resolve_v6() 9966 * checks in O(n) that all IRE_OFFSUBNET ire 9967 * entries for the packet's destination and 9968 * flagged RTF_MULTIRT are currently resolved. 9969 * If some remain unresolved, we make a copy 9970 * of the current message. It will be used 9971 * to initiate additional route resolutions. 
9972 */ 9973 multirt_need_resolve = 9974 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9975 msg_getlabel(first_mp), ipst); 9976 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 9977 "multirt_need_resolve %d, first_mp %p\n", 9978 (void *)ire, multirt_need_resolve, 9979 (void *)first_mp)); 9980 if (multirt_need_resolve) { 9981 copy_mp = copymsg(first_mp); 9982 if (copy_mp != NULL) { 9983 MULTIRT_DEBUG_TAG(copy_mp); 9984 } 9985 } 9986 } 9987 9988 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 9989 ill->ill_name, (void *)ire, 9990 ill->ill_phyint->phyint_ifindex)); 9991 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9992 connp, caller, ip6i_flags, zoneid); 9993 ire_refrele(ire); 9994 if (need_decref) { 9995 CONN_DEC_REF(connp); 9996 connp = NULL; 9997 } 9998 9999 /* 10000 * Try to resolve another multiroute if 10001 * ire_multirt_need_resolve_v6() deemed it necessary. 10002 * copy_mp will be consumed (sent or freed) by 10003 * ip_newroute_[ipif_]v6(). 10004 */ 10005 if (copy_mp != NULL) { 10006 if (mctl_present) { 10007 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10008 } else { 10009 ip6h = (ip6_t *)copy_mp->b_rptr; 10010 } 10011 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10012 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10013 zoneid, ipst); 10014 if (ipif == NULL) { 10015 ip1dbg(("ip_wput_v6: No ipif for " 10016 "multicast\n")); 10017 MULTIRT_DEBUG_UNTAG(copy_mp); 10018 freemsg(copy_mp); 10019 return; 10020 } 10021 ip_newroute_ipif_v6(q, copy_mp, ipif, 10022 &ip6h->ip6_dst, &ip6h->ip6_src, unspec_src, 10023 zoneid); 10024 ipif_refrele(ipif); 10025 } else { 10026 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10027 &ip6h->ip6_src, ill, zoneid, ipst); 10028 } 10029 } 10030 ill_refrele(ill); 10031 return; 10032 } 10033 if (need_decref) { 10034 CONN_DEC_REF(connp); 10035 connp = NULL; 10036 } 10037 10038 /* Update rptr if there was an ip6i_t header. 
*/ 10039 if (ip6i != NULL) 10040 mp->b_rptr -= sizeof (ip6i_t); 10041 if (unspec_src) { 10042 if (ip6i == NULL) { 10043 /* 10044 * Add ip6i_t header to carry unspec_src 10045 * until the packet comes back in ip_wput_v6. 10046 */ 10047 if (mctl_present) { 10048 first_mp->b_cont = 10049 ip_add_info_v6(mp, NULL, v6dstp); 10050 mp = first_mp->b_cont; 10051 if (mp == NULL) 10052 freeb(first_mp); 10053 } else { 10054 first_mp = mp = ip_add_info_v6(mp, NULL, 10055 v6dstp); 10056 } 10057 if (mp == NULL) { 10058 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10059 ill_refrele(ill); 10060 return; 10061 } 10062 ip6i = (ip6i_t *)mp->b_rptr; 10063 if ((mp->b_wptr - (uchar_t *)ip6i) == 10064 sizeof (ip6i_t)) { 10065 /* 10066 * ndp_resolver called from ip_newroute_v6 10067 * expects a pulled up message. 10068 */ 10069 if (!pullupmsg(mp, -1)) { 10070 ip1dbg(("ip_wput_v6: pullupmsg" 10071 " failed\n")); 10072 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10073 freemsg(first_mp); 10074 return; 10075 } 10076 ip6i = (ip6i_t *)mp->b_rptr; 10077 } 10078 ip6h = (ip6_t *)&ip6i[1]; 10079 v6dstp = &ip6h->ip6_dst; 10080 } 10081 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10082 if (mctl_present) { 10083 ASSERT(io != NULL); 10084 io->ipsec_out_unspec_src = unspec_src; 10085 } 10086 } 10087 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10088 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, v6dstp, 10089 &ip6h->ip6_src, unspec_src, zoneid); 10090 } else { 10091 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10092 zoneid, ipst); 10093 } 10094 ill_refrele(ill); 10095 return; 10096 10097 notv6: 10098 /* FIXME?: assume the caller calls the right version of ip_output? */ 10099 if (q->q_next == NULL) { 10100 connp = Q_TO_CONN(q); 10101 10102 /* 10103 * We can change conn_send for all types of conn, even 10104 * though only TCP uses it right now. 10105 * FIXME: sctp could use conn_send but doesn't currently. 
10106 */ 10107 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10108 } 10109 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10110 (void) ip_output(arg, first_mp, arg2, caller); 10111 if (ill != NULL) 10112 ill_refrele(ill); 10113 } 10114 10115 /* 10116 * If this is a conn_t queue, then we pass in the conn. This includes the 10117 * zoneid. 10118 * Otherwise, this is a message for an ill_t queue, 10119 * in which case we use the global zoneid since those are all part of 10120 * the global zone. 10121 */ 10122 void 10123 ip_wput_v6(queue_t *q, mblk_t *mp) 10124 { 10125 if (CONN_Q(q)) 10126 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10127 else 10128 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10129 } 10130 10131 /* 10132 * NULL send-to queue - packet is to be delivered locally. 10133 */ 10134 void 10135 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10136 ire_t *ire, int fanout_flags, zoneid_t zoneid) 10137 { 10138 uint32_t ports; 10139 mblk_t *mp = first_mp, *first_mp1; 10140 boolean_t mctl_present; 10141 uint8_t nexthdr; 10142 uint16_t hdr_length; 10143 ipsec_out_t *io; 10144 mib2_ipIfStatsEntry_t *mibptr; 10145 ilm_t *ilm; 10146 uint_t nexthdr_offset; 10147 ip_stack_t *ipst = ill->ill_ipst; 10148 10149 if (DB_TYPE(mp) == M_CTL) { 10150 io = (ipsec_out_t *)mp->b_rptr; 10151 if (!io->ipsec_out_secure) { 10152 mp = mp->b_cont; 10153 freeb(first_mp); 10154 first_mp = mp; 10155 mctl_present = B_FALSE; 10156 } else { 10157 mctl_present = B_TRUE; 10158 mp = first_mp->b_cont; 10159 ipsec_out_to_in(first_mp); 10160 } 10161 } else { 10162 mctl_present = B_FALSE; 10163 } 10164 10165 /* 10166 * Remove reachability confirmation bit from version field 10167 * before passing the packet on to any firewall hooks or 10168 * looping back the packet. 
10169 */ 10170 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10171 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10172 10173 DTRACE_PROBE4(ip6__loopback__in__start, 10174 ill_t *, ill, ill_t *, NULL, 10175 ip6_t *, ip6h, mblk_t *, first_mp); 10176 10177 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10178 ipst->ips_ipv6firewall_loopback_in, 10179 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10180 10181 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10182 10183 if (first_mp == NULL) 10184 return; 10185 10186 if (ipst->ips_ip6_observe.he_interested) { 10187 zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; 10188 zoneid_t stackzoneid = netstackid_to_zoneid( 10189 ipst->ips_netstack->netstack_stackid); 10190 10191 szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; 10192 /* 10193 * ::1 is special, as we cannot lookup its zoneid by 10194 * address. For this case, restrict the lookup to the 10195 * source zone. 10196 */ 10197 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) 10198 lookup_zoneid = zoneid; 10199 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 10200 lookup_zoneid); 10201 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst); 10202 } 10203 10204 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10205 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10206 int, 1); 10207 10208 nexthdr = ip6h->ip6_nxt; 10209 mibptr = ill->ill_ip_mib; 10210 10211 /* Fastpath */ 10212 switch (nexthdr) { 10213 case IPPROTO_TCP: 10214 case IPPROTO_UDP: 10215 case IPPROTO_ICMPV6: 10216 case IPPROTO_SCTP: 10217 hdr_length = IPV6_HDR_LEN; 10218 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10219 (uchar_t *)ip6h); 10220 break; 10221 default: { 10222 uint8_t *nexthdrp; 10223 10224 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10225 &hdr_length, &nexthdrp)) { 10226 /* Malformed packet */ 10227 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10228 freemsg(first_mp); 10229 return; 10230 } 10231 nexthdr = *nexthdrp; 10232 nexthdr_offset = nexthdrp - (uint8_t 
*)ip6h; 10233 break; 10234 } 10235 } 10236 10237 UPDATE_OB_PKT_COUNT(ire); 10238 ire->ire_last_used_time = lbolt; 10239 10240 switch (nexthdr) { 10241 case IPPROTO_TCP: 10242 if (DB_TYPE(mp) == M_DATA) { 10243 /* 10244 * M_DATA mblk, so init mblk (chain) for 10245 * no struio(). 10246 */ 10247 mblk_t *mp1 = mp; 10248 10249 do { 10250 mp1->b_datap->db_struioflag = 0; 10251 } while ((mp1 = mp1->b_cont) != NULL); 10252 } 10253 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10254 TCP_PORTS_OFFSET); 10255 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10256 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10257 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10258 hdr_length, mctl_present, ire->ire_zoneid); 10259 return; 10260 10261 case IPPROTO_UDP: 10262 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10263 UDP_PORTS_OFFSET); 10264 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10265 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10266 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10267 return; 10268 10269 case IPPROTO_SCTP: 10270 { 10271 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10272 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10273 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10274 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10275 return; 10276 } 10277 case IPPROTO_ICMPV6: { 10278 icmp6_t *icmp6; 10279 10280 /* check for full IPv6+ICMPv6 header */ 10281 if ((mp->b_wptr - mp->b_rptr) < 10282 (hdr_length + ICMP6_MINLEN)) { 10283 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10284 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10285 " failed\n")); 10286 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10287 freemsg(first_mp); 10288 return; 10289 } 10290 ip6h = (ip6_t *)mp->b_rptr; 10291 } 10292 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10293 10294 /* Update output mib stats */ 10295 icmp_update_out_mib_v6(ill, icmp6); 10296 10297 /* Check variable for testing applications */ 10298 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10299 freemsg(first_mp); 10300 
return; 10301 } 10302 /* 10303 * Assume that there is always at least one conn for 10304 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10305 * where there is no conn. 10306 */ 10307 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10308 !IS_LOOPBACK(ill)) { 10309 ilm_walker_t ilw; 10310 10311 /* 10312 * In the multicast case, applications may have 10313 * joined the group from different zones, so we 10314 * need to deliver the packet to each of them. 10315 * Loop through the multicast memberships 10316 * structures (ilm) on the receive ill and send 10317 * a copy of the packet up each matching one. 10318 * However, we don't do this for multicasts sent 10319 * on the loopback interface (PHYI_LOOPBACK flag 10320 * set) as they must stay in the sender's zone. 10321 */ 10322 ilm = ilm_walker_start(&ilw, ill); 10323 for (; ilm != NULL; 10324 ilm = ilm_walker_step(&ilw, ilm)) { 10325 if (!IN6_ARE_ADDR_EQUAL( 10326 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10327 continue; 10328 if ((fanout_flags & 10329 IP_FF_NO_MCAST_LOOP) && 10330 ilm->ilm_zoneid == ire->ire_zoneid) 10331 continue; 10332 if (!ipif_lookup_zoneid( 10333 ilw.ilw_walk_ill, ilm->ilm_zoneid, 10334 IPIF_UP, NULL)) 10335 continue; 10336 10337 first_mp1 = ip_copymsg(first_mp); 10338 if (first_mp1 == NULL) 10339 continue; 10340 icmp_inbound_v6(q, first_mp1, 10341 ilw.ilw_walk_ill, ill, hdr_length, 10342 mctl_present, IP6_NO_IPPOLICY, 10343 ilm->ilm_zoneid, NULL); 10344 } 10345 ilm_walker_finish(&ilw); 10346 } else { 10347 first_mp1 = ip_copymsg(first_mp); 10348 if (first_mp1 != NULL) 10349 icmp_inbound_v6(q, first_mp1, ill, ill, 10350 hdr_length, mctl_present, 10351 IP6_NO_IPPOLICY, ire->ire_zoneid, 10352 NULL); 10353 } 10354 } 10355 /* FALLTHRU */ 10356 default: { 10357 /* 10358 * Handle protocols with which IPv6 is less intimate. 10359 */ 10360 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10361 10362 /* 10363 * Enable sending ICMP for "Unknown" nexthdr 10364 * case. i.e. 
where we did not FALLTHRU from 10365 * IPPROTO_ICMPV6 processing case above. 10366 */ 10367 if (nexthdr != IPPROTO_ICMPV6) 10368 fanout_flags |= IP_FF_SEND_ICMP; 10369 /* 10370 * Note: There can be more than one stream bound 10371 * to a particular protocol. When this is the case, 10372 * each one gets a copy of any incoming packets. 10373 */ 10374 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10375 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10376 mctl_present, ire->ire_zoneid); 10377 return; 10378 } 10379 } 10380 } 10381 10382 /* 10383 * Send packet using IRE. 10384 * Checksumming is controlled by cksum_request: 10385 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10386 * 1 => Skip TCP/UDP/SCTP checksum 10387 * Otherwise => checksum_request contains insert offset for checksum 10388 * 10389 * Assumes that the following set of headers appear in the first 10390 * mblk: 10391 * ip6_t 10392 * Any extension headers 10393 * TCP/UDP/SCTP header (if present) 10394 * The routine can handle an ICMPv6 header that is not in the first mblk. 10395 * 10396 * NOTE : This function does not ire_refrele the ire passed in as the 10397 * argument unlike ip_wput_ire where the REFRELE is done. 10398 * Refer to ip_wput_ire for more on this. 10399 */ 10400 static void 10401 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10402 int cksum_request, conn_t *connp, int caller, int flags, zoneid_t zoneid) 10403 { 10404 ip6_t *ip6h; 10405 uint8_t nexthdr; 10406 uint16_t hdr_length; 10407 uint_t reachable = 0x0; 10408 ill_t *ill; 10409 mib2_ipIfStatsEntry_t *mibptr; 10410 mblk_t *first_mp; 10411 boolean_t mctl_present; 10412 ipsec_out_t *io; 10413 boolean_t conn_dontroute; /* conn value for multicast */ 10414 boolean_t conn_multicast_loop; /* conn value for multicast */ 10415 boolean_t multicast_forward; /* Should we forward ? 
*/ 10416 int max_frag; 10417 ip_stack_t *ipst = ire->ire_ipst; 10418 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10419 10420 ill = ire_to_ill(ire); 10421 first_mp = mp; 10422 multicast_forward = B_FALSE; 10423 10424 if (mp->b_datap->db_type != M_CTL) { 10425 ip6h = (ip6_t *)first_mp->b_rptr; 10426 } else { 10427 io = (ipsec_out_t *)first_mp->b_rptr; 10428 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10429 /* 10430 * Grab the zone id now because the M_CTL can be discarded by 10431 * ip_wput_ire_parse_ipsec_out() below. 10432 */ 10433 ASSERT(zoneid == io->ipsec_out_zoneid); 10434 ASSERT(zoneid != ALL_ZONES); 10435 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10436 /* 10437 * For the multicast case, ipsec_out carries conn_dontroute and 10438 * conn_multicast_loop as conn may not be available here. We 10439 * need this for multicast loopback and forwarding which is done 10440 * later in the code. 10441 */ 10442 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10443 conn_dontroute = io->ipsec_out_dontroute; 10444 conn_multicast_loop = io->ipsec_out_multicast_loop; 10445 /* 10446 * If conn_dontroute is not set or conn_multicast_loop 10447 * is set, we need to do forwarding/loopback. For 10448 * datagrams from ip_wput_multicast, conn_dontroute is 10449 * set to B_TRUE and conn_multicast_loop is set to 10450 * B_FALSE so that we neither do forwarding nor 10451 * loopback. 10452 */ 10453 if (!conn_dontroute || conn_multicast_loop) 10454 multicast_forward = B_TRUE; 10455 } 10456 } 10457 10458 /* 10459 * If the sender didn't supply the hop limit and there is a default 10460 * unicast hop limit associated with the output interface, we use 10461 * that if the packet is unicast. Interface specific unicast hop 10462 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10463 */ 10464 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10465 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10466 ip6h->ip6_hops = ill->ill_max_hops; 10467 } 10468 10469 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10470 ire->ire_zoneid != ALL_ZONES) { 10471 /* 10472 * When a zone sends a packet to another zone, we try to deliver 10473 * the packet under the same conditions as if the destination 10474 * was a real node on the network. To do so, we look for a 10475 * matching route in the forwarding table. 10476 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10477 * ip_newroute_v6() does. 10478 * Note that IRE_LOCAL are special, since they are used 10479 * when the zoneid doesn't match in some cases. This means that 10480 * we need to handle ipha_src differently since ire_src_addr 10481 * belongs to the receiving zone instead of the sending zone. 10482 * When ip_restrict_interzone_loopback is set, then 10483 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10484 * for loopback between zones when the logical "Ethernet" would 10485 * have looped them back. 
10486 */ 10487 ire_t *src_ire; 10488 10489 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10490 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10491 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10492 if (src_ire != NULL && 10493 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10494 (!ipst->ips_ip_restrict_interzone_loopback || 10495 ire_local_same_lan(ire, src_ire))) { 10496 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10497 !unspec_src) { 10498 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10499 } 10500 ire_refrele(src_ire); 10501 } else { 10502 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10503 if (src_ire != NULL) { 10504 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10505 ire_refrele(src_ire); 10506 freemsg(first_mp); 10507 return; 10508 } 10509 ire_refrele(src_ire); 10510 } 10511 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10512 /* Failed */ 10513 freemsg(first_mp); 10514 return; 10515 } 10516 icmp_unreachable_v6(q, first_mp, 10517 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10518 zoneid, ipst); 10519 return; 10520 } 10521 } 10522 10523 if (mp->b_datap->db_type == M_CTL || 10524 ipss->ipsec_outbound_v6_policy_present) { 10525 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10526 connp, unspec_src, zoneid); 10527 if (mp == NULL) { 10528 return; 10529 } 10530 } 10531 10532 first_mp = mp; 10533 if (mp->b_datap->db_type == M_CTL) { 10534 io = (ipsec_out_t *)mp->b_rptr; 10535 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10536 mp = mp->b_cont; 10537 mctl_present = B_TRUE; 10538 } else { 10539 mctl_present = B_FALSE; 10540 } 10541 10542 ip6h = (ip6_t *)mp->b_rptr; 10543 nexthdr = ip6h->ip6_nxt; 10544 mibptr = ill->ill_ip_mib; 10545 10546 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10547 ipif_t *ipif; 10548 10549 /* 10550 * Select the source address using ipif_select_source_v6. 
10551 */ 10552 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, B_FALSE, 10553 IPV6_PREFER_SRC_DEFAULT, zoneid); 10554 if (ipif == NULL) { 10555 if (ip_debug > 2) { 10556 /* ip1dbg */ 10557 pr_addr_dbg("ip_wput_ire_v6: no src for " 10558 "dst %s\n", AF_INET6, &ip6h->ip6_dst); 10559 printf("through interface %s\n", ill->ill_name); 10560 } 10561 freemsg(first_mp); 10562 return; 10563 } 10564 ip6h->ip6_src = ipif->ipif_v6src_addr; 10565 ipif_refrele(ipif); 10566 } 10567 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10568 if ((connp != NULL && connp->conn_multicast_loop) || 10569 !IS_LOOPBACK(ill)) { 10570 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 10571 ALL_ZONES) != NULL) { 10572 mblk_t *nmp; 10573 int fanout_flags = 0; 10574 10575 if (connp != NULL && 10576 !connp->conn_multicast_loop) { 10577 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10578 } 10579 ip1dbg(("ip_wput_ire_v6: " 10580 "Loopback multicast\n")); 10581 nmp = ip_copymsg(first_mp); 10582 if (nmp != NULL) { 10583 ip6_t *nip6h; 10584 mblk_t *mp_ip6h; 10585 10586 if (mctl_present) { 10587 nip6h = (ip6_t *) 10588 nmp->b_cont->b_rptr; 10589 mp_ip6h = nmp->b_cont; 10590 } else { 10591 nip6h = (ip6_t *)nmp->b_rptr; 10592 mp_ip6h = nmp; 10593 } 10594 10595 DTRACE_PROBE4( 10596 ip6__loopback__out__start, 10597 ill_t *, NULL, 10598 ill_t *, ill, 10599 ip6_t *, nip6h, 10600 mblk_t *, nmp); 10601 10602 FW_HOOKS6( 10603 ipst->ips_ip6_loopback_out_event, 10604 ipst->ips_ipv6firewall_loopback_out, 10605 NULL, ill, nip6h, nmp, mp_ip6h, 10606 0, ipst); 10607 10608 DTRACE_PROBE1( 10609 ip6__loopback__out__end, 10610 mblk_t *, nmp); 10611 10612 /* 10613 * DTrace this as ip:::send. A blocked 10614 * packet will fire the send probe, but 10615 * not the receive probe. 
10616 */ 10617 DTRACE_IP7(send, mblk_t *, nmp, 10618 conn_t *, NULL, void_ip_t *, nip6h, 10619 __dtrace_ipsr_ill_t *, ill, 10620 ipha_t *, NULL, ip6_t *, nip6h, 10621 int, 1); 10622 10623 if (nmp != NULL) { 10624 /* 10625 * Deliver locally and to 10626 * every local zone, except 10627 * the sending zone when 10628 * IPV6_MULTICAST_LOOP is 10629 * disabled. 10630 */ 10631 ip_wput_local_v6(RD(q), ill, 10632 nip6h, nmp, ire, 10633 fanout_flags, zoneid); 10634 } 10635 } else { 10636 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10637 ip1dbg(("ip_wput_ire_v6: " 10638 "copymsg failed\n")); 10639 } 10640 } 10641 } 10642 if (ip6h->ip6_hops == 0 || 10643 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10644 IS_LOOPBACK(ill)) { 10645 /* 10646 * Local multicast or just loopback on loopback 10647 * interface. 10648 */ 10649 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10650 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10651 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10652 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10653 freemsg(first_mp); 10654 return; 10655 } 10656 } 10657 10658 if (ire->ire_stq != NULL) { 10659 uint32_t sum; 10660 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10661 ill_phyint->phyint_ifindex; 10662 queue_t *dev_q = ire->ire_stq->q_next; 10663 10664 /* 10665 * non-NULL send-to queue - packet is to be sent 10666 * out an interface. 10667 */ 10668 10669 /* Driver is flow-controlling? */ 10670 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10671 DEV_Q_FLOW_BLOCKED(dev_q)) { 10672 /* 10673 * Queue packet if we have an conn to give back 10674 * pressure. We can't queue packets intended for 10675 * hardware acceleration since we've tossed that 10676 * state already. If the packet is being fed back 10677 * from ire_send_v6, we don't know the position in 10678 * the queue to enqueue the packet and we discard 10679 * the packet. 
10680 */ 10681 if (ipst->ips_ip_output_queue && connp != NULL && 10682 !mctl_present && caller != IRE_SEND) { 10683 if (caller == IP_WSRV) { 10684 idl_tx_list_t *idl_txl; 10685 10686 idl_txl = &ipst->ips_idl_tx_list[0]; 10687 connp->conn_did_putbq = 1; 10688 (void) putbq(connp->conn_wq, mp); 10689 conn_drain_insert(connp, idl_txl); 10690 /* 10691 * caller == IP_WSRV implies we are 10692 * the service thread, and the 10693 * queue is already noenabled. 10694 * The check for canput and 10695 * the putbq is not atomic. 10696 * So we need to check again. 10697 */ 10698 if (canput(dev_q)) 10699 connp->conn_did_putbq = 0; 10700 } else { 10701 (void) putq(connp->conn_wq, mp); 10702 } 10703 return; 10704 } 10705 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10706 freemsg(first_mp); 10707 return; 10708 } 10709 10710 /* 10711 * Look for reachability confirmations from the transport. 10712 */ 10713 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10714 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10715 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10716 if (mctl_present) 10717 io->ipsec_out_reachable = B_TRUE; 10718 } 10719 /* Fastpath */ 10720 switch (nexthdr) { 10721 case IPPROTO_TCP: 10722 case IPPROTO_UDP: 10723 case IPPROTO_ICMPV6: 10724 case IPPROTO_SCTP: 10725 hdr_length = IPV6_HDR_LEN; 10726 break; 10727 default: { 10728 uint8_t *nexthdrp; 10729 10730 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10731 &hdr_length, &nexthdrp)) { 10732 /* Malformed packet */ 10733 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10734 freemsg(first_mp); 10735 return; 10736 } 10737 nexthdr = *nexthdrp; 10738 break; 10739 } 10740 } 10741 10742 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10743 uint16_t *up; 10744 uint16_t *insp; 10745 10746 /* 10747 * The packet header is processed once for all, even 10748 * in the multirouting case. We disable hardware 10749 * checksum if the packet is multirouted, as it will be 10750 * replicated via several interfaces, and not all of 10751 * them may have this capability. 
10752 */ 10753 if (cksum_request == 1 && 10754 !(ire->ire_flags & RTF_MULTIRT)) { 10755 /* Skip the transport checksum */ 10756 goto cksum_done; 10757 } 10758 /* 10759 * Do user-configured raw checksum. 10760 * Compute checksum and insert at offset "cksum_request" 10761 */ 10762 10763 /* check for enough headers for checksum */ 10764 cksum_request += hdr_length; /* offset from rptr */ 10765 if ((mp->b_wptr - mp->b_rptr) < 10766 (cksum_request + sizeof (int16_t))) { 10767 if (!pullupmsg(mp, 10768 cksum_request + sizeof (int16_t))) { 10769 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10770 " failed\n")); 10771 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10772 freemsg(first_mp); 10773 return; 10774 } 10775 ip6h = (ip6_t *)mp->b_rptr; 10776 } 10777 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10778 ASSERT(((uintptr_t)insp & 0x1) == 0); 10779 up = (uint16_t *)&ip6h->ip6_src; 10780 /* 10781 * icmp has placed length and routing 10782 * header adjustment in *insp. 10783 */ 10784 sum = htons(nexthdr) + 10785 up[0] + up[1] + up[2] + up[3] + 10786 up[4] + up[5] + up[6] + up[7] + 10787 up[8] + up[9] + up[10] + up[11] + 10788 up[12] + up[13] + up[14] + up[15]; 10789 sum = (sum & 0xffff) + (sum >> 16); 10790 *insp = IP_CSUM(mp, hdr_length, sum); 10791 } else if (nexthdr == IPPROTO_TCP) { 10792 uint16_t *up; 10793 10794 /* 10795 * Check for full IPv6 header + enough TCP header 10796 * to get at the checksum field. 
10797 */ 10798 if ((mp->b_wptr - mp->b_rptr) < 10799 (hdr_length + TCP_CHECKSUM_OFFSET + 10800 TCP_CHECKSUM_SIZE)) { 10801 if (!pullupmsg(mp, hdr_length + 10802 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10803 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10804 " failed\n")); 10805 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10806 freemsg(first_mp); 10807 return; 10808 } 10809 ip6h = (ip6_t *)mp->b_rptr; 10810 } 10811 10812 up = (uint16_t *)&ip6h->ip6_src; 10813 /* 10814 * Note: The TCP module has stored the length value 10815 * into the tcp checksum field, so we don't 10816 * need to explicitly sum it in here. 10817 */ 10818 sum = up[0] + up[1] + up[2] + up[3] + 10819 up[4] + up[5] + up[6] + up[7] + 10820 up[8] + up[9] + up[10] + up[11] + 10821 up[12] + up[13] + up[14] + up[15]; 10822 10823 /* Fold the initial sum */ 10824 sum = (sum & 0xffff) + (sum >> 16); 10825 10826 up = (uint16_t *)(((uchar_t *)ip6h) + 10827 hdr_length + TCP_CHECKSUM_OFFSET); 10828 10829 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10830 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10831 ire->ire_max_frag, mctl_present, sum); 10832 10833 /* Software checksum? 
*/ 10834 if (DB_CKSUMFLAGS(mp) == 0) { 10835 IP6_STAT(ipst, ip6_out_sw_cksum); 10836 IP6_STAT_UPDATE(ipst, 10837 ip6_tcp_out_sw_cksum_bytes, 10838 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10839 hdr_length); 10840 } 10841 } else if (nexthdr == IPPROTO_UDP) { 10842 uint16_t *up; 10843 10844 /* 10845 * check for full IPv6 header + enough UDP header 10846 * to get at the UDP checksum field 10847 */ 10848 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10849 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10850 if (!pullupmsg(mp, hdr_length + 10851 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10852 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10853 " failed\n")); 10854 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10855 freemsg(first_mp); 10856 return; 10857 } 10858 ip6h = (ip6_t *)mp->b_rptr; 10859 } 10860 up = (uint16_t *)&ip6h->ip6_src; 10861 /* 10862 * Note: The UDP module has stored the length value 10863 * into the udp checksum field, so we don't 10864 * need to explicitly sum it in here. 10865 */ 10866 sum = up[0] + up[1] + up[2] + up[3] + 10867 up[4] + up[5] + up[6] + up[7] + 10868 up[8] + up[9] + up[10] + up[11] + 10869 up[12] + up[13] + up[14] + up[15]; 10870 10871 /* Fold the initial sum */ 10872 sum = (sum & 0xffff) + (sum >> 16); 10873 10874 up = (uint16_t *)(((uchar_t *)ip6h) + 10875 hdr_length + UDP_CHECKSUM_OFFSET); 10876 10877 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 10878 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10879 ire->ire_max_frag, mctl_present, sum); 10880 10881 /* Software checksum? 
*/ 10882 if (DB_CKSUMFLAGS(mp) == 0) { 10883 IP6_STAT(ipst, ip6_out_sw_cksum); 10884 IP6_STAT_UPDATE(ipst, 10885 ip6_udp_out_sw_cksum_bytes, 10886 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10887 hdr_length); 10888 } 10889 } else if (nexthdr == IPPROTO_ICMPV6) { 10890 uint16_t *up; 10891 icmp6_t *icmp6; 10892 10893 /* check for full IPv6+ICMPv6 header */ 10894 if ((mp->b_wptr - mp->b_rptr) < 10895 (hdr_length + ICMP6_MINLEN)) { 10896 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10897 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10898 " failed\n")); 10899 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10900 freemsg(first_mp); 10901 return; 10902 } 10903 ip6h = (ip6_t *)mp->b_rptr; 10904 } 10905 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10906 up = (uint16_t *)&ip6h->ip6_src; 10907 /* 10908 * icmp has placed length and routing 10909 * header adjustment in icmp6_cksum. 10910 */ 10911 sum = htons(IPPROTO_ICMPV6) + 10912 up[0] + up[1] + up[2] + up[3] + 10913 up[4] + up[5] + up[6] + up[7] + 10914 up[8] + up[9] + up[10] + up[11] + 10915 up[12] + up[13] + up[14] + up[15]; 10916 sum = (sum & 0xffff) + (sum >> 16); 10917 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 10918 10919 /* Update output mib stats */ 10920 icmp_update_out_mib_v6(ill, icmp6); 10921 } else if (nexthdr == IPPROTO_SCTP) { 10922 sctp_hdr_t *sctph; 10923 10924 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 10925 if (!pullupmsg(mp, hdr_length + 10926 sizeof (*sctph))) { 10927 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 10928 " failed\n")); 10929 BUMP_MIB(ill->ill_ip_mib, 10930 ipIfStatsOutDiscards); 10931 freemsg(mp); 10932 return; 10933 } 10934 ip6h = (ip6_t *)mp->b_rptr; 10935 } 10936 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 10937 sctph->sh_chksum = 0; 10938 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 10939 } 10940 10941 cksum_done: 10942 /* 10943 * We force the insertion of a fragment header using the 10944 * IPH_FRAG_HDR flag in two cases: 10945 * - after reception of an ICMPv6 "packet too 
big" message 10946 * with a MTU < 1280 (cf. RFC 2460 section 5) 10947 * - for multirouted IPv6 packets, so that the receiver can 10948 * discard duplicates according to their fragment identifier 10949 * 10950 * Two flags modifed from the API can modify this behavior. 10951 * The first is IPV6_USE_MIN_MTU. With this API the user 10952 * can specify how to manage PMTUD for unicast and multicast. 10953 * 10954 * IPV6_DONTFRAG disallows fragmentation. 10955 */ 10956 max_frag = ire->ire_max_frag; 10957 switch (IP6I_USE_MIN_MTU_API(flags)) { 10958 case IPV6_USE_MIN_MTU_DEFAULT: 10959 case IPV6_USE_MIN_MTU_UNICAST: 10960 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10961 max_frag = IPV6_MIN_MTU; 10962 } 10963 break; 10964 10965 case IPV6_USE_MIN_MTU_NEVER: 10966 max_frag = IPV6_MIN_MTU; 10967 break; 10968 } 10969 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 10970 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 10971 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 10972 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10973 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 10974 return; 10975 } 10976 10977 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 10978 (mp->b_cont ? 
msgdsize(mp) : 10979 mp->b_wptr - (uchar_t *)ip6h)) { 10980 ip0dbg(("Packet length mismatch: %d, %ld\n", 10981 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10982 msgdsize(mp))); 10983 freemsg(first_mp); 10984 return; 10985 } 10986 /* Do IPSEC processing first */ 10987 if (mctl_present) { 10988 ipsec_out_process(q, first_mp, ire, ill_index); 10989 return; 10990 } 10991 ASSERT(mp->b_prev == NULL); 10992 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 10993 ntohs(ip6h->ip6_plen) + 10994 IPV6_HDR_LEN, max_frag)); 10995 ASSERT(mp == first_mp); 10996 /* Initiate IPPF processing */ 10997 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 10998 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 10999 if (mp == NULL) { 11000 return; 11001 } 11002 } 11003 ip_wput_frag_v6(mp, ire, reachable, connp, 11004 caller, max_frag); 11005 return; 11006 } 11007 /* Do IPSEC processing first */ 11008 if (mctl_present) { 11009 int extra_len = ipsec_out_extra_length(first_mp); 11010 11011 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11012 max_frag && connp != NULL && 11013 (flags & IP6I_DONTFRAG)) { 11014 /* 11015 * IPsec headers will push the packet over the 11016 * MTU limit. Issue an ICMPv6 Packet Too Big 11017 * message for this packet if the upper-layer 11018 * that issued this packet will be able to 11019 * react to the icmp_pkt2big_v6() that we'll 11020 * generate. 11021 */ 11022 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11023 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11024 return; 11025 } 11026 ipsec_out_process(q, first_mp, ire, ill_index); 11027 return; 11028 } 11029 /* 11030 * XXX multicast: add ip_mforward_v6() here. 11031 * Check conn_dontroute 11032 */ 11033 #ifdef lint 11034 /* 11035 * XXX The only purpose of this statement is to avoid lint 11036 * errors. See the above "XXX multicast". When that gets 11037 * fixed, remove this whole #ifdef lint section. 11038 */ 11039 ip3dbg(("multicast forward is %s.\n", 11040 (multicast_forward ? 
"TRUE" : "FALSE"))); 11041 #endif 11042 11043 UPDATE_OB_PKT_COUNT(ire); 11044 ire->ire_last_used_time = lbolt; 11045 ASSERT(mp == first_mp); 11046 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11047 } else { 11048 /* 11049 * DTrace this as ip:::send. A blocked packet will fire the 11050 * send probe, but not the receive probe. 11051 */ 11052 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11053 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11054 NULL, ip6_t *, ip6h, int, 1); 11055 DTRACE_PROBE4(ip6__loopback__out__start, 11056 ill_t *, NULL, ill_t *, ill, 11057 ip6_t *, ip6h, mblk_t *, first_mp); 11058 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11059 ipst->ips_ipv6firewall_loopback_out, 11060 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11061 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11062 if (first_mp != NULL) { 11063 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, 11064 zoneid); 11065 } 11066 } 11067 } 11068 11069 /* 11070 * Outbound IPv6 fragmentation routine using MDT. 11071 */ 11072 static void 11073 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11074 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11075 { 11076 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11077 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11078 mblk_t *hdr_mp, *md_mp = NULL; 11079 int i1; 11080 multidata_t *mmd; 11081 unsigned char *hdr_ptr, *pld_ptr; 11082 ip_pdescinfo_t pdi; 11083 uint32_t ident; 11084 size_t len; 11085 uint16_t offset; 11086 queue_t *stq = ire->ire_stq; 11087 ill_t *ill = (ill_t *)stq->q_ptr; 11088 ip_stack_t *ipst = ill->ill_ipst; 11089 11090 ASSERT(DB_TYPE(mp) == M_DATA); 11091 ASSERT(MBLKL(mp) > unfragmentable_len); 11092 11093 /* 11094 * Move read ptr past unfragmentable portion, we don't want this part 11095 * of the data in our fragments. 11096 */ 11097 mp->b_rptr += unfragmentable_len; 11098 11099 /* Calculate how many packets we will send out */ 11100 i1 = (mp->b_cont == NULL) ? 
MBLKL(mp) : msgsize(mp); 11101 pkts = (i1 + max_chunk - 1) / max_chunk; 11102 ASSERT(pkts > 1); 11103 11104 /* Allocate a message block which will hold all the IP Headers. */ 11105 wroff = ipst->ips_ip_wroff_extra; 11106 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11107 11108 i1 = pkts * hdr_chunk_len; 11109 /* 11110 * Create the header buffer, Multidata and destination address 11111 * and SAP attribute that should be associated with it. 11112 */ 11113 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11114 ((hdr_mp->b_wptr += i1), 11115 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11116 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11117 freemsg(mp); 11118 if (md_mp == NULL) { 11119 freemsg(hdr_mp); 11120 } else { 11121 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11122 freemsg(md_mp); 11123 } 11124 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11125 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11126 return; 11127 } 11128 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11129 11130 /* 11131 * Add a payload buffer to the Multidata; this operation must not 11132 * fail, or otherwise our logic in this routine is broken. There 11133 * is no memory allocation done by the routine, so any returned 11134 * failure simply tells us that we've done something wrong. 11135 * 11136 * A failure tells us that either we're adding the same payload 11137 * buffer more than once, or we're trying to add more buffers than 11138 * allowed. None of the above cases should happen, and we panic 11139 * because either there's horrible heap corruption, and/or 11140 * programming mistake. 11141 */ 11142 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11143 goto pbuf_panic; 11144 } 11145 11146 hdr_ptr = hdr_mp->b_rptr; 11147 pld_ptr = mp->b_rptr; 11148 11149 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11150 11151 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11152 11153 /* 11154 * len is the total length of the fragmentable data in this 11155 * datagram. 
For each fragment sent, we will decrement len 11156 * by the amount of fragmentable data sent in that fragment 11157 * until len reaches zero. 11158 */ 11159 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11160 11161 offset = 0; 11162 prev_nexthdr_offset += wroff; 11163 11164 while (len != 0) { 11165 size_t mlen; 11166 ip6_t *fip6h; 11167 ip6_frag_t *fraghdr; 11168 int error; 11169 11170 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11171 mlen = MIN(len, max_chunk); 11172 len -= mlen; 11173 11174 fip6h = (ip6_t *)(hdr_ptr + wroff); 11175 ASSERT(OK_32PTR(fip6h)); 11176 bcopy(ip6h, fip6h, unfragmentable_len); 11177 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11178 11179 fip6h->ip6_plen = htons((uint16_t)(mlen + 11180 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11181 11182 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11183 unfragmentable_len); 11184 fraghdr->ip6f_nxt = nexthdr; 11185 fraghdr->ip6f_reserved = 0; 11186 fraghdr->ip6f_offlg = htons(offset) | 11187 ((len != 0) ? IP6F_MORE_FRAG : 0); 11188 fraghdr->ip6f_ident = ident; 11189 11190 /* 11191 * Record offset and size of header and data of the next packet 11192 * in the multidata message. 11193 */ 11194 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11195 unfragmentable_len + sizeof (ip6_frag_t), 0); 11196 PDESC_PLD_INIT(&pdi); 11197 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11198 ASSERT(i1 > 0); 11199 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11200 if (i1 == mlen) { 11201 pld_ptr += mlen; 11202 } else { 11203 i1 = mlen - i1; 11204 mp = mp->b_cont; 11205 ASSERT(mp != NULL); 11206 ASSERT(MBLKL(mp) >= i1); 11207 /* 11208 * Attach the next payload message block to the 11209 * multidata message. 
11210 */ 11211 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11212 goto pbuf_panic; 11213 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11214 pld_ptr = mp->b_rptr + i1; 11215 } 11216 11217 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11218 KM_NOSLEEP)) == NULL) { 11219 /* 11220 * Any failure other than ENOMEM indicates that we 11221 * have passed in invalid pdesc info or parameters 11222 * to mmd_addpdesc, which must not happen. 11223 * 11224 * EINVAL is a result of failure on boundary checks 11225 * against the pdesc info contents. It should not 11226 * happen, and we panic because either there's 11227 * horrible heap corruption, and/or programming 11228 * mistake. 11229 */ 11230 if (error != ENOMEM) { 11231 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11232 "pdesc logic error detected for " 11233 "mmd %p pinfo %p (%d)\n", 11234 (void *)mmd, (void *)&pdi, error); 11235 /* NOTREACHED */ 11236 } 11237 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11238 /* Free unattached payload message blocks as well */ 11239 md_mp->b_cont = mp->b_cont; 11240 goto free_mmd; 11241 } 11242 11243 /* Advance fragment offset. */ 11244 offset += mlen; 11245 11246 /* Advance to location for next header in the buffer. */ 11247 hdr_ptr += hdr_chunk_len; 11248 11249 /* Did we reach the next payload message block? */ 11250 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11251 mp = mp->b_cont; 11252 /* 11253 * Attach the next message block with payload 11254 * data to the multidata message. 
11255 */ 11256 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11257 goto pbuf_panic; 11258 pld_ptr = mp->b_rptr; 11259 } 11260 } 11261 11262 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11263 ASSERT(mp->b_wptr == pld_ptr); 11264 11265 /* Update IP statistics */ 11266 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11267 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11268 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11269 /* 11270 * The ipv6 header len is accounted for in unfragmentable_len so 11271 * when calculating the fragmentation overhead just add the frag 11272 * header len. 11273 */ 11274 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11275 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11276 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11277 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11278 11279 ire->ire_ob_pkt_count += pkts; 11280 if (ire->ire_ipif != NULL) 11281 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11282 11283 ire->ire_last_used_time = lbolt; 11284 /* Send it down */ 11285 putnext(stq, md_mp); 11286 return; 11287 11288 pbuf_panic: 11289 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11290 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11291 pbuf_idx); 11292 /* NOTREACHED */ 11293 } 11294 11295 /* 11296 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11297 * We have not optimized this in terms of number of mblks 11298 * allocated. For instance, for each fragment sent we always allocate a 11299 * mblk to hold the IPv6 header and fragment header. 11300 * 11301 * Assumes that all the extension headers are contained in the first mblk. 11302 * 11303 * The fragment header is inserted after an hop-by-hop options header 11304 * and after [an optional destinations header followed by] a routing header. 11305 * 11306 * NOTE : This function does not ire_refrele the ire passed in as 11307 * the argument. 
11308 */ 11309 void 11310 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11311 int caller, int max_frag) 11312 { 11313 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11314 ip6_t *fip6h; 11315 mblk_t *hmp; 11316 mblk_t *hmp0; 11317 mblk_t *dmp; 11318 ip6_frag_t *fraghdr; 11319 size_t unfragmentable_len; 11320 size_t len; 11321 size_t mlen; 11322 size_t max_chunk; 11323 uint32_t ident; 11324 uint16_t off_flags; 11325 uint16_t offset = 0; 11326 ill_t *ill; 11327 uint8_t nexthdr; 11328 uint_t prev_nexthdr_offset; 11329 uint8_t *ptr; 11330 ip_stack_t *ipst = ire->ire_ipst; 11331 11332 ASSERT(ire->ire_type == IRE_CACHE); 11333 ill = (ill_t *)ire->ire_stq->q_ptr; 11334 11335 if (max_frag <= 0) { 11336 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11337 freemsg(mp); 11338 return; 11339 } 11340 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11341 11342 /* 11343 * Determine the length of the unfragmentable portion of this 11344 * datagram. This consists of the IPv6 header, a potential 11345 * hop-by-hop options header, a potential pre-routing-header 11346 * destination options header, and a potential routing header. 
11347 */ 11348 nexthdr = ip6h->ip6_nxt; 11349 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11350 ptr = (uint8_t *)&ip6h[1]; 11351 11352 if (nexthdr == IPPROTO_HOPOPTS) { 11353 ip6_hbh_t *hbh_hdr; 11354 uint_t hdr_len; 11355 11356 hbh_hdr = (ip6_hbh_t *)ptr; 11357 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11358 nexthdr = hbh_hdr->ip6h_nxt; 11359 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11360 - (uint8_t *)ip6h; 11361 ptr += hdr_len; 11362 } 11363 if (nexthdr == IPPROTO_DSTOPTS) { 11364 ip6_dest_t *dest_hdr; 11365 uint_t hdr_len; 11366 11367 dest_hdr = (ip6_dest_t *)ptr; 11368 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11369 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11370 nexthdr = dest_hdr->ip6d_nxt; 11371 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11372 - (uint8_t *)ip6h; 11373 ptr += hdr_len; 11374 } 11375 } 11376 if (nexthdr == IPPROTO_ROUTING) { 11377 ip6_rthdr_t *rthdr; 11378 uint_t hdr_len; 11379 11380 rthdr = (ip6_rthdr_t *)ptr; 11381 nexthdr = rthdr->ip6r_nxt; 11382 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11383 - (uint8_t *)ip6h; 11384 hdr_len = 8 * (rthdr->ip6r_len + 1); 11385 ptr += hdr_len; 11386 } 11387 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11388 11389 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11390 sizeof (ip6_frag_t)) & ~7; 11391 11392 /* Check if we can use MDT to send out the frags. */ 11393 ASSERT(!IRE_IS_LOCAL(ire)); 11394 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11395 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11396 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11397 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11398 nexthdr, prev_nexthdr_offset); 11399 return; 11400 } 11401 11402 /* 11403 * Allocate an mblk with enough room for the link-layer 11404 * header, the unfragmentable part of the datagram, and the 11405 * fragment header. 
This (or a copy) will be used as the 11406 * first mblk for each fragment we send. 11407 */ 11408 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 11409 ipst->ips_ip_wroff_extra, mp); 11410 if (hmp == NULL) { 11411 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11412 freemsg(mp); 11413 return; 11414 } 11415 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11416 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11417 11418 fip6h = (ip6_t *)hmp->b_rptr; 11419 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11420 11421 bcopy(ip6h, fip6h, unfragmentable_len); 11422 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11423 11424 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11425 11426 fraghdr->ip6f_nxt = nexthdr; 11427 fraghdr->ip6f_reserved = 0; 11428 fraghdr->ip6f_offlg = 0; 11429 fraghdr->ip6f_ident = htonl(ident); 11430 11431 /* 11432 * len is the total length of the fragmentable data in this 11433 * datagram. For each fragment sent, we will decrement len 11434 * by the amount of fragmentable data sent in that fragment 11435 * until len reaches zero. 11436 */ 11437 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11438 11439 /* 11440 * Move read ptr past unfragmentable portion, we don't want this part 11441 * of the data in our fragments. 
11442 */ 11443 mp->b_rptr += unfragmentable_len; 11444 11445 while (len != 0) { 11446 mlen = MIN(len, max_chunk); 11447 len -= mlen; 11448 if (len != 0) { 11449 /* Not last */ 11450 hmp0 = copyb(hmp); 11451 if (hmp0 == NULL) { 11452 freeb(hmp); 11453 freemsg(mp); 11454 BUMP_MIB(ill->ill_ip_mib, 11455 ipIfStatsOutFragFails); 11456 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11457 return; 11458 } 11459 off_flags = IP6F_MORE_FRAG; 11460 } else { 11461 /* Last fragment */ 11462 hmp0 = hmp; 11463 hmp = NULL; 11464 off_flags = 0; 11465 } 11466 fip6h = (ip6_t *)(hmp0->b_rptr); 11467 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11468 11469 fip6h->ip6_plen = htons((uint16_t)(mlen + 11470 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11471 /* 11472 * Note: Optimization alert. 11473 * In IPv6 (and IPv4) protocol header, Fragment Offset 11474 * ("offset") is 13 bits wide and in 8-octet units. 11475 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11476 * it occupies the most significant 13 bits. 11477 * (least significant 13 bits in IPv4). 11478 * We do not do any shifts here. Not shifting is same effect 11479 * as taking offset value in octet units, dividing by 8 and 11480 * then shifting 3 bits left to line it up in place in proper 11481 * place protocol header. 
11482 */ 11483 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11484 11485 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11486 /* mp has already been freed by ip_carve_mp() */ 11487 if (hmp != NULL) 11488 freeb(hmp); 11489 freeb(hmp0); 11490 ip1dbg(("ip_carve_mp: failed\n")); 11491 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11492 return; 11493 } 11494 hmp0->b_cont = dmp; 11495 /* Get the priority marking, if any */ 11496 hmp0->b_band = dmp->b_band; 11497 UPDATE_OB_PKT_COUNT(ire); 11498 ire->ire_last_used_time = lbolt; 11499 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11500 caller, NULL); 11501 reachable = 0; /* No need to redo state machine in loop */ 11502 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11503 offset += mlen; 11504 } 11505 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11506 } 11507 11508 /* 11509 * Determine if the ill and multicast aspects of that packets 11510 * "matches" the conn. 11511 */ 11512 boolean_t 11513 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11514 zoneid_t zoneid) 11515 { 11516 ill_t *bound_ill; 11517 boolean_t wantpacket; 11518 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11519 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11520 11521 /* 11522 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11523 * unicast and multicast reception to conn_incoming_ill. 11524 * conn_wantpacket_v6 is called both for unicast and 11525 * multicast. 11526 */ 11527 bound_ill = connp->conn_incoming_ill; 11528 if (bound_ill != NULL) { 11529 if (IS_IPMP(bound_ill)) { 11530 if (bound_ill->ill_grp != ill->ill_grp) 11531 return (B_FALSE); 11532 } else { 11533 if (bound_ill != ill) 11534 return (B_FALSE); 11535 } 11536 } 11537 11538 if (connp->conn_multi_router) 11539 return (B_TRUE); 11540 11541 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11542 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11543 /* 11544 * Unicast case: we match the conn only if it's in the specified 11545 * zone. 
11546 */ 11547 return (IPCL_ZONE_MATCH(connp, zoneid)); 11548 } 11549 11550 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11551 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11552 /* 11553 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11554 * disabled, therefore we don't dispatch the multicast packet to 11555 * the sending zone. 11556 */ 11557 return (B_FALSE); 11558 } 11559 11560 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11561 zoneid != ALL_ZONES) { 11562 /* 11563 * Multicast packet on the loopback interface: we only match 11564 * conns who joined the group in the specified zone. 11565 */ 11566 return (B_FALSE); 11567 } 11568 11569 mutex_enter(&connp->conn_lock); 11570 wantpacket = 11571 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11572 mutex_exit(&connp->conn_lock); 11573 11574 return (wantpacket); 11575 } 11576 11577 11578 /* 11579 * Transmit a packet and update any NUD state based on the flags 11580 * XXX need to "recover" any ip6i_t when doing putq! 11581 * 11582 * NOTE : This function does not ire_refrele the ire passed in as the 11583 * argument. 
11584 */ 11585 void 11586 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11587 int caller, ipsec_out_t *io) 11588 { 11589 mblk_t *mp1; 11590 nce_t *nce = ire->ire_nce; 11591 ill_t *ill; 11592 ill_t *out_ill; 11593 uint64_t delta; 11594 ip6_t *ip6h; 11595 queue_t *stq = ire->ire_stq; 11596 ire_t *ire1 = NULL; 11597 ire_t *save_ire = ire; 11598 boolean_t multirt_send = B_FALSE; 11599 mblk_t *next_mp = NULL; 11600 ip_stack_t *ipst = ire->ire_ipst; 11601 boolean_t fp_prepend = B_FALSE; 11602 uint32_t hlen; 11603 11604 ip6h = (ip6_t *)mp->b_rptr; 11605 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11606 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11607 ASSERT(nce != NULL); 11608 ASSERT(mp->b_datap->db_type == M_DATA); 11609 ASSERT(stq != NULL); 11610 11611 ill = ire_to_ill(ire); 11612 if (!ill) { 11613 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11614 freemsg(mp); 11615 return; 11616 } 11617 11618 /* Flow-control check has been done in ip_wput_ire_v6 */ 11619 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11620 caller == IP_WSRV || canput(stq->q_next)) { 11621 uint32_t ill_index; 11622 11623 /* 11624 * In most cases, the emission loop below is entered only 11625 * once. Only in the case where the ire holds the 11626 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11627 * flagged ires in the bucket, and send the packet 11628 * through all crossed RTF_MULTIRT routes. 11629 */ 11630 if (ire->ire_flags & RTF_MULTIRT) { 11631 /* 11632 * Multirouting case. The bucket where ire is stored 11633 * probably holds other RTF_MULTIRT flagged ires 11634 * to the destination. In this call to ip_xmit_v6, 11635 * we attempt to send the packet through all 11636 * those ires. Thus, we first ensure that ire is the 11637 * first RTF_MULTIRT ire in the bucket, 11638 * before walking the ire list. 
11639 */ 11640 ire_t *first_ire; 11641 irb_t *irb = ire->ire_bucket; 11642 ASSERT(irb != NULL); 11643 multirt_send = B_TRUE; 11644 11645 /* Make sure we do not omit any multiroute ire. */ 11646 IRB_REFHOLD(irb); 11647 for (first_ire = irb->irb_ire; 11648 first_ire != NULL; 11649 first_ire = first_ire->ire_next) { 11650 if ((first_ire->ire_flags & RTF_MULTIRT) && 11651 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11652 &ire->ire_addr_v6)) && 11653 !(first_ire->ire_marks & 11654 (IRE_MARK_CONDEMNED | IRE_MARK_TESTHIDDEN))) 11655 break; 11656 } 11657 11658 if ((first_ire != NULL) && (first_ire != ire)) { 11659 IRE_REFHOLD(first_ire); 11660 /* ire will be released by the caller */ 11661 ire = first_ire; 11662 nce = ire->ire_nce; 11663 stq = ire->ire_stq; 11664 ill = ire_to_ill(ire); 11665 } 11666 IRB_REFRELE(irb); 11667 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11668 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11669 ILL_MDT_USABLE(ill)) { 11670 /* 11671 * This tcp connection was marked as MDT-capable, but 11672 * it has been turned off due changes in the interface. 11673 * Now that the interface support is back, turn it on 11674 * by notifying tcp. We don't directly modify tcp_mdt, 11675 * since we leave all the details to the tcp code that 11676 * knows better. 11677 */ 11678 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11679 11680 if (mdimp == NULL) { 11681 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11682 "connp %p (ENOMEM)\n", (void *)connp)); 11683 } else { 11684 CONN_INC_REF(connp); 11685 SQUEUE_ENTER_ONE(connp->conn_sqp, mdimp, 11686 tcp_input, connp, SQ_FILL, 11687 SQTAG_TCP_INPUT_MCTL); 11688 } 11689 } 11690 11691 do { 11692 mblk_t *mp_ip6h; 11693 11694 if (multirt_send) { 11695 irb_t *irb; 11696 /* 11697 * We are in a multiple send case, need to get 11698 * the next ire and make a duplicate of the 11699 * packet. ire1 holds here the next ire to 11700 * process in the bucket. 
If multirouting is 11701 * expected, any non-RTF_MULTIRT ire that has 11702 * the right destination address is ignored. 11703 */ 11704 irb = ire->ire_bucket; 11705 ASSERT(irb != NULL); 11706 11707 IRB_REFHOLD(irb); 11708 for (ire1 = ire->ire_next; 11709 ire1 != NULL; 11710 ire1 = ire1->ire_next) { 11711 if (!(ire1->ire_flags & RTF_MULTIRT)) 11712 continue; 11713 if (!IN6_ARE_ADDR_EQUAL( 11714 &ire1->ire_addr_v6, 11715 &ire->ire_addr_v6)) 11716 continue; 11717 if (ire1->ire_marks & 11718 IRE_MARK_CONDEMNED) 11719 continue; 11720 11721 /* Got one */ 11722 if (ire1 != save_ire) { 11723 IRE_REFHOLD(ire1); 11724 } 11725 break; 11726 } 11727 IRB_REFRELE(irb); 11728 11729 if (ire1 != NULL) { 11730 next_mp = copyb(mp); 11731 if ((next_mp == NULL) || 11732 ((mp->b_cont != NULL) && 11733 ((next_mp->b_cont = 11734 dupmsg(mp->b_cont)) == NULL))) { 11735 freemsg(next_mp); 11736 next_mp = NULL; 11737 ire_refrele(ire1); 11738 ire1 = NULL; 11739 } 11740 } 11741 11742 /* Last multiroute ire; don't loop anymore. */ 11743 if (ire1 == NULL) { 11744 multirt_send = B_FALSE; 11745 } 11746 } 11747 11748 ill_index = 11749 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11750 11751 /* Initiate IPPF processing */ 11752 if (IP6_OUT_IPP(flags, ipst)) { 11753 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11754 if (mp == NULL) { 11755 BUMP_MIB(ill->ill_ip_mib, 11756 ipIfStatsOutDiscards); 11757 if (next_mp != NULL) 11758 freemsg(next_mp); 11759 if (ire != save_ire) { 11760 ire_refrele(ire); 11761 } 11762 return; 11763 } 11764 ip6h = (ip6_t *)mp->b_rptr; 11765 } 11766 mp_ip6h = mp; 11767 11768 /* 11769 * Check for fastpath, we need to hold nce_lock to 11770 * prevent fastpath update from chaining nce_fp_mp. 
11771 */ 11772 11773 ASSERT(nce->nce_ipversion != IPV4_VERSION); 11774 mutex_enter(&nce->nce_lock); 11775 if ((mp1 = nce->nce_fp_mp) != NULL) { 11776 uchar_t *rptr; 11777 11778 hlen = MBLKL(mp1); 11779 rptr = mp->b_rptr - hlen; 11780 /* 11781 * make sure there is room for the fastpath 11782 * datalink header 11783 */ 11784 if (rptr < mp->b_datap->db_base) { 11785 mp1 = copyb(mp1); 11786 mutex_exit(&nce->nce_lock); 11787 if (mp1 == NULL) { 11788 BUMP_MIB(ill->ill_ip_mib, 11789 ipIfStatsOutDiscards); 11790 freemsg(mp); 11791 if (next_mp != NULL) 11792 freemsg(next_mp); 11793 if (ire != save_ire) { 11794 ire_refrele(ire); 11795 } 11796 return; 11797 } 11798 mp1->b_cont = mp; 11799 11800 /* Get the priority marking, if any */ 11801 mp1->b_band = mp->b_band; 11802 mp = mp1; 11803 } else { 11804 mp->b_rptr = rptr; 11805 /* 11806 * fastpath - pre-pend datalink 11807 * header 11808 */ 11809 bcopy(mp1->b_rptr, rptr, hlen); 11810 mutex_exit(&nce->nce_lock); 11811 fp_prepend = B_TRUE; 11812 } 11813 } else { 11814 /* 11815 * Get the DL_UNITDATA_REQ. 11816 */ 11817 mp1 = nce->nce_res_mp; 11818 if (mp1 == NULL) { 11819 mutex_exit(&nce->nce_lock); 11820 ip1dbg(("ip_xmit_v6: No resolution " 11821 "block ire = %p\n", (void *)ire)); 11822 freemsg(mp); 11823 if (next_mp != NULL) 11824 freemsg(next_mp); 11825 if (ire != save_ire) { 11826 ire_refrele(ire); 11827 } 11828 return; 11829 } 11830 /* 11831 * Prepend the DL_UNITDATA_REQ. 
11832 */ 11833 mp1 = copyb(mp1); 11834 mutex_exit(&nce->nce_lock); 11835 if (mp1 == NULL) { 11836 BUMP_MIB(ill->ill_ip_mib, 11837 ipIfStatsOutDiscards); 11838 freemsg(mp); 11839 if (next_mp != NULL) 11840 freemsg(next_mp); 11841 if (ire != save_ire) { 11842 ire_refrele(ire); 11843 } 11844 return; 11845 } 11846 mp1->b_cont = mp; 11847 11848 /* Get the priority marking, if any */ 11849 mp1->b_band = mp->b_band; 11850 mp = mp1; 11851 } 11852 11853 out_ill = (ill_t *)stq->q_ptr; 11854 11855 DTRACE_PROBE4(ip6__physical__out__start, 11856 ill_t *, NULL, ill_t *, out_ill, 11857 ip6_t *, ip6h, mblk_t *, mp); 11858 11859 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 11860 ipst->ips_ipv6firewall_physical_out, 11861 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 11862 11863 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 11864 11865 if (mp == NULL) { 11866 if (multirt_send) { 11867 ASSERT(ire1 != NULL); 11868 if (ire != save_ire) { 11869 ire_refrele(ire); 11870 } 11871 /* 11872 * Proceed with the next RTF_MULTIRT 11873 * ire, also set up the send-to queue 11874 * accordingly. 11875 */ 11876 ire = ire1; 11877 ire1 = NULL; 11878 stq = ire->ire_stq; 11879 nce = ire->ire_nce; 11880 ill = ire_to_ill(ire); 11881 mp = next_mp; 11882 next_mp = NULL; 11883 continue; 11884 } else { 11885 ASSERT(next_mp == NULL); 11886 ASSERT(ire1 == NULL); 11887 break; 11888 } 11889 } 11890 11891 if (ipst->ips_ip6_observe.he_interested) { 11892 zoneid_t szone; 11893 11894 szone = ip_get_zoneid_v6(&ip6h->ip6_src, 11895 mp_ip6h, out_ill, ipst, ALL_ZONES); 11896 11897 /* 11898 * The IP observability hook expects b_rptr to 11899 * be where the IPv6 header starts, so advance 11900 * past the link layer header. 
11901 */ 11902 if (fp_prepend) 11903 mp_ip6h->b_rptr += hlen; 11904 ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, 11905 ALL_ZONES, out_ill, ipst); 11906 if (fp_prepend) 11907 mp_ip6h->b_rptr -= hlen; 11908 } 11909 11910 /* 11911 * Update ire and MIB counters; for save_ire, this has 11912 * been done by the caller. 11913 */ 11914 if (ire != save_ire) { 11915 UPDATE_OB_PKT_COUNT(ire); 11916 ire->ire_last_used_time = lbolt; 11917 11918 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11919 BUMP_MIB(ill->ill_ip_mib, 11920 ipIfStatsHCOutMcastPkts); 11921 UPDATE_MIB(ill->ill_ip_mib, 11922 ipIfStatsHCOutMcastOctets, 11923 ntohs(ip6h->ip6_plen) + 11924 IPV6_HDR_LEN); 11925 } 11926 } 11927 11928 /* 11929 * Send it down. XXX Do we want to flow control AH/ESP 11930 * packets that carry TCP payloads? We don't flow 11931 * control TCP packets, but we should also not 11932 * flow-control TCP packets that have been protected. 11933 * We don't have an easy way to find out if an AH/ESP 11934 * packet was originally TCP or not currently. 11935 */ 11936 if (io == NULL) { 11937 BUMP_MIB(ill->ill_ip_mib, 11938 ipIfStatsHCOutTransmits); 11939 UPDATE_MIB(ill->ill_ip_mib, 11940 ipIfStatsHCOutOctets, 11941 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11942 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 11943 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 11944 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 11945 int, 0); 11946 11947 putnext(stq, mp); 11948 } else { 11949 /* 11950 * Safety Pup says: make sure this is 11951 * going to the right interface! 
11952 */ 11953 if (io->ipsec_out_capab_ill_index != 11954 ill_index) { 11955 /* IPsec kstats: bump lose counter */ 11956 freemsg(mp1); 11957 } else { 11958 BUMP_MIB(ill->ill_ip_mib, 11959 ipIfStatsHCOutTransmits); 11960 UPDATE_MIB(ill->ill_ip_mib, 11961 ipIfStatsHCOutOctets, 11962 ntohs(ip6h->ip6_plen) + 11963 IPV6_HDR_LEN); 11964 DTRACE_IP7(send, mblk_t *, mp, 11965 conn_t *, NULL, void_ip_t *, ip6h, 11966 __dtrace_ipsr_ill_t *, out_ill, 11967 ipha_t *, NULL, ip6_t *, ip6h, int, 11968 0); 11969 ipsec_hw_putnext(stq, mp); 11970 } 11971 } 11972 11973 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 11974 if (ire != save_ire) { 11975 ire_refrele(ire); 11976 } 11977 if (multirt_send) { 11978 ASSERT(ire1 != NULL); 11979 /* 11980 * Proceed with the next RTF_MULTIRT 11981 * ire, also set up the send-to queue 11982 * accordingly. 11983 */ 11984 ire = ire1; 11985 ire1 = NULL; 11986 stq = ire->ire_stq; 11987 nce = ire->ire_nce; 11988 ill = ire_to_ill(ire); 11989 mp = next_mp; 11990 next_mp = NULL; 11991 continue; 11992 } 11993 ASSERT(next_mp == NULL); 11994 ASSERT(ire1 == NULL); 11995 return; 11996 } 11997 11998 ASSERT(nce->nce_state != ND_INCOMPLETE); 11999 12000 /* 12001 * Check for upper layer advice 12002 */ 12003 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12004 /* 12005 * It should be o.k. to check the state without 12006 * a lock here, at most we lose an advice. 
12007 */ 12008 nce->nce_last = TICK_TO_MSEC(lbolt64); 12009 if (nce->nce_state != ND_REACHABLE) { 12010 12011 mutex_enter(&nce->nce_lock); 12012 nce->nce_state = ND_REACHABLE; 12013 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12014 mutex_exit(&nce->nce_lock); 12015 (void) untimeout(nce->nce_timeout_id); 12016 if (ip_debug > 2) { 12017 /* ip1dbg */ 12018 pr_addr_dbg("ip_xmit_v6: state" 12019 " for %s changed to" 12020 " REACHABLE\n", AF_INET6, 12021 &ire->ire_addr_v6); 12022 } 12023 } 12024 if (ire != save_ire) { 12025 ire_refrele(ire); 12026 } 12027 if (multirt_send) { 12028 ASSERT(ire1 != NULL); 12029 /* 12030 * Proceed with the next RTF_MULTIRT 12031 * ire, also set up the send-to queue 12032 * accordingly. 12033 */ 12034 ire = ire1; 12035 ire1 = NULL; 12036 stq = ire->ire_stq; 12037 nce = ire->ire_nce; 12038 ill = ire_to_ill(ire); 12039 mp = next_mp; 12040 next_mp = NULL; 12041 continue; 12042 } 12043 ASSERT(next_mp == NULL); 12044 ASSERT(ire1 == NULL); 12045 return; 12046 } 12047 12048 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12049 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12050 " ill_reachable_time = %d \n", delta, 12051 ill->ill_reachable_time)); 12052 if (delta > (uint64_t)ill->ill_reachable_time) { 12053 nce = ire->ire_nce; 12054 mutex_enter(&nce->nce_lock); 12055 switch (nce->nce_state) { 12056 case ND_REACHABLE: 12057 case ND_STALE: 12058 /* 12059 * ND_REACHABLE is identical to 12060 * ND_STALE in this specific case. If 12061 * reachable time has expired for this 12062 * neighbor (delta is greater than 12063 * reachable time), conceptually, the 12064 * neighbor cache is no longer in 12065 * REACHABLE state, but already in 12066 * STALE state. So the correct 12067 * transition here is to ND_DELAY. 
12068 */ 12069 nce->nce_state = ND_DELAY; 12070 mutex_exit(&nce->nce_lock); 12071 NDP_RESTART_TIMER(nce, 12072 ipst->ips_delay_first_probe_time); 12073 if (ip_debug > 3) { 12074 /* ip2dbg */ 12075 pr_addr_dbg("ip_xmit_v6: state" 12076 " for %s changed to" 12077 " DELAY\n", AF_INET6, 12078 &ire->ire_addr_v6); 12079 } 12080 break; 12081 case ND_DELAY: 12082 case ND_PROBE: 12083 mutex_exit(&nce->nce_lock); 12084 /* Timers have already started */ 12085 break; 12086 case ND_UNREACHABLE: 12087 /* 12088 * ndp timer has detected that this nce 12089 * is unreachable and initiated deleting 12090 * this nce and all its associated IREs. 12091 * This is a race where we found the 12092 * ire before it was deleted and have 12093 * just sent out a packet using this 12094 * unreachable nce. 12095 */ 12096 mutex_exit(&nce->nce_lock); 12097 break; 12098 default: 12099 ASSERT(0); 12100 } 12101 } 12102 12103 if (multirt_send) { 12104 ASSERT(ire1 != NULL); 12105 /* 12106 * Proceed with the next RTF_MULTIRT ire, 12107 * Also set up the send-to queue accordingly. 12108 */ 12109 if (ire != save_ire) { 12110 ire_refrele(ire); 12111 } 12112 ire = ire1; 12113 ire1 = NULL; 12114 stq = ire->ire_stq; 12115 nce = ire->ire_nce; 12116 ill = ire_to_ill(ire); 12117 mp = next_mp; 12118 next_mp = NULL; 12119 } 12120 } while (multirt_send); 12121 /* 12122 * In the multirouting case, release the last ire used for 12123 * emission. save_ire will be released by the caller. 12124 */ 12125 if (ire != save_ire) { 12126 ire_refrele(ire); 12127 } 12128 } else { 12129 /* 12130 * Can't apply backpressure, just discard the packet. 12131 */ 12132 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12133 freemsg(mp); 12134 return; 12135 } 12136 } 12137 12138 /* 12139 * pr_addr_dbg function provides the needed buffer space to call 12140 * inet_ntop() function's 3rd argument. This function should be 12141 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12142 * stack buffer space in it's own stack frame. 
This function uses 12143 * a buffer from it's own stack and prints the information. 12144 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12145 * 12146 * Note: This function can call inet_ntop() once. 12147 */ 12148 void 12149 pr_addr_dbg(char *fmt1, int af, const void *addr) 12150 { 12151 char buf[INET6_ADDRSTRLEN]; 12152 12153 if (fmt1 == NULL) { 12154 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12155 return; 12156 } 12157 12158 /* 12159 * This does not compare debug level and just prints 12160 * out. Thus it is the responsibility of the caller 12161 * to check the appropriate debug-level before calling 12162 * this function. 12163 */ 12164 if (ip_debug > 0) { 12165 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12166 } 12167 12168 12169 } 12170 12171 12172 /* 12173 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12174 * if needed and extension headers) that will be needed based on the 12175 * ip6_pkt_t structure passed by the caller. 12176 * 12177 * The returned length does not include the length of the upper level 12178 * protocol (ULP) header. 
12179 */ 12180 int 12181 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12182 { 12183 int len; 12184 12185 len = IPV6_HDR_LEN; 12186 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12187 len += sizeof (ip6i_t); 12188 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12189 ASSERT(ipp->ipp_hopoptslen != 0); 12190 len += ipp->ipp_hopoptslen; 12191 } 12192 if (ipp->ipp_fields & IPPF_RTHDR) { 12193 ASSERT(ipp->ipp_rthdrlen != 0); 12194 len += ipp->ipp_rthdrlen; 12195 } 12196 /* 12197 * En-route destination options 12198 * Only do them if there's a routing header as well 12199 */ 12200 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12201 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12202 ASSERT(ipp->ipp_rtdstoptslen != 0); 12203 len += ipp->ipp_rtdstoptslen; 12204 } 12205 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12206 ASSERT(ipp->ipp_dstoptslen != 0); 12207 len += ipp->ipp_dstoptslen; 12208 } 12209 return (len); 12210 } 12211 12212 /* 12213 * All-purpose routine to build a header chain of an IPv6 header 12214 * followed by any required extension headers and a proto header, 12215 * preceeded (where necessary) by an ip6i_t private header. 12216 * 12217 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12218 * will be filled in appropriately. 12219 * Thus the caller must fill in the rest of the IPv6 header, such as 12220 * traffic class/flowid, source address (if not set here), hoplimit (if not 12221 * set here) and destination address. 12222 * 12223 * The extension headers and ip6i_t header will all be fully filled in. 12224 */ 12225 void 12226 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12227 ip6_pkt_t *ipp, uint8_t protocol) 12228 { 12229 uint8_t *nxthdr_ptr; 12230 uint8_t *cp; 12231 ip6i_t *ip6i; 12232 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12233 12234 /* 12235 * If sending private ip6i_t header down (checksum info, nexthop, 12236 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12237 * then fill it in. (The checksum info will be filled in by icmp). 
12238 */ 12239 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12240 ip6i = (ip6i_t *)ip6h; 12241 ip6h = (ip6_t *)&ip6i[1]; 12242 12243 ip6i->ip6i_flags = 0; 12244 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12245 if (ipp->ipp_fields & IPPF_IFINDEX || 12246 ipp->ipp_fields & IPPF_SCOPE_ID) { 12247 ASSERT(ipp->ipp_ifindex != 0); 12248 ip6i->ip6i_flags |= IP6I_IFINDEX; 12249 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12250 } 12251 if (ipp->ipp_fields & IPPF_ADDR) { 12252 /* 12253 * Enable per-packet source address verification if 12254 * IPV6_PKTINFO specified the source address. 12255 * ip6_src is set in the transport's _wput function. 12256 */ 12257 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12258 &ipp->ipp_addr)); 12259 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12260 } 12261 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12262 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12263 /* 12264 * We need to set this flag so that IP doesn't 12265 * rewrite the IPv6 header's hoplimit with the 12266 * current default value. 12267 */ 12268 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12269 } 12270 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12271 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12272 &ipp->ipp_nexthop)); 12273 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12274 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12275 } 12276 /* 12277 * tell IP this is an ip6i_t private header 12278 */ 12279 ip6i->ip6i_nxt = IPPROTO_RAW; 12280 } 12281 /* Initialize IPv6 header */ 12282 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12283 if (ipp->ipp_fields & IPPF_TCLASS) { 12284 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12285 (ipp->ipp_tclass << 20); 12286 } 12287 if (ipp->ipp_fields & IPPF_ADDR) 12288 ip6h->ip6_src = ipp->ipp_addr; 12289 12290 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12291 cp = (uint8_t *)&ip6h[1]; 12292 /* 12293 * Here's where we have to start stringing together 12294 * any extension headers in the right order: 12295 * Hop-by-hop, destination, routing, and final destination opts. 
12296 */ 12297 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12298 /* Hop-by-hop options */ 12299 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12300 12301 *nxthdr_ptr = IPPROTO_HOPOPTS; 12302 nxthdr_ptr = &hbh->ip6h_nxt; 12303 12304 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12305 cp += ipp->ipp_hopoptslen; 12306 } 12307 /* 12308 * En-route destination options 12309 * Only do them if there's a routing header as well 12310 */ 12311 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12312 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12313 ip6_dest_t *dst = (ip6_dest_t *)cp; 12314 12315 *nxthdr_ptr = IPPROTO_DSTOPTS; 12316 nxthdr_ptr = &dst->ip6d_nxt; 12317 12318 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12319 cp += ipp->ipp_rtdstoptslen; 12320 } 12321 /* 12322 * Routing header next 12323 */ 12324 if (ipp->ipp_fields & IPPF_RTHDR) { 12325 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12326 12327 *nxthdr_ptr = IPPROTO_ROUTING; 12328 nxthdr_ptr = &rt->ip6r_nxt; 12329 12330 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12331 cp += ipp->ipp_rthdrlen; 12332 } 12333 /* 12334 * Do ultimate destination options 12335 */ 12336 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12337 ip6_dest_t *dest = (ip6_dest_t *)cp; 12338 12339 *nxthdr_ptr = IPPROTO_DSTOPTS; 12340 nxthdr_ptr = &dest->ip6d_nxt; 12341 12342 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12343 cp += ipp->ipp_dstoptslen; 12344 } 12345 /* 12346 * Now set the last header pointer to the proto passed in 12347 */ 12348 *nxthdr_ptr = protocol; 12349 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12350 } 12351 12352 /* 12353 * Return a pointer to the routing header extension header 12354 * in the IPv6 header(s) chain passed in. 
12355 * If none found, return NULL 12356 * Assumes that all extension headers are in same mblk as the v6 header 12357 */ 12358 ip6_rthdr_t * 12359 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12360 { 12361 ip6_dest_t *desthdr; 12362 ip6_frag_t *fraghdr; 12363 uint_t hdrlen; 12364 uint8_t nexthdr; 12365 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12366 12367 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12368 return ((ip6_rthdr_t *)ptr); 12369 12370 /* 12371 * The routing header will precede all extension headers 12372 * other than the hop-by-hop and destination options 12373 * extension headers, so if we see anything other than those, 12374 * we're done and didn't find it. 12375 * We could see a destination options header alone but no 12376 * routing header, in which case we'll return NULL as soon as 12377 * we see anything after that. 12378 * Hop-by-hop and destination option headers are identical, 12379 * so we can use either one we want as a template. 12380 */ 12381 nexthdr = ip6h->ip6_nxt; 12382 while (ptr < endptr) { 12383 /* Is there enough left for len + nexthdr? */ 12384 if (ptr + MIN_EHDR_LEN > endptr) 12385 return (NULL); 12386 12387 switch (nexthdr) { 12388 case IPPROTO_HOPOPTS: 12389 case IPPROTO_DSTOPTS: 12390 /* Assumes the headers are identical for hbh and dst */ 12391 desthdr = (ip6_dest_t *)ptr; 12392 hdrlen = 8 * (desthdr->ip6d_len + 1); 12393 nexthdr = desthdr->ip6d_nxt; 12394 break; 12395 12396 case IPPROTO_ROUTING: 12397 return ((ip6_rthdr_t *)ptr); 12398 12399 case IPPROTO_FRAGMENT: 12400 fraghdr = (ip6_frag_t *)ptr; 12401 hdrlen = sizeof (ip6_frag_t); 12402 nexthdr = fraghdr->ip6f_nxt; 12403 break; 12404 12405 default: 12406 return (NULL); 12407 } 12408 ptr += hdrlen; 12409 } 12410 return (NULL); 12411 } 12412 12413 /* 12414 * Called for source-routed packets originating on this node. 
12415 * Manipulates the original routing header by moving every entry up 12416 * one slot, placing the first entry in the v6 header's v6_dst field, 12417 * and placing the ultimate destination in the routing header's last 12418 * slot. 12419 * 12420 * Returns the checksum diference between the ultimate destination 12421 * (last hop in the routing header when the packet is sent) and 12422 * the first hop (ip6_dst when the packet is sent) 12423 */ 12424 /* ARGSUSED2 */ 12425 uint32_t 12426 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12427 { 12428 uint_t numaddr; 12429 uint_t i; 12430 in6_addr_t *addrptr; 12431 in6_addr_t tmp; 12432 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12433 uint32_t cksm; 12434 uint32_t addrsum = 0; 12435 uint16_t *ptr; 12436 12437 /* 12438 * Perform any processing needed for source routing. 12439 * We know that all extension headers will be in the same mblk 12440 * as the IPv6 header. 12441 */ 12442 12443 /* 12444 * If no segments left in header, or the header length field is zero, 12445 * don't move hop addresses around; 12446 * Checksum difference is zero. 12447 */ 12448 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12449 return (0); 12450 12451 ptr = (uint16_t *)&ip6h->ip6_dst; 12452 cksm = 0; 12453 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12454 cksm += ptr[i]; 12455 } 12456 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12457 12458 /* 12459 * Here's where the fun begins - we have to 12460 * move all addresses up one spot, take the 12461 * first hop and make it our first ip6_dst, 12462 * and place the ultimate destination in the 12463 * newly-opened last slot. 
12464 */ 12465 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12466 numaddr = rthdr->ip6r0_len / 2; 12467 tmp = *addrptr; 12468 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12469 *addrptr = addrptr[1]; 12470 } 12471 *addrptr = ip6h->ip6_dst; 12472 ip6h->ip6_dst = tmp; 12473 12474 /* 12475 * From the checksummed ultimate destination subtract the checksummed 12476 * current ip6_dst (the first hop address). Return that number. 12477 * (In the v4 case, the second part of this is done in each routine 12478 * that calls ip_massage_options(). We do it all in this one place 12479 * for v6). 12480 */ 12481 ptr = (uint16_t *)&ip6h->ip6_dst; 12482 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12483 addrsum += ptr[i]; 12484 } 12485 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12486 if ((int)cksm < 0) 12487 cksm--; 12488 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12489 12490 return (cksm); 12491 } 12492 12493 /* 12494 * Propagate a multicast group membership operation (join/leave) (*fn) on 12495 * all interfaces crossed by the related multirt routes. 12496 * The call is considered successful if the operation succeeds 12497 * on at least one interface. 12498 * The function is called if the destination address in the packet to send 12499 * is multirouted. 
12500 */ 12501 int 12502 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12503 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12504 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12505 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12506 { 12507 ire_t *ire_gw; 12508 irb_t *irb; 12509 int index, error = 0; 12510 opt_restart_t *or; 12511 ip_stack_t *ipst = ire->ire_ipst; 12512 12513 irb = ire->ire_bucket; 12514 ASSERT(irb != NULL); 12515 12516 ASSERT(DB_TYPE(first_mp) == M_CTL); 12517 or = (opt_restart_t *)first_mp->b_rptr; 12518 12519 IRB_REFHOLD(irb); 12520 for (; ire != NULL; ire = ire->ire_next) { 12521 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12522 continue; 12523 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12524 continue; 12525 12526 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12527 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12528 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12529 /* No resolver exists for the gateway; skip this ire. */ 12530 if (ire_gw == NULL) 12531 continue; 12532 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12533 /* 12534 * A resolver exists: we can get the interface on which we have 12535 * to apply the operation. 12536 */ 12537 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12538 first_mp); 12539 if (error == 0) 12540 or->or_private = CGTP_MCAST_SUCCESS; 12541 12542 if (ip_debug > 0) { 12543 ulong_t off; 12544 char *ksym; 12545 12546 ksym = kobj_getsymname((uintptr_t)fn, &off); 12547 ip2dbg(("ip_multirt_apply_membership_v6: " 12548 "called %s, multirt group 0x%08x via itf 0x%08x, " 12549 "error %d [success %u]\n", 12550 ksym ? 
ksym : "?", 12551 ntohl(V4_PART_OF_V6((*v6grp))), 12552 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12553 error, or->or_private)); 12554 } 12555 12556 ire_refrele(ire_gw); 12557 if (error == EINPROGRESS) { 12558 IRB_REFRELE(irb); 12559 return (error); 12560 } 12561 } 12562 IRB_REFRELE(irb); 12563 /* 12564 * Consider the call as successful if we succeeded on at least 12565 * one interface. Otherwise, return the last encountered error. 12566 */ 12567 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12568 } 12569 12570 void 12571 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12572 { 12573 kstat_t *ksp; 12574 12575 ip6_stat_t template = { 12576 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12577 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12578 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12579 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12580 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12581 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12582 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12583 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12584 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12585 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12586 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12587 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12588 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12589 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12590 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 12591 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 12592 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 12593 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 12594 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 12595 }; 12596 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 12597 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 12598 KSTAT_FLAG_VIRTUAL, stackid); 12599 12600 if (ksp == NULL) 12601 return (NULL); 12602 12603 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 12604 ksp->ks_data = (void *)ip6_statisticsp; 12605 ksp->ks_private = (void *)(uintptr_t)stackid; 12606 12607 kstat_install(ksp); 12608 return (ksp); 12609 } 12610 12611 void 12612 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 12613 { 12614 if (ksp != NULL) { 12615 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 12616 kstat_delete_netstack(ksp, stackid); 12617 } 12618 } 12619 12620 /* 12621 * The following two functions set and get the value for the 12622 * IPV6_SRC_PREFERENCES socket option. 12623 */ 12624 int 12625 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12626 { 12627 /* 12628 * We only support preferences that are covered by 12629 * IPV6_PREFER_SRC_MASK. 12630 */ 12631 if (prefs & ~IPV6_PREFER_SRC_MASK) 12632 return (EINVAL); 12633 12634 /* 12635 * Look for conflicting preferences or default preferences. If 12636 * both bits of a related pair are clear, the application wants the 12637 * system's default value for that pair. Both bits in a pair can't 12638 * be set. 
12639 */ 12640 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12641 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12642 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12643 IPV6_PREFER_SRC_MIPMASK) { 12644 return (EINVAL); 12645 } 12646 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12647 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12648 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12649 IPV6_PREFER_SRC_TMPMASK) { 12650 return (EINVAL); 12651 } 12652 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12653 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12654 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12655 IPV6_PREFER_SRC_CGAMASK) { 12656 return (EINVAL); 12657 } 12658 12659 connp->conn_src_preferences = prefs; 12660 return (0); 12661 } 12662 12663 size_t 12664 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12665 { 12666 *val = connp->conn_src_preferences; 12667 return (sizeof (connp->conn_src_preferences)); 12668 } 12669 12670 int 12671 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti) 12672 { 12673 ire_t *ire; 12674 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12675 12676 /* 12677 * Verify the source address and ifindex. Privileged users can use 12678 * any source address. For ancillary data the source address is 12679 * checked in ip_wput_v6. 
12680 */ 12681 if (pkti->ipi6_ifindex != 0) { 12682 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 12683 if (!phyint_exists(pkti->ipi6_ifindex, ipst)) { 12684 rw_exit(&ipst->ips_ill_g_lock); 12685 return (ENXIO); 12686 } 12687 rw_exit(&ipst->ips_ill_g_lock); 12688 } 12689 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12690 secpolicy_net_rawaccess(cr) != 0) { 12691 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12692 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12693 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 12694 if (ire != NULL) 12695 ire_refrele(ire); 12696 else 12697 return (ENXIO); 12698 } 12699 return (0); 12700 } 12701 12702 /* 12703 * Get the size of the IP options (including the IP headers size) 12704 * without including the AH header's size. If till_ah is B_FALSE, 12705 * and if AH header is present, dest options beyond AH header will 12706 * also be included in the returned size. 12707 */ 12708 int 12709 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12710 { 12711 ip6_t *ip6h; 12712 uint8_t nexthdr; 12713 uint8_t *whereptr; 12714 ip6_hbh_t *hbhhdr; 12715 ip6_dest_t *dsthdr; 12716 ip6_rthdr_t *rthdr; 12717 int ehdrlen; 12718 int size; 12719 ah_t *ah; 12720 12721 ip6h = (ip6_t *)mp->b_rptr; 12722 size = IPV6_HDR_LEN; 12723 nexthdr = ip6h->ip6_nxt; 12724 whereptr = (uint8_t *)&ip6h[1]; 12725 for (;;) { 12726 /* Assume IP has already stripped it */ 12727 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12728 switch (nexthdr) { 12729 case IPPROTO_HOPOPTS: 12730 hbhhdr = (ip6_hbh_t *)whereptr; 12731 nexthdr = hbhhdr->ip6h_nxt; 12732 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12733 break; 12734 case IPPROTO_DSTOPTS: 12735 dsthdr = (ip6_dest_t *)whereptr; 12736 nexthdr = dsthdr->ip6d_nxt; 12737 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12738 break; 12739 case IPPROTO_ROUTING: 12740 rthdr = (ip6_rthdr_t *)whereptr; 12741 nexthdr = rthdr->ip6r_nxt; 12742 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12743 break; 12744 default : 12745 if (till_ah) { 12746 
ASSERT(nexthdr == IPPROTO_AH); 12747 return (size); 12748 } 12749 /* 12750 * If we don't have a AH header to traverse, 12751 * return now. This happens normally for 12752 * outbound datagrams where we have not inserted 12753 * the AH header. 12754 */ 12755 if (nexthdr != IPPROTO_AH) { 12756 return (size); 12757 } 12758 12759 /* 12760 * We don't include the AH header's size 12761 * to be symmetrical with other cases where 12762 * we either don't have a AH header (outbound) 12763 * or peek into the AH header yet (inbound and 12764 * not pulled up yet). 12765 */ 12766 ah = (ah_t *)whereptr; 12767 nexthdr = ah->ah_nexthdr; 12768 ehdrlen = (ah->ah_length << 2) + 8; 12769 12770 if (nexthdr == IPPROTO_DSTOPTS) { 12771 if (whereptr + ehdrlen >= mp->b_wptr) { 12772 /* 12773 * The destination options header 12774 * is not part of the first mblk. 12775 */ 12776 whereptr = mp->b_cont->b_rptr; 12777 } else { 12778 whereptr += ehdrlen; 12779 } 12780 12781 dsthdr = (ip6_dest_t *)whereptr; 12782 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12783 size += ehdrlen; 12784 } 12785 return (size); 12786 } 12787 whereptr += ehdrlen; 12788 size += ehdrlen; 12789 } 12790 } 12791 12792 /* 12793 * Utility routine that checks if `v6srcp' is a valid address on underlying 12794 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 12795 * associated with `v6srcp' on success. NOTE: if this is not called from 12796 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 12797 * group during or after this lookup. 
12798 */ 12799 static boolean_t 12800 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 12801 { 12802 ipif_t *ipif; 12803 12804 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 12805 if (ipif != NULL) { 12806 if (ipifp != NULL) 12807 *ipifp = ipif; 12808 else 12809 ipif_refrele(ipif); 12810 return (B_TRUE); 12811 } 12812 12813 if (ip_debug > 2) { 12814 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 12815 "src %s\n", AF_INET6, v6srcp); 12816 } 12817 return (B_FALSE); 12818 } 12819